Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v2] Add remaining calls shared between command buffer and queue #2695

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions source/adapters/level_zero/v2/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,53 +170,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the command-buffer USM fill entry point.
// None of the arguments are read: the function logs an error that names
// itself (via __FUNCTION__) and returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendUSMFillExp(
ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
const void *pPattern, size_t patternSize, size_t size,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
ur_exp_command_buffer_command_handle_t *phCommand) {
// Report the missing implementation, then fail with "unsupported".
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the command-buffer memory-buffer fill entry point.
// None of the arguments are read: the function logs an error that names
// itself (via __FUNCTION__) and returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendMemBufferFillExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
const void *pPattern, size_t patternSize, size_t offset, size_t size,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
ur_exp_command_buffer_command_handle_t *phCommand) {
// Report the missing implementation, then fail with "unsupported".
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the command-buffer USM prefetch entry point.
// None of the arguments are read: the function logs an error that names
// itself (via __FUNCTION__) and returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendUSMPrefetchExp(
ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
size_t size, ur_usm_migration_flags_t flags,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
ur_exp_command_buffer_command_handle_t *phCommand) {
// Report the missing implementation, then fail with "unsupported".
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder for the command-buffer USM advise entry point.
// None of the arguments are read: the function logs an error that names
// itself (via __FUNCTION__) and returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
ur_result_t urCommandBufferAppendUSMAdviseExp(
ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
ur_exp_command_buffer_command_handle_t *phCommand) {
// Report the missing implementation, then fail with "unsupported".
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urCommandBufferUpdateKernelLaunchExp(
ur_exp_command_buffer_command_handle_t hCommand,
const ur_exp_command_buffer_update_kernel_launch_desc_t
Expand Down
108 changes: 108 additions & 0 deletions source/adapters/level_zero/v2/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,114 @@ ur_result_t urCommandBufferAppendMemBufferReadRectExp(
return exceptionToResult(std::current_exception());
}

// Records a USM fill in the command buffer by delegating to the command
// buffer's command list manager.
//
// Only hCommandBuffer, pMemory, pPattern, patternSize and size are used. The
// manager is called with an empty event wait list and no output event, and
// neither pSyncPoint nor phCommand is written here. Anything thrown while
// appending is converted to a ur_result_t by exceptionToResult().
ur_result_t urCommandBufferAppendUSMFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory,
    const void *pPattern, size_t patternSize, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not consulted: the sync mechanic can be ignored because
  // all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Events are not forwarded (the same issue as in
  // urCommandBufferAppendKernelLaunchExp).
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMFill(
      pMemory, patternSize, pPattern, size, 0, nullptr, nullptr));
  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}

// Records a memory-buffer fill in the command buffer by delegating to the
// command buffer's command list manager.
//
// Only hCommandBuffer, hBuffer, pPattern, patternSize, offset and size are
// used. The manager is called with an empty event wait list and no output
// event, and neither pSyncPoint nor phCommand is written here. Anything
// thrown while appending is converted to a ur_result_t by
// exceptionToResult().
ur_result_t urCommandBufferAppendMemBufferFillExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer,
    const void *pPattern, size_t patternSize, size_t offset, size_t size,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not consulted: the sync mechanic can be ignored because
  // all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Events are not forwarded (the same issue as in
  // urCommandBufferAppendKernelLaunchExp).
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendMemBufferFill(
      hBuffer, pPattern, patternSize, offset, size, 0, nullptr, nullptr));
  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}

// Records a USM prefetch in the command buffer by delegating to the command
// buffer's command list manager.
//
// Only hCommandBuffer, pMemory, size and flags are used. The manager is
// called with an empty event wait list and no output event, and neither
// pSyncPoint nor phCommand is written here. Anything thrown while appending
// is converted to a ur_result_t by exceptionToResult().
ur_result_t urCommandBufferAppendUSMPrefetchExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_migration_flags_t flags,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not consulted: the sync mechanic can be ignored because
  // all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Events are not forwarded (the same issue as in
  // urCommandBufferAppendKernelLaunchExp).
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMPrefetch(
      pMemory, size, flags, 0, nullptr, nullptr));
  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}

// Records a USM advise in the command buffer by delegating to the command
// buffer's command list manager.
//
// Only hCommandBuffer, pMemory, size and advice are used. The manager is
// called with no output event, and neither pSyncPoint nor phCommand is
// written here. Anything thrown while appending is converted to a
// ur_result_t by exceptionToResult().
ur_result_t urCommandBufferAppendUSMAdviseExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory,
    size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
    ur_exp_command_buffer_command_handle_t *phCommand) try {
  // Sync points are not consulted: the sync mechanic can be ignored because
  // all lists are in-order.
  std::ignore = pSyncPoint;
  std::ignore = pSyncPointWaitList;
  std::ignore = numSyncPointsInWaitList;

  // Events are not forwarded (the same issue as in
  // urCommandBufferAppendKernelLaunchExp).
  std::ignore = phEvent;
  std::ignore = phEventWaitList;
  std::ignore = numEventsInWaitList;

  // No command handle is produced.
  std::ignore = phCommand;

  UR_CALL(hCommandBuffer->commandListManager.appendUSMAdvise(pMemory, size,
                                                             advice, nullptr));
  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}
ur_result_t
urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
ur_exp_command_buffer_info_t propName,
Expand Down
134 changes: 134 additions & 0 deletions source/adapters/level_zero/v2/command_list_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,50 @@ ur_command_list_manager::~ur_command_list_manager() {
ur::level_zero::urDeviceRelease(device);
}

// Appends the commands that fill `size` bytes of `dst`, starting at `offset`,
// with repetitions of the `patternSize`-byte pattern at `pPattern`.
//
// This function takes no lock itself; the callers visible in this file
// (appendMemBufferFill / appendUSMFill) lock before calling it.
//
//   dst                   destination buffer; its device pointer is obtained
//                         through getDevicePtr()
//   offset, size          byte range of dst to fill
//   patternSize, pPattern pattern to replicate; patternSize must be non-zero
//   numEventsInWaitList,
//   phEventWaitList       wait list; it is attached to the first command
//                         appended and the view is cleared afterwards
//   phEvent, commandType  forwarded to getSignalEvent(); the resulting event
//                         is attached to the last command appended
//
// Returns UR_RESULT_ERROR_INVALID_SIZE when patternSize is 0 and
// UR_RESULT_SUCCESS once all commands were appended. Level Zero failures are
// reported through ZE2UR_CALL / ZE2UR_CALL_THROWS.
ur_result_t ur_command_list_manager::appendGenericFillUnlocked(
    ur_mem_buffer_t *dst, size_t offset, size_t patternSize,
    const void *pPattern, size_t size, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
    ur_command_t commandType) {

  // A zero-sized pattern cannot be replicated, and the emulation path below
  // divides by patternSize. Reject it before any event is set up.
  if (patternSize == 0) {
    return UR_RESULT_ERROR_INVALID_SIZE;
  }

  auto zeSignalEvent = getSignalEvent(phEvent, commandType);

  auto waitListView = getWaitListView(phEventWaitList, numEventsInWaitList);

  // NOTE(review): the destination is requested with read_only access although
  // it is written below - confirm this is the intended access mode.
  auto pDst = ur_cast<char *>(dst->getDevicePtr(
      device, ur_mem_buffer_t::device_access_mode_t::read_only, offset, size,
      [&](void *src, void *dst, size_t size) {
        // Copy callback handed to getDevicePtr(): the copy waits on the
        // caller's events, after which the wait-list view is cleared.
        ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy,
                          (zeCommandList.get(), dst, src, size, nullptr,
                           waitListView.num, waitListView.handles));
        waitListView.clear();
      }));

  // PatternSize must be a power of two for zeCommandListAppendMemoryFill.
  // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
  if (isPowerOf2(patternSize)) {
    ZE2UR_CALL(zeCommandListAppendMemoryFill,
               (zeCommandList.get(), pDst, pPattern, patternSize, size,
                zeSignalEvent, waitListView.num, waitListView.handles));
  } else {
    // Copy pattern into every entry in memory array pointed by Ptr.
    // The counters are size_t: a 32-bit step count would silently truncate
    // size / patternSize for very large fills.
    // NOTE(review): when size < patternSize the loop body never runs, so no
    // command carries zeSignalEvent - confirm callers never pass such sizes.
    // NOTE(review): pPattern is used directly as the copy source; whether it
    // must stay valid until the command list executes should be confirmed.
    const size_t numOfCopySteps = size / patternSize;
    const void *src = pPattern;

    for (size_t step = 0; step < numOfCopySteps; ++step) {
      void *dst = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(pDst) +
                                           step * patternSize);
      // Only the final copy signals the output event; only the first copy
      // sees the wait list, because the view is cleared after each append.
      ZE2UR_CALL(zeCommandListAppendMemoryCopy,
                 (zeCommandList.get(), dst, src, patternSize,
                  step == numOfCopySteps - 1 ? zeSignalEvent : nullptr,
                  waitListView.num, waitListView.handles));
      waitListView.clear();
    }
  }

  return UR_RESULT_SUCCESS;
}

ur_result_t ur_command_list_manager::appendGenericCopyUnlocked(
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking, size_t srcOffset,
size_t dstOffset, size_t size, uint32_t numEventsInWaitList,
Expand Down Expand Up @@ -209,6 +253,96 @@ ur_result_t ur_command_list_manager::appendUSMMemcpy(
return UR_RESULT_SUCCESS;
}

// Appends a fill of `size` bytes, starting `offset` bytes into the buffer
// behind hMem, using the patternSize-byte pattern at pPattern.
//
// The requested range is checked against the buffer's size first; a range
// that does not fit is reported as UR_RESULT_ERROR_INVALID_SIZE through
// UR_ASSERT. This manager's mutex and the buffer's mutex are then both held
// while the fill is appended via appendGenericFillUnlocked(), whose result is
// returned unchanged.
ur_result_t ur_command_list_manager::appendMemBufferFill(
    ur_mem_handle_t hMem, const void *pPattern, size_t patternSize,
    size_t offset, size_t size, uint32_t numEventsInWaitList,
    const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendMemBufferFill");

  auto hBuffer = hMem->getBuffer();

  // Written so the check cannot wrap: `offset + size` may overflow size_t and
  // make an out-of-range request look valid.
  const size_t bufferSize = hBuffer->getSize();
  UR_ASSERT(offset <= bufferSize && size <= bufferSize - offset,
            UR_RESULT_ERROR_INVALID_SIZE);

  // Both mutexes are acquired together by a single scoped_lock.
  std::scoped_lock<ur_shared_mutex, ur_shared_mutex> lock(this->Mutex,
                                                          hBuffer->getMutex());

  return appendGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size,
                                   numEventsInWaitList, phEventWaitList,
                                   phEvent, UR_COMMAND_MEM_BUFFER_FILL);
}

// Appends a fill of `size` bytes at the USM pointer pMem, using the
// patternSize-byte pattern at pPattern.
//
// The pointer is wrapped in a temporary ur_usm_handle_t built from
// (context, size, pMem) so that appendGenericFillUnlocked() can be reused
// with offset 0. This manager's mutex is held for the whole call, and the
// helper's result is returned unchanged.
ur_result_t ur_command_list_manager::appendUSMFill(
    void *pMem, size_t patternSize, const void *pPattern, size_t size,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMFill");

  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);

  // The fill starts at the beginning of the wrapped range.
  constexpr size_t fillOffset = 0;
  ur_usm_handle_t usmDestination(context, size, pMem);

  return appendGenericFillUnlocked(&usmDestination, fillOffset, patternSize,
                                   pPattern, size, numEventsInWaitList,
                                   phEventWaitList, phEvent,
                                   UR_COMMAND_USM_FILL);
}

// Appends a prefetch of the `size` bytes at pMem to the managed command list.
//
// While holding this manager's mutex, the following is appended in order:
//   1. a wait on the supplied events, when the wait-list view yields any;
//   2. the memory prefetch itself;
//   3. a signal of the event obtained from getSignalEvent(), when non-null.
// `flags` is accepted but not translated yet (see the TODO below).
ur_result_t ur_command_list_manager::appendUSMPrefetch(
    const void *pMem, size_t size, ur_usm_migration_flags_t flags,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMPrefetch");

  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);

  // TODO: figure out how to translate "flags"
  std::ignore = flags;

  auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH);
  auto [pWaitEvents, numWaitEvents] =
      getWaitListView(phEventWaitList, numEventsInWaitList);

  // Step 1: the prefetch call takes no wait list, so wait explicitly.
  if (pWaitEvents) {
    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
               (zeCommandList.get(), numWaitEvents, pWaitEvents));
  }

  // Step 2: the prefetch itself.
  ZE2UR_CALL(zeCommandListAppendMemoryPrefetch,
             (zeCommandList.get(), pMem, size));

  // Step 3: the prefetch call takes no signal event either.
  if (zeSignalEvent) {
    ZE2UR_CALL(zeCommandListAppendSignalEvent,
               (zeCommandList.get(), zeSignalEvent));
  }

  return UR_RESULT_SUCCESS;
}

// Appends a memory-advise command for the `size` bytes at pMem on this
// manager's device.
//
// `advice` is converted to ze_memory_advice_t with ur_cast. The wait-list
// view is built from (nullptr, 0); a wait is appended only if that view still
// yields events. After the advise, the event obtained from getSignalEvent()
// is signalled when it is non-null. This manager's mutex is held throughout.
ur_result_t
ur_command_list_manager::appendUSMAdvise(const void *pMem, size_t size,
                                         ur_usm_advice_flags_t advice,
                                         ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMAdvise");

  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);

  auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE);
  auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0);

  if (pWaitEvents) {
    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
               (zeCommandList.get(), numWaitEvents, pWaitEvents));
  }

  // Translate the advice right where it is consumed.
  auto zeAdvice = ur_cast<ze_memory_advice_t>(advice);
  ZE2UR_CALL(zeCommandListAppendMemAdvise,
             (zeCommandList.get(), device->ZeDevice, pMem, size, zeAdvice));

  // The advise call takes no signal event, so signal explicitly.
  if (zeSignalEvent) {
    ZE2UR_CALL(zeCommandListAppendSignalEvent,
               (zeCommandList.get(), zeSignalEvent));
  }

  return UR_RESULT_SUCCESS;
}

ur_result_t ur_command_list_manager::appendMemBufferRead(
ur_mem_handle_t hMem, bool blockingRead, size_t offset, size_t size,
void *pDst, uint32_t numEventsInWaitList,
Expand Down
27 changes: 27 additions & 0 deletions source/adapters/level_zero/v2/command_list_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,27 @@ struct ur_command_list_manager : public _ur_object {
size_t height, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent);
// Appends a fill of `size` bytes, starting `offset` bytes into the buffer
// behind hBuffer, using the patternSize-byte pattern at pPattern.
// The definition rejects a range that extends past the buffer's size with
// UR_RESULT_ERROR_INVALID_SIZE, and holds both this manager's mutex and the
// buffer's mutex while appending.
ur_result_t appendMemBufferFill(ur_mem_handle_t hBuffer, const void *pPattern,
size_t patternSize, size_t offset,
size_t size, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent);

// Appends a fill of `size` bytes at the USM pointer pMem, using the
// patternSize-byte pattern at pPattern. The definition holds this manager's
// mutex while appending.
// Note the argument order: patternSize comes before pPattern here, the
// opposite of appendMemBufferFill.
ur_result_t appendUSMFill(void *pMem, size_t patternSize,
const void *pPattern, size_t size,
uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent);

// Appends a prefetch of the `size` bytes at pMem. The definition appends, in
// order: a wait on the wait list (when it yields events), the prefetch, and a
// signal of the output event (when one is obtained). It holds this manager's
// mutex while doing so. `flags` is currently ignored by the definition.
ur_result_t appendUSMPrefetch(const void *pMem, size_t size,
ur_usm_migration_flags_t flags,
uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent);

// Appends a memory-advise command for the `size` bytes at pMem on this
// manager's device, followed by a signal of the output event when one is
// obtained. `advice` is cast to ze_memory_advice_t by the definition. Unlike
// the other append* members, this one takes no event wait list.
ur_result_t appendUSMAdvise(const void *pMem, size_t size,
ur_usm_advice_flags_t advice,
ur_event_handle_t *phEvent);

ze_command_list_handle_t getZeCommandList();

Expand All @@ -107,6 +128,12 @@ struct ur_command_list_manager : public _ur_object {
ur_command_t commandType);

private:
// Shared implementation behind appendMemBufferFill and appendUSMFill.
// The definition takes no lock itself; both of those callers lock before
// calling it. It uses zeCommandListAppendMemoryFill when patternSize is a
// power of two, and otherwise emulates the fill with one
// zeCommandListAppendMemoryCopy per pattern repetition.
ur_result_t appendGenericFillUnlocked(
ur_mem_buffer_t *hBuffer, size_t offset, size_t patternSize,
const void *pPattern, size_t size, uint32_t numEventsInWaitList,
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
ur_command_t commandType);

ur_result_t appendGenericCopyUnlocked(
ur_mem_buffer_t *src, ur_mem_buffer_t *dst, bool blocking,
size_t srcOffset, size_t dstOffset, size_t size,
Expand Down
Loading