Skip to content

Commit

Permalink
Add async kernel kenel submition support
Browse files Browse the repository at this point in the history
  • Loading branch information
ZzEeKkAa committed Nov 29, 2023
1 parent 05aa34d commit 0ca5372
Show file tree
Hide file tree
Showing 9 changed files with 420 additions and 37 deletions.
2 changes: 1 addition & 1 deletion numba_dpex/core/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ python_add_library(${PROJECT_NAME} MODULE ${SOURCES})

# Add SYCL to target, this must come after python_add_library()
# FIXME: sources incompatible with sycl include?
# add_sycl_to_target(TARGET ${PROJECT_NAME} SOURCES ${KERNEL_SOURCES})
add_sycl_to_target(TARGET ${PROJECT_NAME} SOURCES ${KERNEL_SOURCES})

# Link the DPCTLSyclInterface library to target
target_link_libraries(${PROJECT_NAME} PRIVATE DPCTLSyclInterface)
Expand Down
6 changes: 6 additions & 0 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "_queuestruct.h"
#include "_usmarraystruct.h"

#include "experimental/nrt_reserve_meminfo.h"
#include "numba/core/runtime/nrt_external.h"

// forward declarations
Expand Down Expand Up @@ -1490,6 +1491,8 @@ static PyObject *build_c_helpers_dict(void)
&DPEXRT_sycl_event_from_python);
_declpointer("DPEXRT_sycl_event_to_python", &DPEXRT_sycl_event_to_python);
_declpointer("DPEXRT_sycl_event_init", &DPEXRT_sycl_event_init);
_declpointer("DPEXRT_nrt_acquire_meminfo_and_schedule_release",
&DPEXRT_nrt_acquire_meminfo_and_schedule_release);

#undef _declpointer
return dct;
Expand Down Expand Up @@ -1557,6 +1560,9 @@ MOD_INIT(_dpexrt_python)
PyLong_FromVoidPtr(&DPEXRT_MemInfo_alloc));
PyModule_AddObject(m, "DPEXRT_MemInfo_fill",
PyLong_FromVoidPtr(&DPEXRT_MemInfo_fill));
PyModule_AddObject(
m, "DPEXRT_nrt_acquire_meminfo_and_schedule_release",
PyLong_FromVoidPtr(&DPEXRT_nrt_acquire_meminfo_and_schedule_release));
PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
return MOD_SUCCESS_VAL(m);
}
38 changes: 38 additions & 0 deletions numba_dpex/core/runtime/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,3 +433,41 @@ def submit_ndrange(
)

return ret

def acquire_meminfo_and_schedule_release(
self, builder: llvmir.IRBuilder, args
):
"""Inserts LLVM IR to call nrt_acquire_meminfo_and_schedule_release.
DPCTLSyclEventRef
DPEXRT_nrt_acquire_meminfo_and_schedule_release(
NRT_api_functions *nrt,
DPCTLSyclQueueRef QRef,
NRT_MemInfo **meminfo_array,
size_t meminfo_array_size,
DPCTLSyclEventRef *depERefs,
size_t nDepERefs,
int *status,
);
"""
mod = builder.module

func_ty = llvmir.FunctionType(
cgutils.voidptr_t,
[
cgutils.voidptr_t,
cgutils.voidptr_t,
cgutils.voidptr_t.as_pointer(),
llvmir.IntType(64),
cgutils.voidptr_t.as_pointer(),
llvmir.IntType(64),
llvmir.IntType(64).as_pointer(),
],
)
fn = cgutils.get_or_insert_function(
mod, func_ty, "DPEXRT_nrt_acquire_meminfo_and_schedule_release"
)
ret = builder.call(fn, args)

return ret
71 changes: 71 additions & 0 deletions numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// SPDX-FileCopyrightText: 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

#include "nrt_reserve_meminfo.h"

#include "_dbg_printer.h"
#include "syclinterface/dpctl_sycl_type_casters.hpp"
#include <CL/sycl.hpp>

extern "C"
{
DPCTLSyclEventRef
DPEXRT_nrt_acquire_meminfo_and_schedule_release(NRT_api_functions *nrt,
DPCTLSyclQueueRef QRef,
NRT_MemInfo **meminfo_array,
size_t meminfo_array_size,
DPCTLSyclEventRef *depERefs,
size_t nDepERefs,
int *status)
{
DPEXRT_DEBUG(drt_debug_print(
"DPEXRT-DEBUG: scheduling nrt meminfo release.\n"););

using dpctl::syclinterface::unwrap;
using dpctl::syclinterface::wrap;

sycl::queue *q = unwrap<sycl::queue>(QRef);

std::vector<NRT_MemInfo *> meminfo_vec(
meminfo_array, meminfo_array + meminfo_array_size);

for (size_t i = 0; i < meminfo_array_size; ++i) {
nrt->acquire(meminfo_vec[i]);
}

DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: acquired meminfo.\n"););

try {
sycl::event ht_ev = q->submit([&](sycl::handler &cgh) {
for (size_t ev_id = 0; ev_id < nDepERefs; ++ev_id) {
cgh.depends_on(*(unwrap<sycl::event>(depERefs[ev_id])));
}
cgh.host_task([meminfo_array_size, meminfo_vec, nrt]() {
for (size_t i = 0; i < meminfo_array_size; ++i) {
nrt->release(meminfo_vec[i]);
DPEXRT_DEBUG(
drt_debug_print("DPEXRT-DEBUG: released meminfo "
"from host_task.\n"););
}
});
});

constexpr int result_ok = 0;

*status = result_ok;
auto e_ptr = new sycl::event(ht_ev);
return wrap<sycl::event>(e_ptr);
} catch (const std::exception &e) {
constexpr int result_std_exception = 1;

*status = result_std_exception;
return nullptr;
}

constexpr int result_other_abnormal = 2;

*status = result_other_abnormal;
return nullptr;
}
}
27 changes: 27 additions & 0 deletions numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// SPDX-FileCopyrightText: 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

#ifndef _EXPERIMENTAL_H_
#define _EXPERIMENTAL_H_

#include "dpctl_capi.h"
#include "numba/core/runtime/nrt_external.h"

#ifdef __cplusplus
extern "C"
{
#endif
DPCTLSyclEventRef
DPEXRT_nrt_acquire_meminfo_and_schedule_release(NRT_api_functions *nrt,
DPCTLSyclQueueRef QRef,
NRT_MemInfo **meminfo_array,
size_t meminfo_array_size,
DPCTLSyclEventRef *depERefs,
size_t nDepERefs,
int *status);
#ifdef __cplusplus
}
#endif

#endif /* _EXPERIMENTAL_H_ */
4 changes: 2 additions & 2 deletions numba_dpex/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .decorators import kernel
from .kernel_dispatcher import KernelDispatcher
from .launcher import call_kernel
from .launcher import call_kernel, call_kernel_async
from .models import *
from .types import KernelDispatcherType

Expand All @@ -26,4 +26,4 @@ def dpex_dispatcher_const(context):
return context.get_dummy_value()


__all__ = ["kernel", "KernelDispatcher", "call_kernel"]
__all__ = ["kernel", "KernelDispatcher", "call_kernel", "call_kernel_async"]
2 changes: 1 addition & 1 deletion numba_dpex/experimental/kernel_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def get_overload_device_ir(self, sig):
args, _ = sigutils.normalize_signature(sig)
return self.overloads[tuple(args)].kernel_device_ir_module

def compile(self, sig) -> _KernelCompileResult:
def compile(self, sig) -> any:
disp = self._get_dispatcher_for_current_target()
if disp is not self:
return disp.compile(sig)
Expand Down
Loading

0 comments on commit 0ca5372

Please sign in to comment.