Skip to content


Merge pull request oneapi-src#1643 from JackAKirk/test-kernel-launch-exp
Browse files Browse the repository at this point in the history
[EXP][CUDA] Add initial version of (kernel) Launch Properties extension.
  • Loading branch information
kbenzie authored May 28, 2024
2 parents 905804c + 1c4478c commit 0354659
Show file tree
Hide file tree
Showing 25 changed files with 1,338 additions and 0 deletions.
145 changes: 145 additions & 0 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ typedef enum ur_function_t {
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221, ///< Enumerator for ::urCommandBufferGetInfoExp
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 222, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223, ///< Enumerator for ::urEnqueueTimestampRecordingExp
UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP = 224, ///< Enumerator for ::urEnqueueKernelLaunchCustomExp
/// @cond
/// @endcond
Expand Down Expand Up @@ -8935,6 +8936,133 @@ urEnqueueTimestampRecordingExp(
///< reports the timestamp recorded when the command is executed on the device.

#if !defined(__GNUC__)
#pragma endregion
// Intel 'oneAPI' Unified Runtime Experimental APIs for (kernel) Launch Properties
#if !defined(__GNUC__)
#pragma region launch properties(experimental)
/// @brief Extension string that advertises support for the Launch Properties
/// extension; this is the string returned when querying device extensions.
#define UR_LAUNCH_PROPERTIES_EXTENSION_STRING_EXP "ur_exp_launch_properties"

/// @brief Identifier naming which launch property is being specified
/// @remarks
///   _Analogues_
///     - **CUlaunchAttributeID**
typedef enum ur_exp_launch_property_id_t {
    /// The property has no effect
    UR_EXP_LAUNCH_PROPERTY_ID_IGNORE = 0,
    /// Whether to launch a cooperative kernel
    UR_EXP_LAUNCH_PROPERTY_ID_COOPERATIVE = 1,
    /// Work-group cluster dimensions
    UR_EXP_LAUNCH_PROPERTY_ID_CLUSTER_DIMENSION = 2,
    /// @cond
    /// @endcond
} ur_exp_launch_property_id_t;

/// @brief Value carried by a launch property; one member per kind of property
/// @remarks
///   _Analogues_
///     - **CUlaunchAttributeValue**
typedef union ur_exp_launch_property_value_t {
    /// [in] dimensions of the cluster (units of work-group) (x, y, z). Each
    /// value must be a divisor of the corresponding global work-size
    /// dimension (in units of work-group).
    uint32_t clusterDim[3];
    /// [in] non-zero value indicates a cooperative kernel
    int cooperative;
} ur_exp_launch_property_value_t;

/// @brief A single kernel launch property: an id together with the value it tags
/// @remarks
///   _Analogues_
///     - **cuLaunchAttribute**
typedef struct ur_exp_launch_property_t {
    /// [in] launch property id
    ur_exp_launch_property_id_t id;
    /// [in][tagged_by(id)] launch property value
    ur_exp_launch_property_value_t value;
} ur_exp_launch_property_t;

/// @brief Launch kernel with custom launch properties
/// @details
/// - Launches the kernel using the specified launch properties
/// - If numPropsInLaunchPropList == 0 then a regular kernel launch is used:
/// `urEnqueueKernelLaunch`
/// - Consult the appropriate adapter driver documentation for details of
/// adapter specific behavior and native error codes that may be returned.
/// @remarks
/// _Analogues_
/// - **cuLaunchKernelEx**
/// @returns
/// + `NULL == hQueue`
/// + `NULL == hKernel`
/// + NULL == hQueue
/// + NULL == hKernel
/// + `NULL == pGlobalWorkSize`
/// + `NULL == launchPropList`
/// + NULL == pGlobalWorkSize
/// + numPropsInLaunchpropList != 0 && launchPropList == NULL
/// + phEventWaitList == NULL && numEventsInWaitList > 0
/// + phEventWaitList != NULL && numEventsInWaitList == 0
/// + If event objects in phEventWaitList are not valid events.
/// + An event in phEventWaitList has ::UR_EVENT_STATUS_ERROR
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
uint32_t workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and
///< work-group work-items
const size_t *pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the
///< number of global work-items in workDim that will execute the kernel
///< function
const size_t *pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that
///< specify the number of local work-items forming a work-group that will
///< execute the kernel function. If nullptr, the runtime implementation
///< will choose the work-group size.
uint32_t numPropsInLaunchPropList, ///< [in] size of the launch prop list
const ur_exp_launch_property_t *launchPropList, ///< [in][range(0, numPropsInLaunchPropList)] pointer to a list of launch
///< properties
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
///< events that must be complete before the kernel execution. If nullptr,
///< the numEventsInWaitList must be 0, indicating that no wait event.
ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular
///< kernel execution instance.

#if !defined(__GNUC__)
#pragma endregion
Expand Down Expand Up @@ -10629,6 +10757,23 @@ typedef struct ur_enqueue_write_host_pipe_params_t {
ur_event_handle_t **pphEvent;
} ur_enqueue_write_host_pipe_params_t;

/// @brief Function parameters for urEnqueueKernelLaunchCustomExp
/// @details Every member is a pointer to the matching parameter passed to the
///     function, which gives a callback the ability to modify that
///     parameter's value
typedef struct ur_enqueue_kernel_launch_custom_exp_params_t {
    ur_queue_handle_t *phQueue;                      ///< points at hQueue
    ur_kernel_handle_t *phKernel;                    ///< points at hKernel
    uint32_t *pworkDim;                              ///< points at workDim
    const size_t **ppGlobalWorkSize;                 ///< points at pGlobalWorkSize
    const size_t **ppLocalWorkSize;                  ///< points at pLocalWorkSize
    uint32_t *pnumPropsInLaunchPropList;             ///< points at numPropsInLaunchPropList
    const ur_exp_launch_property_t **plaunchPropList; ///< points at launchPropList
    uint32_t *pnumEventsInWaitList;                  ///< points at numEventsInWaitList
    const ur_event_handle_t **pphEventWaitList;      ///< points at phEventWaitList
    ur_event_handle_t **pphEvent;                    ///< points at phEvent
} ur_enqueue_kernel_launch_custom_exp_params_t;

/// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
15 changes: 15 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1435,6 +1435,20 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueProcAddrTable_t)(
ur_enqueue_dditable_t *);

/// @brief Function-pointer for urEnqueueKernelLaunchCustomExp
/// @details The parameter types follow, in order, the parameters of
///     urEnqueueKernelLaunchCustomExp (the same ten entries that
///     ur_enqueue_kernel_launch_custom_exp_params_t points at).
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunchCustomExp_t)(
    ur_queue_handle_t,                // hQueue
    ur_kernel_handle_t,               // hKernel
    uint32_t,                         // workDim
    const size_t *,                   // pGlobalWorkSize
    const size_t *,                   // pLocalWorkSize
    uint32_t,                         // numPropsInLaunchPropList
    const ur_exp_launch_property_t *, // launchPropList
    uint32_t,                         // numEventsInWaitList
    const ur_event_handle_t *,        // phEventWaitList
    ur_event_handle_t *);             // phEvent

/// @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
Expand All @@ -1460,6 +1474,7 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
/// @brief Table of EnqueueExp function pointers
typedef struct ur_enqueue_exp_dditable_t {
    /// Entry for urEnqueueKernelLaunchCustomExp
    ur_pfnEnqueueKernelLaunchCustomExp_t pfnKernelLaunchCustomExp;
    /// Entry for urEnqueueCooperativeKernelLaunchExp
    ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp;
    /// Entry for urEnqueueTimestampRecordingExp
    ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp;
} ur_enqueue_exp_dditable_t;
Expand Down
24 changes: 24 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateValueArgDesc(co
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpCommandBufferUpdateKernelLaunchDesc(const struct ur_exp_command_buffer_update_kernel_launch_desc_t params, char *buffer, const size_t buff_size, size_t *out_size);

/// @brief Print ur_exp_launch_property_id_t enum
/// @details NOTE(review): judging by the parameter names, the printed form of
///     `value` presumably goes into `buffer` (capacity `buff_size`) and the
///     output size is reported through `out_size` - confirm against the
///     implementation.
/// @returns
/// - `buff_size < out_size`
///   (condition on the returned ::ur_result_t; the result code it maps to is
///   not visible in this excerpt)
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpLaunchPropertyId(enum ur_exp_launch_property_id_t value, char *buffer, const size_t buff_size, size_t *out_size);

/// @brief Print ur_exp_launch_property_t struct
/// @details The struct is taken by value in `params`.
///     NOTE(review): judging by the parameter names, the printed form
///     presumably goes into `buffer` (capacity `buff_size`) and the output
///     size is reported through `out_size` - confirm against the
///     implementation.
/// @returns
/// - `buff_size < out_size`
///   (condition on the returned ::ur_result_t; the result code it maps to is
///   not visible in this excerpt)
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpLaunchProperty(const struct ur_exp_launch_property_t params, char *buffer, const size_t buff_size, size_t *out_size);

/// @brief Print ur_exp_peer_info_t enum
/// @returns
Expand Down Expand Up @@ -1946,6 +1962,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueReadHostPipeParams(const struc
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const struct ur_enqueue_write_host_pipe_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

/// @brief Print ur_enqueue_kernel_launch_custom_exp_params_t struct
/// @details The struct is taken by pointer in `params`.
///     NOTE(review): judging by the parameter names, the printed form
///     presumably goes into `buffer` (capacity `buff_size`) and the output
///     size is reported through `out_size` - confirm against the
///     implementation.
/// @returns
/// - `buff_size < out_size`
///   (condition on the returned ::ur_result_t; the result code it maps to is
///   not visible in this excerpt)
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueKernelLaunchCustomExpParams(const struct ur_enqueue_kernel_launch_custom_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

/// @brief Print ur_enqueue_cooperative_kernel_launch_exp_params_t struct
/// @returns
Expand Down

0 comments on commit 0354659

Please sign in to comment.