Skip to content

Commit

Permalink
Explicitly define which info queries are optional.
Browse files Browse the repository at this point in the history
This is now reflected in the spec and in the CTS tests.

Also includes a number of minor fixes for adapter implementations of
related info queries.
  • Loading branch information
aarongreig committed Nov 18, 2024
1 parent 7241ebf commit 4cbc46f
Show file tree
Hide file tree
Showing 50 changed files with 485 additions and 230 deletions.
58 changes: 31 additions & 27 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1486,7 +1486,7 @@ urDeviceGetSelected(
typedef enum ur_device_info_t {
UR_DEVICE_INFO_TYPE = 0, ///< [::ur_device_type_t] type of the device
UR_DEVICE_INFO_VENDOR_ID = 1, ///< [uint32_t] vendor Id of the device
UR_DEVICE_INFO_DEVICE_ID = 2, ///< [uint32_t] Id of the device
UR_DEVICE_INFO_DEVICE_ID = 2, ///< [uint32_t][optional-query] Id of the device
UR_DEVICE_INFO_MAX_COMPUTE_UNITS = 3, ///< [uint32_t] the number of compute units
UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = 4, ///< [uint32_t] max work item dimensions
UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES = 5, ///< [size_t[]] return an array of max work item sizes
Expand All @@ -1513,7 +1513,7 @@ typedef enum ur_device_info_t {
UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = 23, ///< [uint32_t] native vector width for double
UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = 24, ///< [uint32_t] native vector width for half float
UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY = 25, ///< [uint32_t] max clock frequency in MHz
UR_DEVICE_INFO_MEMORY_CLOCK_RATE = 26, ///< [uint32_t] memory clock frequency in MHz
UR_DEVICE_INFO_MEMORY_CLOCK_RATE = 26, ///< [uint32_t][optional-query] memory clock frequency in MHz
UR_DEVICE_INFO_ADDRESS_BITS = 27, ///< [uint32_t] address bits
UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = 28, ///< [uint64_t] max memory allocation size
UR_DEVICE_INFO_IMAGE_SUPPORTED = 29, ///< [::ur_bool_t] images are supported
Expand All @@ -1537,7 +1537,8 @@ typedef enum ur_device_info_t {
UR_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = 44, ///< [uint32_t] global memory cache line size in bytes
UR_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = 45, ///< [uint64_t] size of global memory cache in bytes
UR_DEVICE_INFO_GLOBAL_MEM_SIZE = 46, ///< [uint64_t] size of global memory in bytes
UR_DEVICE_INFO_GLOBAL_MEM_FREE = 47, ///< [uint64_t] size of global memory which is free in bytes
UR_DEVICE_INFO_GLOBAL_MEM_FREE = 47, ///< [uint64_t][optional-query] size of global memory which is free in
///< bytes
UR_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = 48, ///< [uint64_t] max constant buffer size in bytes
UR_DEVICE_INFO_MAX_CONSTANT_ARGS = 49, ///< [uint32_t] max number of __const declared arguments in a kernel
UR_DEVICE_INFO_LOCAL_MEM_TYPE = 50, ///< [::ur_device_local_mem_type_t] local memory type
Expand Down Expand Up @@ -1594,15 +1595,16 @@ typedef enum ur_device_info_t {
///< shared memory access
UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = 87, ///< [::ur_device_usm_access_capability_flags_t] support USM system wide
///< shared memory access
UR_DEVICE_INFO_UUID = 88, ///< [uint8_t[]] return device UUID
UR_DEVICE_INFO_PCI_ADDRESS = 89, ///< [char[]] return device PCI address
UR_DEVICE_INFO_GPU_EU_COUNT = 90, ///< [uint32_t] return Intel GPU EU count
UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH = 91, ///< [uint32_t] return Intel GPU EU SIMD width
UR_DEVICE_INFO_GPU_EU_SLICES = 92, ///< [uint32_t] return Intel GPU number of slices
UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 93, ///< [uint32_t] return Intel GPU EU count per subslice
UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE = 94, ///< [uint32_t] return Intel GPU number of subslices per slice
UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 95, ///< [uint32_t] return Intel GPU number of threads per EU
UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH = 96, ///< [uint32_t] return max memory bandwidth in Mb/s
UR_DEVICE_INFO_UUID = 88, ///< [uint8_t[]][optional-query] return device UUID
UR_DEVICE_INFO_PCI_ADDRESS = 89, ///< [char[]][optional-query] return device PCI address
UR_DEVICE_INFO_GPU_EU_COUNT = 90, ///< [uint32_t][optional-query] return Intel GPU EU count
UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH = 91, ///< [uint32_t][optional-query] return Intel GPU EU SIMD width
UR_DEVICE_INFO_GPU_EU_SLICES = 92, ///< [uint32_t][optional-query] return Intel GPU number of slices
UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 93, ///< [uint32_t][optional-query] return Intel GPU EU count per subslice
UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE = 94, ///< [uint32_t][optional-query] return Intel GPU number of subslices per
///< slice
UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 95, ///< [uint32_t][optional-query] return Intel GPU number of threads per EU
UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH = 96, ///< [uint32_t][optional-query] return max memory bandwidth in Mb/s
UR_DEVICE_INFO_IMAGE_SRGB = 97, ///< [::ur_bool_t] device supports sRGB images
UR_DEVICE_INFO_BUILD_ON_SUBDEVICE = 98, ///< [::ur_bool_t] Return true if sub-device should do its own program
///< build
Expand All @@ -1621,17 +1623,18 @@ typedef enum ur_device_info_t {
///< available for this device.
UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS = 106, ///< [::ur_bool_t] support the ::urKernelSetSpecializationConstants entry
///< point
UR_DEVICE_INFO_MEMORY_BUS_WIDTH = 107, ///< [uint32_t] return the width in bits of the memory bus interface of the
///< device.
UR_DEVICE_INFO_MEMORY_BUS_WIDTH = 107, ///< [uint32_t][optional-query] return the width in bits of the memory bus
///< interface of the device.
UR_DEVICE_INFO_MAX_WORK_GROUPS_3D = 108, ///< [size_t[3]] return max 3D work groups
UR_DEVICE_INFO_ASYNC_BARRIER = 109, ///< [::ur_bool_t] return true if Async Barrier is supported
UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT = 110, ///< [::ur_bool_t] return true if specifying memory channels is supported
UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED = 111, ///< [::ur_bool_t] Return true if the device supports enqueueing commands
///< to read and write pipes from the host.
UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP = 112, ///< [uint32_t] The maximum number of registers available per block.
UR_DEVICE_INFO_IP_VERSION = 113, ///< [uint32_t] The device IP version. The meaning of the device IP version
///< is implementation-defined, but newer devices should have a higher
///< version than older devices.
UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP = 112, ///< [uint32_t][optional-query] The maximum number of registers available
///< per block.
UR_DEVICE_INFO_IP_VERSION = 113, ///< [uint32_t][optional-query] The device IP version. The meaning of the
///< device IP version is implementation-defined, but newer devices should
///< have a higher version than older devices.
UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT = 114, ///< [::ur_bool_t] return true if the device supports virtual memory.
UR_DEVICE_INFO_ESIMD_SUPPORT = 115, ///< [::ur_bool_t] return true if the device supports ESIMD.
UR_DEVICE_INFO_COMPONENT_DEVICES = 116, ///< [::ur_device_handle_t[]] The set of component devices contained by
Expand Down Expand Up @@ -3386,7 +3389,7 @@ typedef enum ur_usm_alloc_info_t {
UR_USM_ALLOC_INFO_BASE_PTR = 1, ///< [void *] Memory allocation base pointer info
UR_USM_ALLOC_INFO_SIZE = 2, ///< [size_t] Memory allocation size info
UR_USM_ALLOC_INFO_DEVICE = 3, ///< [::ur_device_handle_t] Memory allocation device info
UR_USM_ALLOC_INFO_POOL = 4, ///< [::ur_usm_pool_handle_t] Memory allocation pool info
UR_USM_ALLOC_INFO_POOL = 4, ///< [::ur_usm_pool_handle_t][optional-query] Memory allocation pool info
/// @cond
UR_USM_ALLOC_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -4527,9 +4530,10 @@ typedef enum ur_program_info_t {
UR_PROGRAM_INFO_BINARY_SIZES = 5, ///< [size_t[]] Return program binary sizes for each device.
UR_PROGRAM_INFO_BINARIES = 6, ///< [unsigned char[]] Return program binaries for all devices for this
///< Program.
UR_PROGRAM_INFO_NUM_KERNELS = 7, ///< [size_t] Number of kernels in Program, return type size_t.
UR_PROGRAM_INFO_KERNEL_NAMES = 8, ///< [char[]] Return a null-terminated, semi-colon separated list of kernel
///< names in Program.
UR_PROGRAM_INFO_NUM_KERNELS = 7, ///< [size_t][optional-query] Number of kernels in Program, return type
///< size_t.
UR_PROGRAM_INFO_KERNEL_NAMES = 8, ///< [char[]][optional-query] Return a null-terminated, semi-colon
///< separated list of kernel names in Program.
/// @cond
UR_PROGRAM_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -4880,8 +4884,8 @@ typedef enum ur_kernel_info_t {
UR_KERNEL_INFO_CONTEXT = 3, ///< [::ur_context_handle_t] Return Context object associated with Kernel.
UR_KERNEL_INFO_PROGRAM = 4, ///< [::ur_program_handle_t] Return Program object associated with Kernel.
UR_KERNEL_INFO_ATTRIBUTES = 5, ///< [char[]] Return null-terminated kernel attributes string.
UR_KERNEL_INFO_NUM_REGS = 6, ///< [uint32_t] Return the number of registers used by the compiled kernel
///< (device specific).
UR_KERNEL_INFO_NUM_REGS = 6, ///< [uint32_t][optional-query] Return the number of registers used by the
///< compiled kernel.
/// @cond
UR_KERNEL_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -4891,7 +4895,7 @@ typedef enum ur_kernel_info_t {
///////////////////////////////////////////////////////////////////////////////
/// @brief Get Kernel Work Group information
typedef enum ur_kernel_group_info_t {
UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0, ///< [size_t[3]] Return Work Group maximum global size
UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0, ///< [size_t[3]][optional-query] Return Work Group maximum global size
UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = 1, ///< [size_t] Return maximum Work Group size
UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = 2, ///< [size_t[3]] Return Work Group size required by the source code, such
///< as __attribute__((required_work_group_size(X,Y,Z)), or (0, 0, 0) if
Expand Down Expand Up @@ -5422,8 +5426,8 @@ typedef enum ur_queue_info_t {
UR_QUEUE_INFO_SIZE = 5, ///< [uint32_t] The size of the queue on the device. Only a valid query
///< if the queue was created with the `ON_DEVICE` queue flag, otherwise
///< `::urQueueGetInfo` will return `::UR_RESULT_ERROR_INVALID_QUEUE`.
UR_QUEUE_INFO_EMPTY = 6, ///< [::ur_bool_t] return true if the queue was empty at the time of the
///< query
UR_QUEUE_INFO_EMPTY = 6, ///< [::ur_bool_t][optional-query] return true if the queue was empty at
///< the time of the query.
/// @cond
UR_QUEUE_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down
1 change: 1 addition & 0 deletions scripts/YaML.md
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ plural form *enumerators* is abbreviated to `etors`.
- An etor requires the following scalar fields: {`name`, `desc`}
+ `desc` will be used as the etor's description comment
+ If the enum has `typed_etors`, `desc` must begin with type identifier: {`"[type]"`}
+ `desc` may contain the `[optional-query]` annotation. This marks the etor as an info query that adapters are not required to implement; querying it may legally return a non-success error code.
+ `name` must be a unique ISO-C standard identifier, and be all caps
- An etor may take the following optional scalar field: {`value`, `version`}
+ `value` must be an ISO-C standard identifier
Expand Down
18 changes: 18 additions & 0 deletions scripts/core/PROG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,24 @@ explicitly created against a context.
// Release the context handle
${x}ContextRelease(hContext);
Object Queries
==============

Queries to get information from API objects follow a common pattern. The entry
points for this are generally of the form:

.. code-block::

    ObjectGetInfo(ur_object_handle_t hObject, ur_object_info_t propName,
                  size_t propSize, void *pPropValue, size_t *pPropSizeRet)

where ``propName`` selects the information to query. The object info enum
representing possible queries will generally be found in the enums section of
the relevant object. Some info queries would be difficult or impossible to
support for certain backends; these are denoted with ``[optional-query]`` in
the enum description. Using any enum marked optional in this way may result in
${X}_RESULT_ERROR_UNSUPPORTED_ENUMERATION if the adapter doesn't support it.

Programs and Kernels
====================

Expand Down
30 changes: 15 additions & 15 deletions scripts/core/device.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ etors:
- name: VENDOR_ID
desc: "[uint32_t] vendor Id of the device"
- name: DEVICE_ID
desc: "[uint32_t] Id of the device"
desc: "[uint32_t][optional-query] Id of the device"
- name: MAX_COMPUTE_UNITS
desc: "[uint32_t] the number of compute units"
- name: MAX_WORK_ITEM_DIMENSIONS
Expand Down Expand Up @@ -248,7 +248,7 @@ etors:
- name: MAX_CLOCK_FREQUENCY
desc: "[uint32_t] max clock frequency in MHz"
- name: MEMORY_CLOCK_RATE
desc: "[uint32_t] memory clock frequency in MHz"
desc: "[uint32_t][optional-query] memory clock frequency in MHz"
- name: ADDRESS_BITS
desc: "[uint32_t] address bits"
- name: MAX_MEM_ALLOC_SIZE
Expand Down Expand Up @@ -290,7 +290,7 @@ etors:
- name: GLOBAL_MEM_SIZE
desc: "[uint64_t] size of global memory in bytes"
- name: GLOBAL_MEM_FREE
desc: "[uint64_t] size of global memory which is free in bytes"
desc: "[uint64_t][optional-query] size of global memory which is free in bytes"
- name: MAX_CONSTANT_BUFFER_SIZE
desc: "[uint64_t] max constant buffer size in bytes"
- name: MAX_CONSTANT_ARGS
Expand Down Expand Up @@ -377,23 +377,23 @@ etors:
- name: USM_SYSTEM_SHARED_SUPPORT
desc: "[$x_device_usm_access_capability_flags_t] support USM system wide shared memory access"
- name: UUID
desc: "[uint8_t[]] return device UUID"
desc: "[uint8_t[]][optional-query] return device UUID"
- name: PCI_ADDRESS
desc: "[char[]] return device PCI address"
desc: "[char[]][optional-query] return device PCI address"
- name: GPU_EU_COUNT
desc: "[uint32_t] return Intel GPU EU count"
desc: "[uint32_t][optional-query] return Intel GPU EU count"
- name: GPU_EU_SIMD_WIDTH
desc: "[uint32_t] return Intel GPU EU SIMD width"
desc: "[uint32_t][optional-query] return Intel GPU EU SIMD width"
- name: GPU_EU_SLICES
desc: "[uint32_t] return Intel GPU number of slices"
desc: "[uint32_t][optional-query] return Intel GPU number of slices"
- name: GPU_EU_COUNT_PER_SUBSLICE
desc: "[uint32_t] return Intel GPU EU count per subslice"
desc: "[uint32_t][optional-query] return Intel GPU EU count per subslice"
- name: GPU_SUBSLICES_PER_SLICE
desc: "[uint32_t] return Intel GPU number of subslices per slice"
desc: "[uint32_t][optional-query] return Intel GPU number of subslices per slice"
- name: GPU_HW_THREADS_PER_EU
desc: "[uint32_t] return Intel GPU number of threads per EU"
desc: "[uint32_t][optional-query] return Intel GPU number of threads per EU"
- name: MAX_MEMORY_BANDWIDTH
desc: "[uint32_t] return max memory bandwidth in Mb/s"
desc: "[uint32_t][optional-query] return max memory bandwidth in Mb/s"
- name: IMAGE_SRGB
desc: "[$x_bool_t] device supports sRGB images"
- name: BUILD_ON_SUBDEVICE
Expand All @@ -418,7 +418,7 @@ etors:
- name: KERNEL_SET_SPECIALIZATION_CONSTANTS
desc: "[$x_bool_t] support the $xKernelSetSpecializationConstants entry point"
- name: MEMORY_BUS_WIDTH
desc: "[uint32_t] return the width in bits of the memory bus interface of the device."
desc: "[uint32_t][optional-query] return the width in bits of the memory bus interface of the device."
- name: MAX_WORK_GROUPS_3D
desc: "[size_t[3]] return max 3D work groups"
- name: ASYNC_BARRIER
Expand All @@ -428,9 +428,9 @@ etors:
- name: HOST_PIPE_READ_WRITE_SUPPORTED
desc: "[$x_bool_t] Return true if the device supports enqueueing commands to read and write pipes from the host."
- name: MAX_REGISTERS_PER_WORK_GROUP
desc: "[uint32_t] The maximum number of registers available per block."
desc: "[uint32_t][optional-query] The maximum number of registers available per block."
- name: IP_VERSION
desc: "[uint32_t] The device IP version. The meaning of the device IP version is implementation-defined, but newer devices should have a higher version than older devices."
desc: "[uint32_t][optional-query] The device IP version. The meaning of the device IP version is implementation-defined, but newer devices should have a higher version than older devices."
- name: VIRTUAL_MEMORY_SUPPORT
desc: "[$x_bool_t] return true if the device supports virtual memory."
- name: ESIMD_SUPPORT
Expand Down
4 changes: 2 additions & 2 deletions scripts/core/kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ etors:
- name: ATTRIBUTES
desc: "[char[]] Return null-terminated kernel attributes string."
- name: NUM_REGS
desc: "[uint32_t] Return the number of registers used by the compiled kernel (device specific)."
desc: "[uint32_t][optional-query] Return the number of registers used by the compiled kernel."
--- #--------------------------------------------------------------------------
type: enum
desc: "Get Kernel Work Group information"
Expand All @@ -133,7 +133,7 @@ name: $x_kernel_group_info_t
typed_etors: True
etors:
- name: GLOBAL_WORK_SIZE
desc: "[size_t[3]] Return Work Group maximum global size"
desc: "[size_t[3]][optional-query] Return Work Group maximum global size"
- name: WORK_GROUP_SIZE
desc: "[size_t] Return maximum Work Group size"
- name: COMPILE_WORK_GROUP_SIZE
Expand Down
Loading

0 comments on commit 4cbc46f

Please sign in to comment.