-
Notifications
You must be signed in to change notification settings - Fork 12.8k
/
Copy pathomptarget.h
357 lines (313 loc) · 14.6 KB
/
omptarget.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
//===-------- omptarget.h - Target independent OpenMP target RTL -- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//
#ifndef _OMPTARGET_H_
#define _OMPTARGET_H_
#include <deque>
#include <stddef.h>
#include <stdint.h>
#include <SourceInfo.h>
#define OFFLOAD_SUCCESS (0)
#define OFFLOAD_FAIL (~0)
#define OFFLOAD_DEVICE_DEFAULT -1
// Don't format out enums and structs.
// clang-format off
/// return flags of __tgt_target_XXX public APIs
enum __tgt_target_return_t : int {
/// successful offload executed on a target device
OMP_TGT_SUCCESS = 0,
/// offload may not execute on the requested target device
/// this scenario can be caused by the device not available or unsupported
/// as described in the Execution Model in the specifcation
/// this status may not be used for target device execution failure
/// which should be handled internally in libomptarget
OMP_TGT_FAIL = ~0
};
/// Data attributes for each data reference used in an OpenMP target region.
enum tgt_map_type {
// No flags
OMP_TGT_MAPTYPE_NONE = 0x000,
// copy data from host to device
OMP_TGT_MAPTYPE_TO = 0x001,
// copy data from device to host
OMP_TGT_MAPTYPE_FROM = 0x002,
// copy regardless of the reference count
OMP_TGT_MAPTYPE_ALWAYS = 0x004,
// force unmapping of data
OMP_TGT_MAPTYPE_DELETE = 0x008,
// map the pointer as well as the pointee
OMP_TGT_MAPTYPE_PTR_AND_OBJ = 0x010,
// pass device base address to kernel
OMP_TGT_MAPTYPE_TARGET_PARAM = 0x020,
// return base device address of mapped data
OMP_TGT_MAPTYPE_RETURN_PARAM = 0x040,
// private variable - not mapped
OMP_TGT_MAPTYPE_PRIVATE = 0x080,
// copy by value - not mapped
OMP_TGT_MAPTYPE_LITERAL = 0x100,
// mapping is implicit
OMP_TGT_MAPTYPE_IMPLICIT = 0x200,
// copy data to device
OMP_TGT_MAPTYPE_CLOSE = 0x400,
// runtime error if not already allocated
OMP_TGT_MAPTYPE_PRESENT = 0x1000,
// use a separate reference counter so that the data cannot be unmapped within
// the structured region
// This is an OpenMP extension for the sake of OpenACC support.
OMP_TGT_MAPTYPE_OMPX_HOLD = 0x2000,
// descriptor for non-contiguous target-update
OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000,
// member of struct, member given by [16 MSBs] - 1
OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
};
enum OpenMPOffloadingDeclareTargetFlags {
/// Mark the entry as having a 'link' attribute.
OMP_DECLARE_TARGET_LINK = 0x01,
/// Mark the entry as being a global constructor.
OMP_DECLARE_TARGET_CTOR = 0x02,
/// Mark the entry as being a global destructor.
OMP_DECLARE_TARGET_DTOR = 0x04
};
enum OpenMPOffloadingRequiresDirFlags {
/// flag undefined.
OMP_REQ_UNDEFINED = 0x000,
/// no requires directive present.
OMP_REQ_NONE = 0x001,
/// reverse_offload clause.
OMP_REQ_REVERSE_OFFLOAD = 0x002,
/// unified_address clause.
OMP_REQ_UNIFIED_ADDRESS = 0x004,
/// unified_shared_memory clause.
OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
/// dynamic_allocators clause.
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010
};
enum TargetAllocTy : int32_t {
TARGET_ALLOC_DEVICE = 0,
TARGET_ALLOC_HOST,
TARGET_ALLOC_SHARED,
TARGET_ALLOC_DEFAULT
};
/// This struct contains all of the arguments to a target kernel region launch.
struct __tgt_kernel_arguments {
int32_t Version; // Version of this struct for ABI compatibility.
int32_t NumArgs; // Number of arguments in each input pointer.
void **ArgBasePtrs; // Base pointer of each argument (e.g. a struct).
void **ArgPtrs; // Pointer to the argument data.
int64_t *ArgSizes; // Size of the argument data in bytes.
int64_t *ArgTypes; // Type of the data (e.g. to / from).
void **ArgNames; // Name of the data for debugging, possibly null.
void **ArgMappers; // User-defined mappers, possibly null.
int64_t Tripcount; // Tripcount for the teams / distribute loop, 0 otherwise.
};
static_assert(sizeof(__tgt_kernel_arguments) == 64 ||
sizeof(__tgt_kernel_arguments) == 40,
"Invalid struct size");
/// This struct is a record of an entry point or global. For a function
/// entry point the size is expected to be zero
struct __tgt_offload_entry {
void *addr; // Pointer to the offload entry info (function or global)
char *name; // Name of the function or global
size_t size; // Size of the entry info (0 if it is a function)
int32_t flags; // Flags associated with the entry, e.g. 'link'.
int32_t reserved; // Reserved, to be used by the runtime library.
};
/// This struct is a record of the device image information
struct __tgt_device_image {
void *ImageStart; // Pointer to the target code start
void *ImageEnd; // Pointer to the target code end
__tgt_offload_entry *EntriesBegin; // Begin of table with all target entries
__tgt_offload_entry *EntriesEnd; // End of table (non inclusive)
};
/// This struct contains information about a given image.
struct __tgt_image_info {
const char *Arch;
};
/// This struct is a record of all the host code that may be offloaded to a
/// target.
struct __tgt_bin_desc {
int32_t NumDeviceImages; // Number of device types supported
__tgt_device_image *DeviceImages; // Array of device images (1 per dev. type)
__tgt_offload_entry *HostEntriesBegin; // Begin of table with all host entries
__tgt_offload_entry *HostEntriesEnd; // End of table (non inclusive)
};
/// This struct contains the offload entries identified by the target runtime
struct __tgt_target_table {
__tgt_offload_entry *EntriesBegin; // Begin of the table with all the entries
__tgt_offload_entry
*EntriesEnd; // End of the table with all the entries (non inclusive)
};
// clang-format on
/// This struct contains information exchanged between different asynchronous
/// operations for device-dependent optimization and potential synchronization
struct __tgt_async_info {
// A pointer to a queue-like structure where offloading operations are issued.
// We assume to use this structure to do synchronization. In CUDA backend, it
// is CUstream.
void *Queue = nullptr;
};
struct DeviceTy;
/// The libomptarget wrapper around a __tgt_async_info object directly
/// associated with a libomptarget layer device. RAII semantics to avoid
/// mistakes.
class AsyncInfoTy {
/// Locations we used in (potentially) asynchronous calls which should live
/// as long as this AsyncInfoTy object.
std::deque<void *> BufferLocations;
__tgt_async_info AsyncInfo;
DeviceTy &Device;
public:
AsyncInfoTy(DeviceTy &Device) : Device(Device) {}
~AsyncInfoTy() { synchronize(); }
/// Implicit conversion to the __tgt_async_info which is used in the
/// plugin interface.
operator __tgt_async_info *() { return &AsyncInfo; }
/// Synchronize all pending actions.
///
/// \returns OFFLOAD_FAIL or OFFLOAD_SUCCESS appropriately.
int synchronize();
/// Return a void* reference with a lifetime that is at least as long as this
/// AsyncInfoTy object. The location can be used as intermediate buffer.
void *&getVoidPtrLocation();
};
/// This struct is a record of non-contiguous information
struct __tgt_target_non_contig {
uint64_t Offset;
uint64_t Count;
uint64_t Stride;
};
struct __tgt_device_info {
void *Context = nullptr;
void *Device = nullptr;
};
#ifdef __cplusplus
extern "C" {
#endif
int omp_get_num_devices(void);
int omp_get_device_num(void);
int omp_get_initial_device(void);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
size_t DstOffset, size_t SrcOffset, int DstDevice,
int SrcDevice);
int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
int NumDims, const size_t *Volume,
const size_t *DstOffsets, const size_t *SrcOffsets,
const size_t *DstDimensions,
const size_t *SrcDimensions, int DstDevice,
int SrcDevice);
int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
size_t Size, size_t DeviceOffset, int DeviceNum);
int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum);
/// Explicit target memory allocators
/// Using the llvm_ prefix until they become part of the OpenMP standard.
void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum);
void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
/// Dummy target so we have a symbol for generating host fallback.
void *llvm_omp_target_dynamic_shared_alloc();
/// add the clauses of the requires directives in a given file
void __tgt_register_requires(int64_t Flags);
/// adds a target shared library to the target execution image
void __tgt_register_lib(__tgt_bin_desc *Desc);
/// Initialize all RTLs at once
void __tgt_init_all_rtls();
/// removes a target shared library from the target execution image
void __tgt_unregister_lib(__tgt_bin_desc *Desc);
// creates the host to target data mapping, stores it in the
// libomptarget.so internal structure (an entry in a stack of data maps) and
// passes the data to the device;
void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes,
int32_t DepNum, void *DepList,
int32_t NoAliasDepNum,
void *NoAliasDepList);
void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers);
void __tgt_target_data_begin_nowait_mapper(
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList);
// passes data from the target, release target memory and destroys the
// host-target mapping (top entry from the stack of data maps) created by
// the last __tgt_target_data_begin
void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes,
int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList);
void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
int32_t ArgNum, void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers);
void __tgt_target_data_end_nowait_mapper(
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, int32_t depNum, void *depList, int32_t NoAliasDepNum,
void *NoAliasDepList);
/// passes data to/from the target
void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes,
int64_t *ArgTypes);
void __tgt_target_data_update_nowait(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes,
int32_t DepNum, void *DepList,
int32_t NoAliasDepNum,
void *NoAliasDepList);
void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes,
int64_t *ArgTypes,
map_var_info_t *ArgNames,
void **ArgMappers);
void __tgt_target_data_update_nowait_mapper(
ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList);
// Performs the same actions as data_begin in case ArgNum is non-zero
// and initiates run of offloaded region on target platform; if ArgNum
// is non-zero after the region execution is done it also performs the
// same action as data_end above. The following types are used; this
// function returns 0 if it was able to transfer the execution to a
// target and an int different from zero otherwise.
int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
int32_t ThreadLimit, void *HostPtr,
__tgt_kernel_arguments *Args);
int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
int32_t ThreadLimit, void *HostPtr,
__tgt_kernel_arguments *Args, int32_t DepNum,
void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList);
void __tgt_set_info_flag(uint32_t);
int __tgt_print_device_info(int64_t DeviceId);
#ifdef __cplusplus
}
#endif
#ifdef __cplusplus
#define EXTERN extern "C"
#else
#define EXTERN extern
#endif
#endif // _OMPTARGET_H_