/*
 * computesi.h
 *
 * Public declarations for the compute_* API: GPU buffer descriptors,
 * VM pool nodes, the compute context, relocation lists, compute dispatch
 * state, and the function prototypes that operate on them.
 */
#ifndef COMPUTESI_H
#define COMPUTESI_H
#include <stdio.h>
#include <stdlib.h>
#include <drm.h>
#include <X11/Xlib.h>
#include <xf86drm.h>
#include <radeon_cs_gem.h>
#include <radeon_bo_gem.h>
#include "sid.h"
/// Forward declarations. struct compute_context is pointed to by
/// struct gpu_buffer before its full definition appears further down;
/// struct cs_reloc_gem is not defined in this header and is only used
/// here through pointers.
struct compute_context;
struct cs_reloc_gem;
/// Descriptor of one GPU buffer, tied to the compute context it belongs to.
/// NOTE(review): the exact meaning of handle/domain/flags/va is defined by
/// the implementation, not by this header -- the notes below are hedged.
struct gpu_buffer {
    struct compute_context *ctx;  /// context this buffer refers to
    uint64_t alignment;           /// presumably the requested alignment -- confirm units
    uint32_t handle;              /// presumably the kernel buffer handle -- confirm
    uint32_t domain;              /// presumably an enum radeon_bo_domain value -- confirm
    uint32_t flags;
    uint64_t size;
    uint64_t va;                  /// presumably the GPU virtual address -- confirm
    uint64_t va_size;
    uint32_t fragment_number;     /// for fragmented allocation
};
/// One entry of the VM pool referenced by compute_context::vm_pool.
/// Each node records an address range (va, size), the buffers attached to
/// it, and links to its previous and next neighbours.
struct pool_node {
    uint64_t va;
    uint64_t size;
    struct gpu_buffer *parent_bo;  /// main fragment
    struct gpu_buffer *bo;
    struct pool_node *prev;
    struct pool_node *next;
};
/// Per-device state shared by the compute_* functions: the DRM file
/// descriptor, an X11 display/window pair, the head of the VM pool list
/// and three memory size figures.
/// NOTE(review): units of the *_size fields and what the X11 handles are
/// used for are not visible in this header -- confirm in the implementation.
struct compute_context {
    int fd;                     /// opened DRM interface
    Display *display;
    XID window;
    struct pool_node *vm_pool;
    uint64_t gart_size;
    uint64_t vram_size;
    uint64_t vram_visible;
};
/// A relocation list: a counter plus a pointer to cs_reloc_gem entries.
/// NOTE(review): presumably reloc_num is the number of valid entries behind
/// relocs and the array is managed by compute_init_relocs /
/// compute_push_reloc -- confirm ownership in the implementation.
struct compute_relocs {
    size_t reloc_num;
    struct cs_reloc_gem *relocs;
};
/// Parameters describing one compute dispatch, passed to
/// compute_emit_compute_state() and its manual-relocs variant.
/// NOTE(review): most field names look like they mirror hardware register
/// fields (see sid.h); valid ranges and units are not visible here --
/// confirm against the implementation before relying on them.
struct compute_state {
    int id;
    unsigned user_data[16];     /// shader user data, mapped to SGPRs
    int user_data_length;       /// in dwords
    int dim[3];
    int start[3];
    int num_thread[3];
    int sgpr_num;
    int vgpr_num;
    int priority;
    int debug_mode;             /// boolean flag stored as int
    int priv_mode;              /// boolean flag stored as int
    int trap_en;                /// boolean flag stored as int
    int ieee_mode;
    int scratch_en;
    int lds_size;
    int excp_en;
    int waves_per_sh;
    int thread_groups_per_cu;
    int lock_threshold;
    int simd_dest_cntl;
    int se0_sh0_cu_en;
    int se0_sh1_cu_en;
    int se1_sh0_cu_en;
    int se1_sh1_cu_en;
    int tmpring_waves;
    int tmpring_wavesize;
    struct gpu_buffer *binary;  /// presumably the buffer holding the shader binary -- confirm
};
/// Memory domain selectors.
/// NOTE(review): presumably the values accepted by the `domain` argument of
/// compute_alloc_gpu_buffer() and stored in gpu_buffer::domain -- confirm.
enum radeon_bo_domain {
    RADEON_DOMAIN_GTT  = 0x2,
    RADEON_DOMAIN_VRAM = 0x4
};
/// Context lifetime and cache control. Declarations only; the definitions
/// live outside this header.

/// Returns a pointer to a compute context built from a DRM device file
/// path and a bus id string.
/// NOTE(review): failure reporting (NULL?) is not visible here -- confirm.
struct compute_context *compute_create_context(const char *drm_devfile,
                                               const char *busid);

/// Releases a context.
/// NOTE(review): presumably the counterpart of compute_create_context();
/// whether NULL is accepted is not visible here -- confirm.
void compute_free_context(struct compute_context *ctx);

/// Flushes caches for the given context; the context is not modified
/// through the pointer (const-qualified).
/// NOTE(review): meaning of the int result is not visible here -- confirm.
int compute_flush_caches(const struct compute_context *ctx);
/// VM pool management for a buffer within a context. Neither function
/// returns a status.
/// NOTE(review): presumably these reserve / release an address range in
/// compute_context::vm_pool and update the buffer's va fields -- confirm.
void compute_pool_alloc(struct compute_context *ctx, struct gpu_buffer *bo);
void compute_pool_free(struct compute_context *ctx, struct gpu_buffer *bo);
/// Host <-> GPU buffer copies. Both take the buffer, an offset into it,
/// a host pointer and a byte count; `src` is read-only, `dst` is written.
/// NOTE(review): meaning of the int result and whether the range
/// gpu_offset + size is bounds-checked are not visible here -- confirm.
int compute_copy_to_gpu(struct gpu_buffer *bo,
                        size_t gpu_offset,
                        const void *src,
                        size_t size);
int compute_copy_from_gpu(struct gpu_buffer *bo,
                          size_t gpu_offset,
                          void *dst,
                          size_t size);
/// DMA requests between two GPU buffers, plus a fence request.
/// The sync and async variants share the same source/destination
/// description; the sync variant takes three extra int flags.
/// NOTE(review): semantics of sync_flag, raw_wait_flag and use_pfp_engine,
/// and of the int results, are not visible here -- confirm.
int compute_send_sync_dma_req(struct compute_context *ctx,
                              struct gpu_buffer *dst_bo,
                              size_t dst_offset,
                              struct gpu_buffer *src_bo,
                              size_t src_offset,
                              size_t size,
                              int sync_flag,
                              int raw_wait_flag,
                              int use_pfp_engine);
int compute_send_async_dma_req(struct compute_context *ctx,
                               struct gpu_buffer *dst_bo,
                               size_t dst_offset,
                               struct gpu_buffer *src_bo,
                               size_t src_offset,
                               size_t size);
int compute_send_dma_fence(struct compute_context *ctx, struct gpu_buffer *bo);
/// GPU buffer release and allocation.
/// compute_alloc_gpu_buffer() returns a buffer descriptor for the given
/// context, size, domain and alignment.
/// NOTE(review): presumably `domain` takes an enum radeon_bo_domain value
/// and NULL signals failure -- neither is visible here, confirm.
void compute_free_gpu_buffer(struct gpu_buffer *bo);
struct gpu_buffer *compute_alloc_gpu_buffer(struct compute_context *ctx,
                                            size_t size,
                                            int domain,
                                            int alignment);
/// Emit a compute_state for a context. Context and state are both passed
/// as pointers to const. The *_manual_relocs variant additionally takes a
/// relocation list by value.
/// NOTE(review): meaning of the int result, and how the first variant
/// obtains its relocations, are not visible here -- confirm.
int compute_emit_compute_state(const struct compute_context *ctx,
                               const struct compute_state *state);
int compute_emit_compute_state_manual_relocs(const struct compute_context *ctx,
                                             const struct compute_state *state,
                                             struct compute_relocs crelocs);
/// Helpers operating on a compute_relocs list through a pointer.
/// compute_push_reloc() takes the buffer as pointer to const.
/// NOTE(review): presumably init resets the list and push appends one
/// entry for `bo`; who frees compute_relocs::relocs is not visible here
/// -- confirm.
void compute_init_relocs(struct compute_relocs *crelocs);
void compute_push_reloc(struct compute_relocs *crelocs,
                        const struct gpu_buffer *bo);
/// Wait operation on a GPU buffer.
/// NOTE(review): presumably blocks until the GPU is done with the buffer;
/// meaning of the int result is not visible here -- confirm.
int compute_bo_wait(struct gpu_buffer *boi);
#endif