-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathbackend_avx.impala
54 lines (47 loc) · 1.93 KB
/
backend_avx.impala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
// Backend identification for this file (backend_avx): none of the GPU
// backends are reported as active.
fn @is_nvvm() -> bool {
    false
}
fn @is_cuda() -> bool {
    false
}
fn @is_opencl() -> bool {
    false
}
fn @is_amdgpu() -> bool {
    false
}

// CPU feature flags reported by this backend: x86 with SSE and AVX,
// but without AVX2.
fn @is_x86() -> bool {
    true
}
fn @is_sse() -> bool {
    true
}
fn @is_avx() -> bool {
    true
}
fn @is_avx2() -> bool {
    false
}

// Vector width, in loop iterations, that the inner loops pass to `vectorize`.
fn @get_vector_length() -> i32 {
    8
}

// Thread-count argument that the outer loops pass to `parallel`.
fn @get_thread_count() -> i32 {
    4
}
// Minimum number of full vector iterations a loop range must span before the
// inner loops vectorize it; shorter ranges are executed with the scalar loop.
static simd_iter_threshold = 2;
// Builds the outer loop for `body`.
//
// The returned function takes the bounds (lower, upper) and calls `body(idx)`
// for every index produced by `parallel`, passing get_thread_count() as the
// thread-count argument.
// NOTE(review): the range is presumably half-open, [lower, upper) - confirm
// against the definition of `parallel`.
fn @outer_loop(body: fn(i32) -> ()) = @|lower: i32, upper: i32| {
    let num_threads = get_thread_count();
    for idx in parallel(num_threads, lower, upper) {
        @body(idx);
    }
};
// Builds the strided outer loop for `body`.
//
// The returned function takes (lower, upper, step) and calls `body` with
// lower, lower + step, lower + 2 * step, ... for every such value below
// `upper`. The iterations are distributed through `parallel` with
// get_thread_count() as the thread-count argument.
//
// Assumes step > 0 (a zero step divides by zero).
fn @outer_loop_step(body: fn(i32) -> ()) = @|lower: i32, upper: i32, step: i32| {
    // Round the trip count up: a truncating division would silently drop the
    // last index whenever (upper - lower) is not a multiple of step
    // (e.g. lower = 0, upper = 5, step = 2 must visit 0, 2 and 4).
    // For spans that are an exact multiple of step the result is unchanged.
    let trip_count = (upper - lower + step - 1) / step;
    for k in parallel(get_thread_count(), 0, trip_count) {
        // Map the dense iteration number back to the strided index.
        @body(k * step + lower);
    }
};
// Builds the inner loop for `body`, vectorizing it when the range is long
// enough.
//
// The returned function takes the bounds (lower, upper). Ranges shorter than
// get_vector_length() * simd_iter_threshold iterations run entirely through
// the scalar `range` loop. Longer ranges are split into three parts:
//   1. a scalar head        [lower, head_end)
//   2. a vectorized middle  [head_end, tail_begin), get_vector_length()
//      iterations per `vectorize` call
//   3. a scalar tail        [tail_begin, upper)
// NOTE(review): `round_up(x, n)` presumably rounds x up to the next multiple
// of n, which would make the vectorized part start and end on multiples of
// the vector length - confirm against its definition.
fn @inner_loop(body: fn(i32) -> ()) = @|lower: i32, upper: i32| {
    let width = get_vector_length();
    if upper - lower >= width * simd_iter_threshold {
        let head_end = round_up(lower, width);
        let tail_begin = round_up(upper - width + 1, width);

        // Scalar head.
        range(body)(lower, head_end);

        // Vectorized middle: `base` is the first index of each vector,
        // `lane` the offset within it.
        for base in range_step(head_end, tail_begin, width) {
            vectorize(width, |lane| @body(base + lane))
        }

        // Scalar tail.
        range(body)(tail_begin, upper);
    } else {
        // Too short to be worth vectorizing.
        range(body)(lower, upper);
    }
};
// Builds the strided inner loop for `body`, vectorizing it when the range is
// long enough.
//
// The returned function takes (lower, upper, step) and calls `body` with
// lower, lower + step, lower + 2 * step, ... Ranges covering fewer than
// get_vector_length() * simd_iter_threshold strided iterations run entirely
// through the scalar `range_step` loop. Longer ranges run as many full
// vectors as fit through `vectorize`, and the leftover iterations through
// `range_step`.
//
// Assumes step > 0 (a zero step divides by zero).
fn @inner_loop_step(body: fn(i32) -> ()) = @|lower: i32, upper: i32, step: i32| {
    if upper - lower < get_vector_length() * simd_iter_threshold * step {
        range_step(body)(lower, upper, step);
    } else {
        // Number of full vectors that fit into the strided range.
        let iter_vec = (upper - lower) / (step * get_vector_length());
        // First index that is not covered by the vectorized part.
        let remainder_start = lower + iter_vec * get_vector_length() * step;
        // `i + j` is the dense iteration number; `i` advances one vector at a
        // time and `j` is the lane within the vector.
        for i in range_step(0, iter_vec * get_vector_length(), get_vector_length()) {
            // Scale the iteration number by `step` (not by the vector length)
            // so the vectorized part visits lower + k * step and ends exactly
            // where the scalar remainder begins.
            vectorize(get_vector_length(), |j| @body((i + j) * step + lower))
        }
        range_step(body)(remainder_start, upper, step);
    }
};