-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgaussian.impala
69 lines (59 loc) · 2.89 KB
/
gaussian.impala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Entry point of the Gaussian blur example.
//
// Filters a 4096x4096 f32 image with a 5-tap mask, either as a separable
// filter (two 1D masks, `sep == true`) or as a single 5x5 stencil, prints the
// collected timings, and then validates the output against a direct 2D
// stencil evaluation.
//
// Returns 0 when every output pixel is within 0.01 of the reference value,
// 42 otherwise.
#[export]
fn main() -> i32 {
    let width  = 4096;
    let height = 4096;
    let arr    = create_img[f32](width, height, alloc_cpu);
    let out    = create_img[f32](width, height, alloc_cpu);
    // Each image is initialised with its own stride and height. The original
    // passed `out.height` for `arr`, which only worked because both images
    // share the same dimensions.
    // NOTE(review): init_rand / init_zero presumably fill the buffer with
    // random values / zeros -- their definitions are not visible here.
    init_rand(bitcast[&mut[f32]](arr.buf.data), arr.stride, arr.height);
    init_zero(bitcast[&mut[f32]](out.buf.data), out.stride, out.height);

    // 1D coefficients used for both passes of the separable variant.
    let mask_sep = get_mask_sep5([0.070766, 0.244460, 0.369546, 0.244460, 0.070766]);
    // 5x5 coefficients; numerically the outer product of the 1D coefficients
    // above (e.g. 0.070766 * 0.070766 ~= 0.005008), so both variants are
    // expected to produce matching results.
    let mask     = get_mask5([[0.005008, 0.017300, 0.026151, 0.017300, 0.005008],
                              [0.017300, 0.059761, 0.090339, 0.059761, 0.017300],
                              [0.026151, 0.090339, 0.136565, 0.090339, 0.026151],
                              [0.017300, 0.059761, 0.090339, 0.059761, 0.017300],
                              [0.005008, 0.017300, 0.026151, 0.017300, 0.005008]]);

    // Boundary handling functions; the commented-out lines are an alternative
    // configuration kept for experimentation.
    //let lower = wrap_const_fun[f32](const_lower[f32], 0:f32);
    //let upper = wrap_const_fun[f32](const_upper[f32], 0:f32);
    let lower = clamp_lower[f32];
    let upper = clamp_upper[f32];

    // Compile-time switch between the separable and the full 2D variant.
    let sep = true;
    if sep {
        let iteration_sep_fun = iteration_sep[f32];            // SEP + SS
        //let iteration_sep_fun = iteration_sep_bounds[f32];   // SEP + SS + BH
        //let iteration_sep_fun = iteration_sep_advanced[f32]; // SEP + SS + SM
        // `mask` inside the loop body is the mask handed in by the iteration
        // function and shadows the 5x5 `mask` declared above.
        for x, y, out_acc, arr_acc, mask, is_row in iteration_sep_fun(out, arr, mask_sep, mask_sep, lower, upper) {
            out_acc.write(x, y, apply_stencil_sep(x, y, arr_acc, mask, is_row));
        }
    } else {
        let iteration_fun = iteration[f32];            // SS
        //let iteration_fun = iteration_bounds[f32];   // SS + BH
        //let iteration_fun = iteration_advanced[f32]; // SS + SM
        // The second argument of make_img_list1 is half the mask extent in
        // each dimension.
        for x, y, out_acc, accs in iteration_fun(out, make_img_list1(arr, (mask.size_x / 2, mask.size_y / 2)), lower, upper) {
            out_acc.write(x, y, apply_stencil(x, y, accs.get(0), mask));
        }
    }
    print_total_timing();

    // Recomputes every output pixel with the full 5x5 stencil directly on the
    // input buffer and compares it with the filtered image.
    // Returns 0 on success and 42 as soon as any pixel deviates by more than
    // 0.01.
    fn reference() -> i32 {
        // Despite its name, `passed` is a status code: 0 means success.
        let mut passed = 0;
        // Input accessor created with get_acc_bh, with both boundary modes
        // set to Unknown and the same lower/upper functions as the filter run.
        let arr_acc = get_acc_bh[f32](arr, |idx, val| bitcast[&mut[f32]](arr.buf.data)(idx) = val, |idx| bitcast[&[f32]](arr.buf.data)(idx), (Boundary::Unknown, Boundary::Unknown), lower, upper);
        // Plain accessor for the output image.
        let out_acc = get_acc[f32](out, |idx, val| bitcast[&mut[f32]](out.buf.data)(idx) = val, |idx| bitcast[&[f32]](out.buf.data)(idx));
        for y in range(0, out.height) {
            for x in range(0, out.width) {
                let ref = apply_stencil(x, y, arr_acc, mask);
                // Absolute difference between reference and computed value.
                let mut diff = ref - out_acc.read(x, y);
                if diff < 0:f32 { diff = -diff; }
                if diff > 0.01:f32 { passed = 42; }
            }
        }
        if passed == 0 {
            print_string("Test PASSED!\n");
        } else {
            print_string("Test FAILED!\n");
        }
        passed
    }

    let result = reference();
    // Free both image buffers before returning the validation result.
    release(arr.buf);
    release(out.buf);
    result
}