-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathutils.impala
75 lines (67 loc) · 2.47 KB
/
utils.impala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
// Writes zero into the first `width * height` elements of `arr`.
//   arr:    buffer to clear; must hold at least width*height elements
//   width:  number of columns
//   height: number of rows
fn init_zero(arr: &mut[f32], width: i32, height: i32) -> () {
    let count = width * height;
    for idx in range(0, count) {
        arr(idx) = 0;
    }
}
// Fills a width x height image stored row by row in `arr` with
// `random_val_f32() * 1024`, visiting elements in row-major order.
// NOTE(review): the resulting value range depends on what random_val_f32
// returns, which is defined outside this file.
fn init_rand(arr: &mut[f32], width: i32, height: i32) -> () {
    for row in range(0, height) {
        // Index of the first element of the current row.
        let row_start = row * width;
        for col in range(0, width) {
            arr(row_start + col) = random_val_f32() * 1024;
        }
    }
}
// Returns the smaller of the two integers `a` and `b`.
fn @min(a: i32, b: i32) -> i32 {
    if b <= a { b } else { a }
}
// Returns the larger of the two integers `a` and `b`.
fn @max(a: i32, b: i32) -> i32 {
    if b >= a { b } else { a }
}
// Returns the absolute value of `a` (negates `a` when it is below zero).
fn @abs(a: i32) -> i32 {
    if a >= 0 { a } else { -a }
}
// Rounds `num` up to the next multiple of `multiple`; a value that already is
// a multiple is returned unchanged.
// NOTE(review): relies on integer division, so the result is only a true
// "round up" for non-negative `num` and positive `multiple` - confirm callers.
fn @round_up(num: i32, multiple: i32) -> i32 {
    // Number of `multiple`-sized chunks needed to cover `num`.
    let chunks = (num + multiple - 1) / multiple;
    chunks * multiple
}
// Rounds `num` down to a multiple of `multiple` by dropping the remainder of
// the integer division.
// NOTE(review): intended for non-negative `num` and positive `multiple`.
fn @round_down(num: i32, multiple: i32) -> i32 {
    // Number of complete `multiple`-sized chunks contained in `num`.
    let whole_chunks = num / multiple;
    whole_chunks * multiple
}
// Running totals of the median timings returned by benchmark():
// benchmark_acc adds to total_kernel_timing, benchmark_cpu adds to
// total_cpu_timing. Both are reported by print_total_timing().
static mut total_kernel_timing = 0:i64;
static mut total_cpu_timing = 0:i64;
// Runs `body` followed by `sync` `num_iter` times, measuring each run with
// `get_time`, then prints the median, minimum and maximum sample (each divided
// by 1000 and labelled "ms") and returns the unscaled median.
//   get_time: clock; the difference of two readings is one sample
//   num_iter: number of timed runs; must be at least 1, since samples 0 and
//             num_iter-1 are read unconditionally
//   body:     code under test
//   sync:     called after `body` inside the timed region
fn @benchmark(get_time: fn() -> i64, num_iter: i32, body: fn() -> (), sync: fn() -> ()) -> i64 {
    // Scratch buffer holding one i64 sample per run.
    let samples_buf = alloc_cpu(num_iter as i64 * sizeof[i64]());
    let samples = bitcast[&mut[i64]](samples_buf.data);

    for run in range(0, num_iter) {
        let begin = get_time();
        body();
        sync();
        samples(run) = get_time() - begin;
    }

    // Ascending order: smallest sample first, largest sample last.
    sort(num_iter, samples, |lhs: i64, rhs: i64| lhs > rhs);

    let median = samples(num_iter / 2);
    let fastest = samples(0);
    let slowest = samples(num_iter - 1);

    print_string("Timing: ");
    print_f64(median as f64 / 1000.0);
    print_string(" | ");
    print_f64(fastest as f64 / 1000.0);
    print_string(" | ");
    print_f64(slowest as f64 / 1000.0);
    print_string(" (median(");
    print_i32(num_iter);
    print_string(") | minimum | maximum) ms\n");

    release(samples_buf);
    median
}
// Number of timed runs per measurement: iter_acc is passed to benchmark() by
// benchmark_acc, iter_cpu by benchmark_cpu.
static iter_acc = 7;
static iter_cpu = 27;
// Builds a benchmark runner for accelerator code: the returned function takes
// an Accelerator, times `body` over iter_acc runs via benchmark() using
// get_kernel_time as the clock and acc.sync as the synchronization callback,
// and adds the returned median to total_kernel_timing.
fn @benchmark_acc(body: fn() -> ()) = @|acc: Accelerator| total_kernel_timing += benchmark(get_kernel_time, iter_acc, body, acc.sync);
// Builds a benchmark runner for host code: the returned function times `body`
// over iter_cpu runs via benchmark() using get_micro_time as the clock and a
// no-op synchronization callback, and adds the returned median to
// total_cpu_timing.
fn @benchmark_cpu(body: fn() -> ()) = @|| total_cpu_timing += benchmark(get_micro_time, iter_cpu, body, @|| {});
// Returns true when any of the backend queries is_nvvm, is_cuda, is_opencl or
// is_amdgpu reports true. All four queries are evaluated (non-short-circuit
// `|`), in that order.
fn @is_gpu() -> bool {
    let nvvm_or_cuda = is_nvvm() | is_cuda();
    let opencl_or_amdgpu = is_opencl() | is_amdgpu();
    nvvm_or_cuda | opencl_or_amdgpu
}
// Prints the accumulated CPU and kernel timing totals on one line, each
// divided by 1000 and labelled "ms".
fn print_total_timing() -> () {
    let cpu_scaled = total_cpu_timing as f64 / 1000.0;
    let kernel_scaled = total_kernel_timing as f64 / 1000.0;

    print_string("Total timing for cpu / kernel: ");
    print_f64(cpu_scaled);
    print_string(" / ");
    print_f64(kernel_scaled);
    print_string(" ms\n")
}
// In-place insertion sort of the first `num` elements of `arr`.
//   num: number of elements to sort
//   arr: buffer holding at least `num` elements
//   cmp: cmp(a, b) returns true when `a` must be placed after `b`
//        (e.g. `a > b` yields ascending order)
fn @(?num) sort[T](num: i32, arr: &mut[T], cmp: fn(T, T)->bool) -> () {
    for pos in range(1, num) {
        // Element being inserted into the already sorted prefix [0, pos).
        let key = arr(pos);
        let mut hole = pos;
        // Shift larger-ranked elements one slot right until key's place is found.
        while hole > 0 && cmp(arr(hole - 1), key) {
            arr(hole) = arr(hole - 1);
            hole -= 1;
        }
        arr(hole) = key;
    }
}