convperf.py
#!/usr/bin/env python3
# Main driver to run performance experiments
import argparse
import subprocess
import multiprocessing
import matplotlib.pyplot as plt
import numpy as np
import os
import json
plt.style.use('ggplot')
BAR_WIDTH = 0.15
BAR_COLORS = {
    'xsmm': 'red',
    'iree': 'blue',
}
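
# Helper that launches one benchmark binary and scrapes its timings. The
# command is pinned with numactl to NUMA node 1 with local memory allocation,
# and OMP_NUM_THREADS is set from --num_threads. Lines of the tool's stdout
# containing 'real_time' are parsed for the timing value; the exact column
# layout and time unit depend on the benchmark tool's output format.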
def run(cmd, args):
    bench_env = os.environ.copy()
    bench_env["OMP_NUM_THREADS"] = f"{args.num_threads}"
    cmd = ['numactl', '--cpunodebind=1', '-l'] + cmd
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=bench_env)
    output = p.communicate()
    runtimes = []
    for line in output[0].decode('utf-8').split('\n'):
        if 'real_time' in line:
            tokens = line.split()
            runtimes.append(int(tokens[1]))
        print(line)
    p.wait()
    return runtimes
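
# Build and run the IREE benchmark command. For multi-threaded runs the task
# topology group count is capped at num_threads - 1.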
def benchmark_iree(args):
    cmd = [args.benchmark_tool, '-r', 'iree']
    if args.num_threads > 1:
        cmd += [f'--task_topology_max_group_count={args.num_threads-1}']
    return run(cmd, args)
def benchmark_xsmm(args):
    cmd = [args.benchmark_tool, '-r', 'xsmm']
    return run(cmd, args)
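
# Dispatch to the requested runners. --runners is a comma-separated list,
# e.g. 'iree,xsmm'; unknown entries are reported and skipped.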
def benchmark(args):
    runtimes = {"iree": [], "xsmm": []}
    for runner in args.runners.split(','):
        if runner == "iree":
            runtimes["iree"] = benchmark_iree(args)
        elif runner == "xsmm":
            runtimes["xsmm"] = benchmark_xsmm(args)
        else:
            print("Unsupported runner!")
    return runtimes
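
# Render a tensor shape according to its layout string. For example, with an
# illustrative input config (not taken from a real sizes file)
#   {'N': 1, 'C': 64, 'H': 56, 'W': 56, 'format': 'nhwc'}
# this returns '1x56x56x64'; a filter config supplies its leading dimension
# under the key 'F' instead of 'N'.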
def shape_str(config, is_filter):
    if is_filter:
        N = config['F']
    else:
        N = config['N']
    C = config['C']
    H = config['H']
    W = config['W']
    p_str = ''
    for v in config['format']:
        if v == 'n' or v == 'f':
            p_str += f'{N}x'
        if v == 'h':
            p_str += f'{H}x'
        if v == 'w':
            p_str += f'{W}x'
        if v == 'c':
            p_str += f'{C}x'
    p_str = p_str[:-1]
    return p_str
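
# FLOP count for a direct 2D convolution: each output element
# (N x Cout x Hout x Wout of them) takes Cin * Kh * Kw multiply-accumulates,
# counted as 2 FLOPs each.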
def compute_flops(config):
    N = config["input"]["N"]
    Cout = config["output"]["C"]
    Cin = config["input"]["C"]
    Hout = config["output"]["H"]
    Wout = config["output"]["W"]
    Kh = config["filter"]["H"]
    Kw = config["filter"]["W"]
    flops = 2 * N * Cin * Cout * Hout * Wout * Kh * Kw
    return flops
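
# Build one "<input-shape>_<filter-shape>" label and a FLOP count per config.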
def compute_labels_and_flops(configs):
    labels = []
    flops = []
    for config in configs:
        labels.append(shape_str(config["input"], False) + "_" + shape_str(config["filter"], True))
        flops.append(compute_flops(config))
    return labels, flops
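
# Persist the measured runtimes, plus the path of the sizes file they were
# generated from, so a later --visualize run can reload them.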
def save_runtimes(args, runtimes):
    runtimes["benchmark_sizes"] = args.benchmark_sizes
    with open("runtimes.json", "w") as f:
        json.dump(runtimes, f, ensure_ascii=False, indent=4, sort_keys=True)
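
# Plot throughput per convolution size as grouped bars, one bar per runner.
# The plotted value is flops / runtime / 1e6, so its magnitude depends on the
# time unit reported by the benchmark tool. X ticks are centred under each
# group of bars.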
def visualize(args):
    generate_x = lambda i, labels: np.arange(len(labels)) + i * BAR_WIDTH
    with open(args.runtimes_file, "r") as f:
        data = json.load(f)
    with open(data["benchmark_sizes"], "r") as f:
        sizes = json.load(f)
    del data["benchmark_sizes"]
    labels, flops = compute_labels_and_flops(sizes["configs"])
    for i, method in enumerate(data.keys()):
        speed = [(y / x / 1e6) for x, y in zip(data[method], flops)]
        print(f"MFLOPS[{method}]: {speed}")
        plt.bar(generate_x(i, labels), speed, BAR_WIDTH, label=method, color=BAR_COLORS[method])
    x_pos = [i + 0.5 * (len(data) - 1) * BAR_WIDTH for i in range(len(labels))]
    plt.xticks(x_pos, labels, rotation=90, fontsize=5)
    plt.xlabel('Convolution sizes')
    plt.ylabel('MFLOPS')
    plt.title("2D Convolution in fp32")
    plt.legend(loc='best')
    plt.savefig('convs.png', dpi=300, bbox_inches='tight')
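
# Command line interface: which benchmark binary to run, which runners to
# compare, where the sizes and runtimes JSON files live, and the thread count.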
def define_options(parser):
    parser.add_argument('--benchmark_tool', type=str, help='Path to benchmark tool')
    parser.add_argument('--runners', type=str, help='Methods to be benchmarked')
    parser.add_argument('--visualize', action=argparse.BooleanOptionalAction)
    parser.add_argument('--benchmark_sizes', type=str, help='Path to benchmark sizes file')
    parser.add_argument('--runtimes_file', type=str, help='Path to runtimes file')
    parser.add_argument('--num_threads', type=int, help='Number of threads to run benchmark on')
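
# Entry point: without --visualize the selected runners are benchmarked and
# the timings written to runtimes.json; with --visualize an existing runtimes
# file is plotted instead.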
parser = argparse.ArgumentParser()
define_options(parser)
args = parser.parse_args()
if not args.visualize:
    runtimes = benchmark(args)
    save_runtimes(args, runtimes)
else:
    visualize(args)
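
# Example usage (hypothetical paths, shown only for illustration):
#   python convperf.py --benchmark_tool=./build/convperf_bench \
#       --runners=iree,xsmm --benchmark_sizes=benchmark_sizes.json --num_threads=8
#   python convperf.py --visualize --runtimes_file=runtimes.json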