-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathresult_proc.py
131 lines (101 loc) · 4.43 KB
/
result_proc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
A set of utility methods for processing and analyzing results of experiments
"""
from os import listdir, remove, walk
from os.path import isfile, join, splitext
from gh_query import load_json
from statistics import mean
from shutil import copytree
from datetime import date
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def plot_result(result_dict: dict, title: str):
    """
    Draws a grouped bar chart of recall, precision and F1-score.

    result_dict maps a label to a dict holding the keys 'recall', 'precision'
    and 'f1-score'; every value is multiplied by 100 and plotted on a
    0-100 "Percent" axis. After the figure is shown, the labels and the three
    series are printed to stdout.

    :param result_dict: per-label metrics, in the order they should be plotted
    :param title: title of the chart
    """
    names = list(result_dict)
    recall_pct = [result_dict[name]['recall'] * 100 for name in names]
    precision_pct = [result_dict[name]['precision'] * 100 for name in names]
    f1_pct = [result_dict[name]['f1-score'] * 100 for name in names]

    positions = np.arange(len(names))
    bar_width = 0.2  # the width of the bars

    figure, axes = plt.subplots()
    # The three bars of a group sit side by side around the tick position,
    # separated by a small gap of 0.125.
    recall_rects = axes.bar(positions - bar_width - 0.125, recall_pct, bar_width, label="Recall")
    precision_rects = axes.bar(positions, precision_pct, bar_width, label="Precision")
    f1_rects = axes.bar(positions + bar_width + 0.125, f1_pct, bar_width, label="F1-score")

    axes.set_ylabel("Percent")
    axes.set_ylim([0, 100])
    axes.set_xticks(positions)
    axes.set_xticklabels(names)
    axes.set_title(title)
    # Legend is anchored outside the plotting area, to the right of the axes.
    axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    for rects in (recall_rects, precision_rects, f1_rects):
        autolabel(rects, axes)

    figure.tight_layout()
    plt.show()

    print(names, recall_pct, precision_pct, f1_pct)
def autolabel(rects, ax):
    """
    Writes the height of every bar in *rects* just above that bar.

    The value is formatted with two decimals and centred horizontally on
    the bar.

    :param rects: iterable of bars offering get_height(), get_x() and get_width()
    :param ax: axes object whose annotate() method draws the labels
    """
    text_style = dict(
        xytext=(0, 3),  # 3 points vertical offset
        textcoords="offset points",
        ha='center',
        va='bottom',
        fontsize="small",
    )
    for bar in rects:
        bar_height = bar.get_height()
        bar_centre = bar.get_x() + bar.get_width() / 2
        ax.annotate('{:.2f}'.format(bar_height), xy=(bar_centre, bar_height), **text_style)
def eval_result(result_path, model_name, date_time, dataset='combined', filter="unfiltered", macro_avg=True):
    """
    Averages recall, precision and F1-score of the result files per Top-n.

    Only regular files directly inside result_path are considered. A file is
    selected when its name contains model_name and its base name (extension
    removed), split on "_", has
      - field 1 equal to dataset,
      - field 4 equal to filter,
      - fields 5 and 6, joined by "_", equal to date_time.
    Field 3 of a selected file becomes the n of its "Top-n" group.

    Files whose names do not have at least seven "_"-separated fields are
    skipped; previously such a file (for example an unrelated file that
    merely contains model_name) could abort the evaluation with an IndexError.

    :param result_path: folder that holds the JSON result files
    :param model_name: substring that must occur in the file name
    :param date_time: expected "<field 5>_<field 6>" part of the file name
    :param dataset: expected field 1 of the file name
    :param filter: expected field 4 of the file name
    :param macro_avg: read the 'macro avg' entry if True, else 'weighted avg'
    :return: dict mapping "Top-<n>" to a dict with the mean 'recall',
             'precision' and 'f1-score' over all selected files of that
             group; empty if no file is selected
    """
    avg_type = 'macro avg' if macro_avg else 'weighted avg'
    metrics = ('recall', 'precision', 'f1-score')

    collected = {}
    for file_name in listdir(result_path):
        if model_name not in file_name or not isfile(join(result_path, file_name)):
            continue

        # Parse the name once instead of re-splitting it for every comparison.
        fields = splitext(file_name)[0].split("_")
        if len(fields) < 7:
            # Not a result file following the expected naming pattern.
            continue
        if fields[1] != dataset or fields[4] != filter or "_".join(fields[5:7]) != date_time:
            continue

        res = load_json(join(result_path, file_name))['result'][avg_type]
        scores = collected.setdefault("Top-%s" % fields[3], {m: [] for m in metrics})
        for m in metrics:
            scores[m].append(res[m])

    # Every group holds at least one value per metric, so mean() is safe here.
    return {top: {m: mean(scores[m]) for m in metrics} for top, scores in collected.items()}
def copy_results(src_path, dest_path):
    """
    Copies the whole src_path tree into a new folder below dest_path.

    The new folder is named "report-<dd-mm-yy>" using today's date, so the
    output folder can be cleaned afterwards without losing the results.
    """
    today_stamp = date.today().strftime("%d-%m-%y")
    report_dir = join(dest_path, "report-%s" % today_stamp)
    copytree(src_path, report_dir)
def clean_output(output_path):
    """
    Removes every file found under output_path, nested folders included.

    The folders themselves are left in place. For each file a line is printed
    that tells whether it was deleted or, if a PermissionError occurred,
    that it could not be deleted.
    """
    file_paths = (
        join(folder, name)
        for folder, _, names in walk(output_path)
        for name in names
    )
    for path in file_paths:
        try:
            remove(path)
            print("Deleted file %s" % path)
        except PermissionError:
            print("Could not delete file %s" % path)
def plot_top_n_types(types_file, top_n=50):
    """
    Shows a horizontal bar chart of the first top_n types of a CSV file.

    The CSV file must provide the columns 'type' and 'count'. Rows are taken
    in file order, i.e. the file is expected to be sorted by frequency already.

    :param types_file: path of the CSV file with the type counts
    :param top_n: number of leading rows to plot
    """
    # Already sorted
    frame = pd.read_csv(types_file)
    type_names = frame['type'].values[:top_n]
    type_counts = frame['count'].values[:top_n]

    bar_positions = np.arange(len(type_counts))

    plt.barh(bar_positions, type_counts, align='center', alpha=0.5)
    plt.yticks(bar_positions, type_names)
    plt.xlabel('Counts')
    plt.ylabel('Types')
    plt.title('Top %d most frequent types in the dataset' % top_n)

    plt.show()