-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmetrics.py
executable file
·94 lines (86 loc) · 3.25 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
import sys
import argparse
import numpy as np
import pandas as pd
import sklearn.metrics
# Command-line interface: where to write, whether to label each value, and
# which metrics to report (in the order given).
parser = argparse.ArgumentParser()
parser.add_argument(
    "-o",
    "--out",
    default=sys.stdout,
    type=argparse.FileType('w', encoding='UTF-8'),
    help="the filename to save the data to",
)
parser.add_argument(
    "-n",
    "--names",
    action='store_true',
    help="whether to also output the names of each metric",
)
_metrics_help = (
    "a comma separated, ordered list of metrics to output; "
    "use 'r' for recall, 'p' for precision, 'b' for the F-beta score, "
    "'t' for total positives, 'f' for total negatives, 'a' for the AUROC, "
    "and 'v' for the avg precision score"
)
parser.add_argument(
    "-m",
    "--metrics",
    default='r,p,b,t,f,a,v',
    help=_metrics_help,
)
# Position of each metric (keyed by its one-letter code) in the `scores` array
# built further down: 0-2 are precision/recall/F-beta, 4 and 5 are the column
# sums of the confusion matrix (negatives first, then positives), and 6 and 7
# are the AUROC and the average precision. Index 3 is not exposed.
to_idx = {'p': 0, 'r': 1, 'b': 2, 'f': 4, 't': 5, 'a': 6, 'v': 7}
# Human-readable label for each code, used by --names. 't' is the positives
# count and 'f' the negatives count, matching the --metrics help text and the
# positions in to_idx.
metric_names = {
    'p': "Precision",
    'r': "Recall",
    'b': "F-beta",
    'f': "Total Negatives",
    't': "Total Positives",
    'a': "AUROC",
    'v': "Average Precision",
}
# Remaining options: skip the probability column, invert the probabilities,
# and the input table itself (stdin when no path is given).
parser.add_argument(
    "-p",
    "--ignore-probs",
    help=(
        "whether to only read truth and predict columns and ignore a probs "
        "column if it is provided; note that the AUROC and avg precision will "
        "not be output"
    ),
    action='store_true',
)
parser.add_argument(
    "-f",
    "--flip",
    help=(
        "whether to flip the probabilities; only relevant if --ignore-probs is "
        "not passed"
    ),
    action='store_true',
)
parser.add_argument(
    "table",
    default=sys.stdin,
    nargs="?",
    help="a three column (truth/probs/predicted) table w/o a header",
)
# Parse sys.argv once; everything below reads its settings from `args`.
args = parser.parse_args()
# which cols should we read?
fields = ['truth', 'probs', 'predict']
# np.float64 rather than the np.float_ alias, which NumPy 2.0 removed
dtypes = {'predict': np.bool_, 'truth': np.bool_, 'probs': np.float64}
if args.ignore_probs:
    # Keep 'truth' and 'predict'. Slicing the first two names would keep
    # 'probs' instead and leave no 'predict' column for the metrics below.
    fields = ['truth', 'predict']
    dtypes.pop('probs')
    # The probs column is optional in this mode, so the column count is not
    # known before reading: read positionally and keep the first (truth) and
    # last (predict) columns, which covers both the two- and three-column
    # layouts.
    raw = pd.read_csv(
        args.table, sep='\t', header=None,
        index_col=False, low_memory=False, na_values='.'
    )
    if raw.shape[1] < 2:
        raise ValueError(
            "expected at least two columns (truth and predict) in the table"
        )
    df = raw.iloc[:, [0, -1]].copy()
    df.columns = fields
    # missing values count as False, as in the three-column path below
    df = df.fillna(0).astype(dtypes)
else:
    # read the file into a pandas data frame
    df = pd.read_csv(
        args.table, sep='\t', header=None, names=fields,
        index_col=False, dtype=dtypes,
        low_memory=False, na_values='.'
    )
    df.fillna(0, inplace=True)
# calculate the metrics
# scores[0:4] holds the four values returned by
# precision_recall_fscore_support (precision, recall, F-beta with beta=1, and
# a fourth slot that the metric table does not expose).
prf = sklearn.metrics.precision_recall_fscore_support(
    df['truth'], df['predict'], beta=1, average='binary'
)
# scores[4:6] holds the confusion-matrix column sums, i.e. how many rows were
# predicted negative and positive. The labels are pinned so the matrix is
# always 2x2: without them it collapses to 1x1 when only one class occurs,
# which would shift the position of every value that follows.
predicted_totals = sklearn.metrics.confusion_matrix(
    df['truth'], df['predict'], labels=[False, True]
).sum(0)
scores = np.append(prf, predicted_totals)
# calculate additional metrics if we can
if not args.ignore_probs:
    # replace inf values with a number 1 larger than the next largest value
    # (np.inf rather than np.float_('inf'): NumPy 2.0 removed np.float_)
    is_inf = df['probs'] == np.inf
    if is_inf.any():
        below_inf = df.loc[~is_inf, 'probs']
        # if every score is inf there is no "next largest" value; any common
        # finite number keeps them tied, so fall back to 1
        ceiling = below_inf.max() + 1 if len(below_inf) else 1.0
        df['probs'] = df['probs'].mask(is_inf, ceiling)
    # turn the scores into probabilities if they're not already
    # Only rescale by a positive maximum: dividing by zero yields NaN/inf and
    # dividing by a negative maximum reverses the ranking. Both metrics below
    # depend only on the ordering of the scores, so leaving them unscaled in
    # that case is safe.
    largest = df['probs'].max()
    probs = df['probs']/largest if largest > 0 else df['probs']
    if args.flip:
        print("Inverting predictions.", file=sys.stderr)
        probs = 1-probs
    # AUROC and average precision land at positions 6 and 7 of `scores`
    scores = np.append(
        scores,
        np.array([
            sklearn.metrics.roc_auc_score(df['truth'], probs),
            sklearn.metrics.average_precision_score(df['truth'], probs)
        ])
    )
# which metrics should we return?
# Filter the requested codes once and reuse the list for both the values and
# their labels. The AUROC ('a') and avg precision ('v') do not exist when the
# probabilities are ignored, so they are dropped here; filtering only the
# values would leave more labels than values and break the --names output.
requested = [
    metric
    for metric in args.metrics.split(",")
    if not args.ignore_probs or metric not in {'a', 'v'}
]
metrics = [to_idx[metric] for metric in requested]
# format results
result = scores[metrics]
if args.names:
    # two columns: metric name, then its value
    labels = [metric_names[metric] for metric in requested]
    result = np.array([labels, result]).T
# one metric per line, tab-separated when names are included
np.savetxt(args.out, result, delimiter="\t", fmt='%s')