import numpy as np
import pandas as pd
from sklearn import metrics

from config import config

SUBGROUP_AUC = 'subgroup_auc'
BPSN_AUC = 'bpsn_auc'  # stands for background positive, subgroup negative
BNSP_AUC = 'bnsp_auc'  # stands for background negative, subgroup positive


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def compute_auc(y_true, y_pred):
    """ROC AUC, or NaN when y_true contains only one class."""
    try:
        return metrics.roc_auc_score(y_true, y_pred)
    except ValueError:
        return np.nan


def compute_subgroup_auc(df, subgroup, label, model_name):
    """Computes the AUC restricted to examples that mention the subgroup."""
    subgroup_examples = df[df[subgroup]]
    return compute_auc(subgroup_examples[label], subgroup_examples[model_name])


def compute_bpsn_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup negative examples and the background positive examples."""
    subgroup_negative_examples = df[df[subgroup] & ~df[label]]
    non_subgroup_positive_examples = df[~df[subgroup] & df[label]]
    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
    examples = pd.concat([subgroup_negative_examples, non_subgroup_positive_examples])
    return compute_auc(examples[label], examples[model_name])


def compute_bnsp_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup positive examples and the background negative examples."""
    subgroup_positive_examples = df[df[subgroup] & df[label]]
    non_subgroup_negative_examples = df[~df[subgroup] & ~df[label]]
    examples = pd.concat([subgroup_positive_examples, non_subgroup_negative_examples])
    return compute_auc(examples[label], examples[model_name])
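
# Illustrative sketch (not part of the original module): the three AUC variants
# above expect a DataFrame with one boolean column per identity subgroup, a boolean
# label column, and a float prediction column. With a hypothetical frame
#
#     df = pd.DataFrame({'muslim': [True, True, False, False],
#                        'toxic':  [True, False, True, False],
#                        'pred':   [0.9, 0.4, 0.8, 0.1]})
#
# compute_subgroup_auc(df, 'muslim', 'toxic', 'pred') scores only subgroup rows,
# compute_bpsn_auc(...) mixes subgroup negatives with background positives, and
# compute_bnsp_auc(...) mixes subgroup positives with background negatives. A low
# BPSN AUC suggests the model over-scores the subgroup's non-toxic examples.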


def compute_bias_metrics_for_model(dataset,
                                   subgroups,
                                   model,
                                   label_col,
                                   include_asegs=False):  # unused; kept for API compatibility
    """Computes per-subgroup metrics for all subgroups and one model."""
    records = []
    for subgroup in subgroups:
        record = {
            'subgroup': subgroup,
            'subgroup_size': len(dataset[dataset[subgroup]])
        }
        record[SUBGROUP_AUC] = compute_subgroup_auc(dataset, subgroup, label_col, model)
        record[BPSN_AUC] = compute_bpsn_auc(dataset, subgroup, label_col, model)
        record[BNSP_AUC] = compute_bnsp_auc(dataset, subgroup, label_col, model)
        records.append(record)
    return pd.DataFrame(records).sort_values(SUBGROUP_AUC, ascending=True)


# Convert target and identity columns to booleans
def convert_to_bool(df, col_name):
    df[col_name] = np.where(df[col_name] >= 0.5, True, False)


def convert_dataframe_to_bool(df, list_cols):
    bool_df = df.copy()
    for col in list_cols:
        convert_to_bool(bool_df, col)
    return bool_df


def calculate_overall_auc(df, model_name):
    """Overall ROC AUC of model_name's scores against config.toxicity_column."""
    true_labels = df[config.toxicity_column]
    predicted_labels = df[model_name]
    return metrics.roc_auc_score(true_labels, predicted_labels)


def power_mean(series, p):
    """Generalized (power) mean of a series: (mean(x ** p)) ** (1 / p)."""
    total = sum(np.power(series, p))
    return np.power(total / len(series), 1 / p)
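
# Worked example (illustrative): with p = -5 the power mean is dragged toward the
# smallest value, so one poorly-served subgroup dominates the aggregate:
#     power_mean([0.9, 0.7], -5)  ->  ((0.9**-5 + 0.7**-5) / 2) ** (-1/5) ≈ 0.765
# versus an arithmetic mean of 0.8.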


def get_final_metric(bias_df, overall_auc, POWER=-5, OVERALL_MODEL_WEIGHT=0.25):
    bias_score = np.average([
        power_mean(bias_df[SUBGROUP_AUC], POWER),
        power_mean(bias_df[BPSN_AUC], POWER),
        power_mean(bias_df[BNSP_AUC], POWER)
    ])
    return (OVERALL_MODEL_WEIGHT * overall_auc) + ((1 - OVERALL_MODEL_WEIGHT) * bias_score)
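
# This appears to implement the final metric from the Jigsaw Unintended Bias in
# Toxicity Classification competition: with w = OVERALL_MODEL_WEIGHT and M_p the
# power mean above,
#     score = w * AUC_overall + (1 - w) * (M_p(subgroup) + M_p(BPSN) + M_p(BNSP)) / 3
# Illustrative call, assuming `bias_df` came from compute_bias_metrics_for_model:
#     final = get_final_metric(bias_df, overall_auc=0.95)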


def scoring_valid(predict, identity_df, target, model_name='quora_multitarget', save_output=False):
    """Scores validation predictions with the bias-weighted final metric."""
    valid_df = identity_df.copy()
    valid_df['target'] = target
    # Store predictions under model_name; the previous default ('test_model') did not
    # match the hard-coded 'quora_multitarget' column and broke calculate_overall_auc.
    valid_df[model_name] = predict
    valid_df = convert_dataframe_to_bool(valid_df, ['target'] + config.identity_columns)
    bias_metrics_df = compute_bias_metrics_for_model(valid_df, config.identity_columns, model_name, 'target')
    if save_output:
        bias_metrics_df.to_csv("z_bias_metrics_df.csv", index=False)
        valid_df.to_csv("z_valid_df.csv", index=False)
    score = get_final_metric(bias_metrics_df, calculate_overall_auc(valid_df, model_name))
    return score
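
# Illustrative end-to-end usage (assumes config.identity_columns and
# config.toxicity_column are defined in config.py, and that identity_df carries one
# fractional-annotation column per identity):
#     score = scoring_valid(val_predictions,                    # 1-D array of scores
#                           val_df[config.identity_columns],
#                           val_df[config.toxicity_column].values,
#                           save_output=True)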


class ProgressMeter(object):
    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches))  # width of the batch counter
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)
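

if __name__ == '__main__':
    # Minimal smoke test (illustrative, not part of the original training pipeline):
    # drive AverageMeter / ProgressMeter the way a torch-style training loop would.
    losses = AverageMeter('Loss', ':.4f')
    progress = ProgressMeter(100, losses, prefix='Epoch[0] ')
    for i, loss in enumerate([0.9, 0.7, 0.4]):
        losses.update(loss, n=32)  # n = batch size
        progress.print(i)          # e.g. "Epoch[0] [  2/100]\tLoss 0.4000 (0.6667)"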