postprocessing.py
from math import sqrt
import numpy as np
from scipy.stats import norm, normaltest, ttest_ind, ks_2samp
from statsmodels.stats.power import TTestIndPower

# with two submissions we can always get the championship game correct by overriding possible finals to opposite extremes
def override_final_predictions(slots, seeds, matchups, predictions, new_value):
diff_predictions = list(predictions)
for idx, matchup in enumerate(matchups):
if _possible_tourney_final(slots, seeds, matchup):
diff_predictions[idx] = new_value
return diff_predictions

def _possible_tourney_final(slots, seeds, matchup):
year, teama, teamb = matchup.split('_')
teama_region = seeds[year][teama][0]
teamb_region = seeds[year][teamb][0]
(champ_regions1, champ_regions2) = slots[year]
return ((champ_regions1.find(teama_region) > -1 and champ_regions2.find(teamb_region) > -1) or
(champ_regions2.find(teama_region) > -1 and champ_regions1.find(teamb_region) > -1))
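
# minimal usage sketch (not called anywhere): the shapes below are assumptions
# inferred from the lookups above -- seeds maps year -> team -> seed string whose
# first character is the region, slots maps year -> the two region strings feeding
# the championship game, and matchups look like 'year_teamA_teamB'
def _example_override_final_predictions():
    slots = {'2019': ('WX', 'YZ')}                    # hypothetical regions on each side of the final
    seeds = {'2019': {'1181': 'W01', '1277': 'Y02'}}  # hypothetical team ids and seed strings
    matchups = ['2019_1181_1277']
    predictions = [0.55]
    # one submission calls the possible final for team A, the other against, so one is always right
    return (override_final_predictions(slots, seeds, matchups, predictions, 0.99),
            override_final_predictions(slots, seeds, matchups, predictions, 0.01))
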
# convert an expected margin of victory into a win probability, following
# https://www.pro-football-reference.com/about/win_prob.htm
def mov_to_win_percent(u, m=11, offset=0):
u = u + offset
return 1 - norm.cdf(0.5, loc=u, scale=m) + .5 * (norm.cdf(0.5, loc=u, scale=m) - norm.cdf(-0.5, loc=u, scale=m))
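
# quick sanity check (sketch): the margin of victory is modeled as normal with
# standard deviation m, with a half-point continuity correction around a tie,
# so a zero expected margin maps to exactly a 50% win probability
def _example_mov_to_win_percent():
    assert abs(mov_to_win_percent(0) - 0.5) < 1e-9
    return mov_to_win_percent(7), mov_to_win_percent(-7)  # favorite vs. underdog by the same margin
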
def average_prediction_probas(regression_models, classification_models, X, low_clip=.01, high_clip=.99):
predictions = [model.predict(X) for model in regression_models] + [model.predict_proba(X)[:, -1] for model in classification_models]
return np.clip(np.mean(np.array(predictions), axis=0), low_clip, high_clip) # clip based on historical upset probabilities for R1
#TODO instead of clipping it would make sense to push R1 1/2 seed predictions closer to .99/.93

def average_predictions(regression_models, classification_models, X):
predictions = [np.reshape([1 if yi >= .5 else 0 for yi in model.predict(X)], (X.shape[0])) for model in regression_models] + \
[np.reshape(model.predict(X), (X.shape[0])) for model in classification_models]
return np.rint(np.mean(np.array(predictions), axis=0))
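
# usage sketch: any fitted estimators with a scikit-learn style predict /
# predict_proba interface can be ensembled here; LinearRegression and
# LogisticRegression below are illustrative stand-ins, not necessarily the
# models this project trains
def _example_average_models():
    from sklearn.linear_model import LinearRegression, LogisticRegression  # assumed available
    rng = np.random.RandomState(0)
    X, y = rng.rand(100, 4), rng.randint(0, 2, 100)
    regressors = [LinearRegression().fit(X, y)]
    classifiers = [LogisticRegression().fit(X, y)]
    probas = average_prediction_probas(regressors, classifiers, X)  # clipped mean win probabilities
    labels = average_predictions(regressors, classifiers, X)        # rounded majority 0/1 predictions
    return probas, labels
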
# https://machinelearningmastery.com/use-statistical-significance-tests-interpret-machine-learning-results/
# https://machinelearningmastery.com/a-gentle-introduction-to-the-bootstrap-method/
# https://machinelearningmastery.com/calculate-bootstrap-confidence-intervals-machine-learning-results-python/
# tells whether one model is really better than another
# null hypothesis H0: both samples are drawn from the same distribution, i.e. the models work equally well
# returns False when we fail to reject H0 (no statistically significant difference)
# returns True when the means differ at the 95% confidence level
# type I error: erroneously rejecting the null hypothesis
# type II error: erroneously failing to reject the null hypothesis
def significance_test(vals1, vals2):
if _normal(vals1) and _normal(vals2):
return _t_test(vals1, vals2) <= .05
return _ks_test(vals1, vals2) <= .05

def _normal(vals):
    _, p = normaltest(vals)  # D'Agostino-Pearson test of normality
    return p >= 0.05  # fail to reject normality at the 5% significance level

def _t_test(vals1, vals2):
    var = np.std(vals1) == np.std(vals2)  # exact float equality rarely holds, so this almost always picks Welch's
    _, p = ttest_ind(vals1, vals2, equal_var=var)  # Student's t-test assumes equal variance, Welch's does not
return p

def _ks_test(vals1, vals2):
    _, p = ks_2samp(vals1, vals2)  # two-sample Kolmogorov-Smirnov test
return p
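
# usage sketch: vals1 / vals2 would normally be per-fold or bootstrap scores from
# two candidate models; the seeded synthetic samples below are only illustrative
def _example_significance_test():
    rng = np.random.RandomState(0)
    scores_a = rng.normal(loc=0.55, scale=0.02, size=100)
    scores_b = rng.normal(loc=0.53, scale=0.02, size=100)
    return significance_test(scores_a, scores_b)  # True means the gap is significant at the 5% level
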
# tells how good the final model is (percentile confidence interval over bootstrap scores)
def confidence_intervals(vals, alpha=.95):
p = ((1.0-alpha)/2.0) * 100
lower = max(0.0, np.percentile(vals, p))
p = (alpha+((1.0-alpha)/2.0)) * 100
upper = min(1.0, np.percentile(vals, p))
return lower, upper
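
# usage sketch: for alpha=.95 this reports the 2.5th and 97.5th percentiles of the
# bootstrap scores, clipped to the [0, 1] range of a probability-style metric
def _example_confidence_intervals():
    rng = np.random.RandomState(0)
    bootstrap_scores = rng.uniform(0.5, 0.7, size=1000)
    return confidence_intervals(bootstrap_scores)  # roughly (0.505, 0.695) for this synthetic sample
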
# https://www.kdnuggets.com/2019/01/comparing-machine-learning-models-statistical-vs-practical-significance.html
# quantifies the size of the effect when it is statistically significant; by Cohen's benchmarks roughly .1 is small, .3 medium, .5 large
def effect_size(vals1, vals2):
p = _t_test(vals1, vals2) if _normal(vals1) and _normal(vals2) else _ks_test(vals1, vals2)
return abs(norm.ppf(p)) / sqrt(len(vals1))
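
# interpretation sketch: the value approximates the effect size r = z / sqrt(n),
# so Cohen's benchmarks above apply; a gap this wide relative to the spread
# should land toward the large end of the scale
def _example_effect_size():
    rng = np.random.RandomState(0)
    scores_a = rng.normal(loc=0.55, scale=0.02, size=100)
    scores_b = rng.normal(loc=0.53, scale=0.02, size=100)
    return effect_size(scores_a, scores_b)
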
# statistical power is the probability of detecting a true effect (only meaningful when the null hypothesis is rejected)
# solving with nobs1=None estimates the minimum sample size per group
# defaults: large effect size (.8), 20% chance of a type II error (power=.8), 5% chance of a type I error (alpha=.05)
def statistical_power(effect=.8, power=.8, alpha=.05):
analysis = TTestIndPower()
return analysis.solve_power(effect, power=power, nobs1=None, ratio=1.0, alpha=alpha)
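
# usage sketch: with the defaults (large effect size .8, 80% power, 5% alpha) the
# required sample size works out to roughly 26 observations per group
def _example_statistical_power():
    return int(np.ceil(statistical_power()))  # ~26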