-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathML_function.py
104 lines (74 loc) · 2.35 KB
/
ML_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
from scipy import stats as st
np.set_printoptions(suppress=True) #to discard the scientific notation
def ML_regression(X,Y):
#a column of ones is created to add to the explanatory variable matrix
column_of_ones = np.ones((len(X),1), dtype=int)
column_of_ones
#concatenating the explanatory variables matrix with the column of ones
X_final = np.concatenate((column_of_ones,X), axis=1)
#the normal equation
res1 = np.dot(X_final.T,X_final)
res2 = np.linalg.inv(res1)
res3 = np.dot(res2,X_final.T)
res4 = np.dot(res3,Y)
print("Coefficients: ",res4.reshape(res4.shape[0],).tolist())
#Y-pred
Y_pred = np.dot(X_final,res4)
#Sum of Squared Errors (SSE)
#E = Errors
E = Y - Y_pred
sse = np.dot(E.T,E)
sse
print("SSE: ",sse.reshape(sse.shape[0],).tolist())
#Mean of Squared Errors (MSE)
E = Y - Y_pred
n = len(Y)
mse = sse / (n - X.shape[1])
print("MSE: ", mse.reshape(mse.shape[0],).tolist())
#Standard Errors
se = math.sqrt(mse)
print("SE: ",se)
#a list of variable standard errors
se_variables = np.sqrt(np.diag(mse*res2)).tolist()
#a list of betas
betalist = res4.reshape(res4.shape[0],).tolist()
#a dictionary of beta - se tuples to be used in the CI formula
d1 = dict(zip(betalist,se_variables))
#t-stat
#%95 confidence
t = st.t.ppf(1-0.025,n-X.shape[1])
print("t-stat: ", t)
#%95 Confidence Intervals
#Lower CI's
#intercept lower
#GDP lower
#Undernourishment lower
for beta in d1:
print("Lower CI's: ", beta-(t*d1[beta]))
#Upper CI's
#intercept upper
#GDP upper
#Undernourishment upper
for beta in d1:
print("Upper CI's: ", beta+(t*d1[beta]))
#t-values
t_list = []
#intercept t-value
#GDP t-value
#Undernourishment t-value
for beta in d1:
beta/d1[beta]
t_list.append(beta/d1[beta])
print("t-values: ", t_list)
#intercept t
#GDP t
#Undernourishment t
for t_v in t_list:
if t_v >= t:
print('Null hypothesis rejected.')
else:
print("Failed to reject the null hypothesis.")