-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollinear_dataset.py
37 lines (28 loc) · 938 Bytes
/
collinear_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression
from metrics import *
def _make_dataset(n_rows, n_cols):
    """Draw a random feature frame and target series from NumPy's global RNG.

    The feature matrix is drawn first and the target vector second, so the
    sequence of values consumed from the seeded generator is fixed.

    Returns:
        A ``(features, targets)`` pair: a DataFrame with ``n_rows`` rows and
        ``n_cols`` columns, and a Series of length ``n_rows``.
    """
    features = pd.DataFrame(np.random.randn(n_rows, n_cols))
    targets = pd.Series(np.random.randn(n_rows))
    return features, targets


def _fit_and_report(features, targets):
    """Fit a LinearRegression (with intercept) and print its errors.

    The model is fitted via ``fit_vectorised`` and then asked to predict on
    the same ``features`` it was fitted on; the values returned by ``rmse``
    and ``mae`` for those predictions are printed with their labels.

    Returns:
        A ``(model, predictions)`` pair.
    """
    model = LinearRegression(fit_intercept=True)
    model.fit_vectorised(features, targets)
    predictions = model.predict(features)
    print('RMSE: ', rmse(predictions, targets))
    print('MAE: ', mae(predictions, targets))
    return model, predictions


# Fixed seed so both experiments draw the same random data on every run.
np.random.seed(42)
N = 30

print("----------------------------------- Multi collinear ----------------------------------")
P = 4
X, y = _make_dataset(N, P)
# Add one extra column (labelled P) that is exactly six times the last random
# column, so two feature columns are perfectly linearly dependent.
X[P] = X[P - 1] * 6
LR, y_hat = _fit_and_report(X, y)

print("----------------------------------------- Normal dataset -------------------------------------")
# Same total number of feature columns as above, but all drawn independently.
P = 5
Xnew, ynew = _make_dataset(N, P)
LRnew, y_hatnew = _fit_and_report(Xnew, ynew)