This repository has been archived by the owner on Aug 11, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsession3.py
111 lines (82 loc) · 3.22 KB
/
session3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
# from sklearn import svm
# from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
################################################
############ Cross Validation ###########
################################################
########### train_test_split
from sklearn.model_selection import train_test_split
boston = datasets.load_boston()
print(boston.data.shape, boston.target.shape)
X_train, X_test, y_train, y_test = train_test_split(
boston.data, boston.target, test_size=0.1, random_state=0)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
regr = LinearRegression()
regr.fit(X_train,y_train)
print(regr.score(X_train,y_train))
predictions= regr.predict(X_test)
##plot
plt.scatter(y_test, predictions)
plt.xlabel("True Values")
plt.ylabel("Predictions")
# model = svm.SVC(kernel='linear', C=1)
# scores = cross_val_score(model , boston.data, boston.target, cv=10)
# print(scores)
############ KFold
from sklearn.model_selection import KFold
# X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
# y = np.array([1, 2, 3, 4])
kf = KFold(n_splits=5)
kfsplit = kf.get_n_splits(boston.data)
print(kf)
kfoldregr = LinearRegression()
# kfoldregr.fit(X_train,y_train)
# print(kfoldregr.score(X_train,y_train))
# print(kfoldregr.predict(X_test))
KFoldScore=[]
for train_index, test_index in kf.split(boston.data):
print("TRAIN:", train_index, "TEST:", test_index)
X_train, X_test = boston.data[train_index], boston.data[test_index]
y_train, y_test = boston.target[train_index], boston.target[test_index]
kfoldregr.fit(X_train, y_train)
KFoldScore.append(kfoldregr.score(X_train, y_train))
print(KFoldScore)
######## Leave One Out Cross Validation
from sklearn.model_selection import LeaveOneOut
X = np.array([[1, 2], [3, 4]])
y = np.array([1, 2])
loo = LeaveOneOut()
loo.get_n_splits(X)
for train_index, test_index in loo.split(X):
print("TRAIN:", train_index, "TEST:", test_index)
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
print(X_train, X_test, y_train, y_test)
########## Little sample of cross validation
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn import metrics
lm = LinearRegression()
scores = cross_val_score(lm, boston.data, boston.target, cv=6)
print("Cross-validated scores:", scores)
predictions = cross_val_predict(lm,boston.data, boston.target, cv=6)
plt.scatter(boston.target, predictions)
r2_score = metrics.r2_score(boston.target, predictions)
print("Cross-Predicted R^2",r2_score)
##########################################
############## preprocessing
##########################################
from sklearn import preprocessing
X_train, X_test, y_train, y_test = train_test_split(
boston.data, boston.target, test_size=0.4, random_state=0)
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_transformed = scaler.transform(X_train)
clf = regr.fit(X_train_transformed, y_train)
X_test_transformed = scaler.transform(X_test)
print(clf.score(X_train_transformed,y_train))
print(regr.predict(X_test_transformed))
5+2