-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLR_NB.py
125 lines (100 loc) · 4.66 KB
/
LR_NB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from Preprocessing import Load_mushroom_dataset, Load_MushroomDataset_with_Splits
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import time as time
class Mushroom:
    """Compare Bernoulli Naive Bayes and Logistic Regression on mushroom data.

    Naive Bayes is trained on the plain feature matrix, while Logistic
    Regression alone is trained on the features obtained after PCA.

    Attributes are stored exactly as passed to the constructor:
        x_data: feature matrix used for Naive Bayes.
        y_data: target labels shared by both classifiers.
        x_data_pca: feature matrix after PCA, used for Logistic Regression.
    """

    # Display name of the only classifier that is fed the PCA features.
    _PCA_CLASSIFIER = 'Logistic Regression'
    # Horizontal rule printed around every report.
    _RULE = "-------------------------------------"

    def __init__(self, x_data, y_data, x_data_pca):
        """Store the datasets used by the cross-validated comparison."""
        self.x_data = x_data
        self.y_data = y_data
        self.x_data_pca = x_data_pca

    @staticmethod
    def _build_classifiers():
        """Return fresh, unfitted estimators keyed by their display name."""
        return {
            'Naive Bayes': BernoulliNB(),
            'Logistic Regression': LogisticRegression(solver='newton-cg'),
        }

    @classmethod
    def _print_report(cls, title, classifier_name, accuracy, elapsed):
        """Print one accuracy/time report in the shared console layout."""
        print(cls._RULE)
        print(title)
        print(cls._RULE)
        print("Classifier:", classifier_name)
        print("Accuracy:", (accuracy * 100), "%")
        print("Time:", elapsed, "sec")
        print(cls._RULE)

    def classifiers(self):
        """Cross-validate every classifier and collect the mean accuracies.

        Returns:
            A list of mean accuracies in the order the classifiers are
            defined: Naive Bayes first, then Logistic Regression.
        """
        y_data = self.y_data
        accuracies = []
        for name, model in self._build_classifiers().items():
            # Only Logistic Regression uses the features after PCA.
            if name == self._PCA_CLASSIFIER:
                x_data = self.x_data_pca
            else:
                x_data = self.x_data
            accuracies.append(
                self.fit_predict_crossvalidate(name, model, x_data, y_data)
            )
        return accuracies

    def fit_predict_crossvalidate(self, estimator_name, estimator, x_data, y_data):
        """Run 10-fold cross validation for one estimator and report it.

        Args:
            estimator_name: label printed in the report.
            estimator: unfitted scikit-learn estimator to evaluate.
            x_data: feature matrix.
            y_data: target labels.

        Returns:
            The mean test accuracy over the folds.
        """
        # Only accuracy is reported; the other metrics are still requested so
        # the cross-validation run itself is unchanged.
        scoring = ['accuracy', 'f1', 'precision', 'recall']

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        # An integer cv uses scikit-learn's default splitter; per its docs that
        # is stratified k-fold for classifiers, and the folds are NOT shuffled.
        scores = cross_validate(estimator, x_data, y_data, cv=10, scoring=scoring)
        avg_accuracy = scores['test_accuracy'].mean()
        elapsed = time.perf_counter() - start

        self._print_report(
            "Cross validation results ", estimator_name, avg_accuracy, elapsed
        )
        return avg_accuracy

    def fit_predict(self):
        """Train and score every classifier on a single train/test split.

        The split is loaded through Load_MushroomDataset_with_Splits(); the
        datasets stored on the instance are not used here. Results are only
        printed, nothing is returned.
        """
        (x_train, x_test, y_train, y_test,
         x_train_pca, x_test_pca,
         y_train_pca, y_test_pca) = Load_MushroomDataset_with_Splits()

        for name, model in self._build_classifiers().items():
            # Only Logistic Regression uses the PCA variant of the split.
            if name == self._PCA_CLASSIFIER:
                x_train_data, y_train_data = x_train_pca, y_train_pca
                x_test_data, y_test_data = x_test_pca, y_test_pca
            else:
                x_train_data, y_train_data = x_train, y_train
                x_test_data, y_test_data = x_test, y_test

            # The timed section covers training, prediction and scoring.
            start = time.perf_counter()
            model.fit(x_train_data, y_train_data)
            predicted = model.predict(x_test_data)
            accuracy = accuracy_score(y_test_data, predicted)
            elapsed = time.perf_counter() - start

            self._print_report(
                "Without Cross validation results ", name, accuracy, elapsed
            )
def Classify_Mushroom():
    """Load the mushroom data and run both evaluation passes.

    The single train/test split evaluation runs first (it only prints its
    results), followed by the cross-validated evaluation.

    Returns:
        Whatever Mushroom.classifiers() returns for the loaded data.
    """
    features, labels, features_pca = Load_mushroom_dataset()
    runner = Mushroom(features, labels, features_pca)

    # Single-split results are printed only.
    runner.fit_predict()

    # Cross-validated results are printed and handed back to the caller.
    return runner.classifiers()
def time_classify():
    """Load the mushroom data, run both evaluation passes and return the result.

    NOTE: despite the function name, the value handed back is the return
    value of Mushroom.classifiers(), not a measured duration; the timings
    are only printed by the evaluation methods.
    """
    loaded = Load_mushroom_dataset()
    x_data, y_data, x_data_pca = loaded

    evaluator = Mushroom(x_data, y_data, x_data_pca)
    evaluator.fit_predict()

    result = evaluator.classifiers()
    return result
# Script entry point: runs both evaluation passes when this module is loaded.
# NOTE(review): the call is unguarded, so importing this module also triggers
# it; consider an `if __name__ == "__main__":` guard once it is confirmed that
# no importer relies on this side effect.
Classify_Mushroom()