-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsupervised_experiment.py
73 lines (68 loc) · 2.39 KB
/
supervised_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from sklearn import svm, metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import KFold
from sklearn import metrics
import pickle
import numpy as np
import random
import time
# Experiment configuration: the value is embedded in both the input and the
# output file names below.
min_terms_preprocess = 10000
dataset = 'all_data_' + str(min_terms_preprocess) + '.pickle'

# Pickle data is binary, so open in "rb"; the `with` block closes the handle
# as soon as loading finishes.
# NOTE: unpickling can execute arbitrary code -- only load a trusted dataset.
with open(dataset, "rb") as dataset_file:
    all_data = pickle.load(dataset_file)

# Results file shared by the classifier sections that follow. The name `file`
# (which shadows the Python 2 builtin) is kept because later code refers to it.
file = open('supervised_experiment' + str(min_terms_preprocess) + '.txt', 'w')
# --- Random Forest: 10-fold cross-validation, one score per fold ---
print("Random Forest is loading...")

# Shuffle the samples in place before splitting; the KFold call below does
# not request shuffling itself.
random.shuffle(all_data)
n_samples = len(all_data)

# Every entry of all_data unpacks as a (value, label) pair; split the pairs
# into a sample array and a label array.
X = np.array([value for value, _ in all_data])
y = np.array([label for _, label in all_data])

kf = KFold(n_samples, n_folds=10)
start_time = time.time()
for train, test in kf:
    # Train a fresh forest on this fold's training indices only.
    rfc = RandomForestClassifier(n_estimators=100)
    rfc.fit(X[train], y[train])
    # Write the classifier's score on the held-out indices, one per line.
    file.write(str(rfc.score(X[test], y[test])))
    file.write("\n")

# The trailing newline keeps whatever is written next from running onto the
# timing line.
file.write("Time it took to execute: " + str(time.time() - start_time) + " seconds.\n")
print("Random Forest Output presented in the .txt file")
print("Loading next classifier...")
# --- Support Vector Machine: 10-fold cross-validation, one report per fold ---
# Load the preprocessed data. Pickle data is binary, so open in "rb" and let
# the `with` block close the handle.
# NOTE: unpickling can execute arbitrary code -- only load a trusted dataset.
with open(dataset, "rb") as dataset_file:
    all_data = pickle.load(dataset_file)
print('The Support Vector Machine is training ...' + str(min_terms_preprocess))

n_samples = len(all_data)

# Partition the (value, label) pairs into a sample array and a label array.
X = np.array([value for value, _ in all_data])
y = np.array([label for _, label in all_data])

# K-fold cross-validation with shuffled fold assignment.
kf = KFold(n_samples, n_folds=10, shuffle=True)

# Start timing before the loop so that training is included in the total.
start_time = time.time()
for k, (train, test) in enumerate(kf):
    # Fit on the training indices only: a model fitted on the full data set
    # would already have seen the samples it is evaluated on.
    classifier = svm.SVC(gamma=0.001).fit(X[train], y[train])
    predicted = classifier.predict(X[test])
    file.write("Classification report for classifier %s for fold %d:\n%s\n" % (classifier, k, metrics.classification_report(y[test], predicted)))

file.write("Time it took to execute: " + str(time.time() - start_time) + " seconds.\n")
print("Output displayed in txt file")
# Call close() (not just reference the attribute) so buffered results are
# flushed to disk and the handle is released.
file.close()