forked from brandonrobertz/autoscrape-py
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrain.py
executable file
·82 lines (64 loc) · 1.91 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import argparse
import pickle
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
def parse_args(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Returns:
        An ``argparse.Namespace`` with three attributes:
        ``data`` (path of the input pickle, required),
        ``output`` (path the model is written to, required) and
        ``model`` (``"kNN"`` or ``"SVC"``, defaulting to ``"kNN"``).

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            ``--model`` is not one of the allowed choices.
    """
    desc = "Convenience script for training autoscrape models."
    parser = argparse.ArgumentParser(
        description=desc
    )
    parser.add_argument(
        "--data", type=str, required=True,
        help="Input data pickle."
    )
    parser.add_argument(
        "--output", type=str, required=True,
        help="Filename to output trained model to."
    )
    parser.add_argument(
        "--model", type=str, default="kNN",
        choices=["kNN", "SVC"],
        help="Which classifier to use (default: kNN)."
    )
    return parser.parse_args(argv)
def load_data(filepath):
    """Load and return the pickled object stored at ``filepath``.

    Prints a progress message, then unpickles the file's contents.

    Args:
        filepath: Path of the pickle file to read (opened in binary mode).

    Returns:
        Whatever object was pickled into the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the contents are not a valid pickle.
    """
    print("Loading data...")
    with open(filepath, "rb") as f:
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file. Only pass paths to pickles produced by a trusted source.
        return pickle.load(f)
def _drop_nan_rows(features, labels):
    """Return (X, y) arrays containing only the rows of ``features`` free of NaN.

    ``features`` must expose ``.shape`` and be indexable by row; the label
    kept for row ``i`` is ``labels[i][0]``.
    """
    kept_rows = []
    kept_labels = []
    for i in range(features.shape[0]):
        row = features[i]
        if np.any(np.isnan(row)):
            continue
        kept_rows.append(row)
        kept_labels.append(labels[i][0])
    return np.array(kept_rows), np.array(kept_labels)


def _build_model(name):
    """Instantiate the (unfitted) classifier selected by ``--model``."""
    if name == "kNN":
        return KNeighborsClassifier(1)
    if name == "SVC":
        print("Fitting SVC model..")
        return SVC()
    raise NotImplementedError("Bad model selected: %s" % name)


def main():
    """Train the selected classifier on the pickled data and save it.

    Steps: parse the CLI options, load the data pickle (an object with
    ``X`` and ``y`` attributes), discard rows whose features contain NaN,
    report 3-fold cross-validated accuracy, fit the model on all remaining
    rows and pickle it to the ``--output`` path.

    Raises:
        ValueError: If no rows remain after discarding NaN rows.
        NotImplementedError: If the model name is not recognised.
    """
    args = parse_args()
    data = load_data(args.data)
    print("X shape", data.X.shape)

    X, y = _drop_nan_rows(data.X, data.y)
    if X.shape[0] == 0:
        raise ValueError("No usable rows: every sample contains NaN values.")

    model = _build_model(args.model)

    scores = cross_val_score(model, X, y, cv=3)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # cross_val_score only fits clones of the estimator, so `model` is still
    # unfitted here. Fit it on the full cleaned data set so that the pickle
    # written below is a trained model rather than an empty one.
    model.fit(X, y)

    print("Saving model...")
    with open(args.output, "wb") as f:
        pickle.dump(model, f)
    print("Done!")


if __name__ == "__main__":
    main()