-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_management.py
76 lines (55 loc) · 2.46 KB
/
data_management.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
from glob import glob
import os
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib
from keras.models import load_model
from keras.wrappers.scikit_learn import KerasClassifier
import model as m
import config
def load_image_paths(data_folder):
    """
    Make a dataframe with image path and target.

    Every entry directly under ``data_folder`` is treated as a class folder;
    each ``*.png`` file matched inside it becomes one row whose target is the
    name of that folder.

    Parameters
    ----------
    data_folder : str
        Directory whose immediate children are the per-class folders.

    Returns
    -------
    pandas.DataFrame
        Columns ``image`` (path to the file) and ``target`` (class folder
        name), indexed 0..n-1. Empty, but with both columns present, when no
        PNG file is found.
    """
    rows = []
    # navigate within each folder (use the argument, not a module global)
    for class_folder_name in os.listdir(data_folder):
        class_folder_path = os.path.join(data_folder, class_folder_name)
        # collect every image path; a non-directory entry simply matches nothing
        pattern = os.path.join(class_folder_path, "*.png")
        rows.extend(
            (image_path, class_folder_name) for image_path in glob(pattern)
        )

    # build the frame once instead of concatenating one tiny frame per image;
    # this also yields a well-formed empty frame when there are no rows
    return pd.DataFrame(rows, columns=['image', 'target'])
def get_train_test_target(df):
    """
    Split the image/target dataframe into train and test parts.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``image`` column and a ``target`` column.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``; 20% of the rows go to the
        test part, the split uses ``random_state=101``, and each returned
        object has its index reset to 0..n-1.
    """
    # split the frame that was passed in, not a global of a different name
    X_train, X_test, y_train, y_test = train_test_split(
        df['image'],
        df['target'],
        test_size=0.20,
        random_state=101,
    )

    # drop the shuffled original index so each part is indexed from zero;
    # done without inplace=True so the split results are not mutated in place
    X_train = X_train.reset_index(drop=True)
    X_test = X_test.reset_index(drop=True)
    y_train = y_train.reset_index(drop=True)
    y_test = y_test.reset_index(drop=True)

    return X_train, X_test, y_train, y_test
def save_pipeline_keras(model):
    """
    Persist a fitted pipeline as three separate artifacts.

    Parameters
    ----------
    model : pipeline object exposing ``named_steps``
        Must contain a ``'dataset'`` step and a ``'cnn_model'`` step; the
        latter must expose ``classes_`` and a ``model`` with a ``save`` method.

    The ``'dataset'`` step is written to ``config.PIPELINE_PATH`` and the
    classifier's ``classes_`` to ``config.CLASSES_PATH`` (both with joblib);
    the underlying network is written to ``config.MODEL_PATH`` through its
    own ``save`` method.
    """
    # preprocessing step first
    dataset_step = model.named_steps['dataset']
    joblib.dump(dataset_step, config.PIPELINE_PATH)

    # then the classifier wrapper: its labels and its network
    cnn_step = model.named_steps['cnn_model']
    joblib.dump(cnn_step.classes_, config.CLASSES_PATH)

    network = model.named_steps['cnn_model'].model
    network.save(config.MODEL_PATH)
def load_pipeline_keras():
    """
    Rebuild the pipeline from the artifacts written by ``save_pipeline_keras``.

    Reads the ``'dataset'`` step from ``config.PIPELINE_PATH``, the class
    labels from ``config.CLASSES_PATH`` and the network from
    ``config.MODEL_PATH``, then wires them into a two-step pipeline.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps ``'dataset'`` and ``'cnn_model'``, in that order.
    """
    dataset = joblib.load(config.PIPELINE_PATH)

    def build_model():
        # the wrapper's build function just reloads the saved network
        return load_model(config.MODEL_PATH)

    # NOTE(review): validation_split = 10 is kept as found; Keras documents
    # validation_split as a fraction between 0 and 1, so this value looks
    # wrong if the loaded pipeline is ever re-fitted -- confirm intended value.
    classifier = KerasClassifier(build_fn=build_model,
                                 batch_size=config.BATCH_SIZE,
                                 validation_split=10,
                                 epochs=config.EPOCHS,
                                 verbose=2,
                                 callbacks=m.callbacks_list,
                                 )

    # restore the fitted state under the same attribute name that
    # save_pipeline_keras reads it from (classes_, with trailing underscore)
    classifier.classes_ = joblib.load(config.CLASSES_PATH)
    classifier.model = build_model()

    return Pipeline([
        ('dataset', dataset),
        ('cnn_model', classifier)
    ])