-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgrid_search.py
138 lines (122 loc) · 6.35 KB
/
grid_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Find best hyper params of model by grid search.
Then, save them to hyper params configuration class.
"""
from itertools import product

from configs import net_conf, params
from configs.net_conf import available_models, model_name_abbr_full
from configs.params import available_datasets
from models.model_factory import ModelFactory
from utils import tools
def tune_dropout_rate_SBLDModel():
    """Grid-search LSTM and dense dropout rates for the SBLD model.

    Trains one model per (lstm_rate, dense_rate) combination on the first
    available dataset; results are reported by ``tools.train_model``.
    """
    # NOTE(review): index 1 presumably selects StackedBiLSTMDenseModel — verify
    # against configs.net_conf.available_models.
    model_name = available_models[1]
    model_full_name = model_name_abbr_full[model_name]
    print('============ ' + model_full_name + ' tune dropout rate ============')
    # Don't set dropout rate too large, because it will cause information loss.
    # According to previous experiment: lstm rate >= 0.5, 0 <= dense rate <= 0.2
    lstm_p_dropouts = [0.5, 0.6, 0.7]
    dense_p_dropouts = [0, 0.1, 0.2]
    # Dataset choice is loop-invariant; resolve it once instead of per trial.
    dataset_name = available_datasets[0]
    dataset_params = params.get_dataset_params(dataset_name)
    for lstm_rate in lstm_p_dropouts:
        for dense_rate in dense_p_dropouts:
            # Fresh model and hyperparams per trial so runs don't share state.
            text_match_model = ModelFactory.make_model(model_name)
            hyperparams = net_conf.get_hyperparams(model_name)
            hyperparams.lstm_p_dropout = lstm_rate
            hyperparams.dense_p_dropout = dense_rate
            tools.train_model(text_match_model, hyperparams, dataset_params)
def tune_layer_num_SBLDModel():
    """Grid-search the number of return-sequence BiLSTM layers for the SBLD model.

    Trains one model per candidate layer count (0 means no stacked
    return-sequence layers) on the first available dataset.
    """
    run_this_model = available_models[1]
    model_full_name = model_name_abbr_full[run_this_model]
    print('============ ' + model_full_name + ' tune layer num ============')
    # RNMTPlusEncoderBiLSTMDenseModel | StackedBiLSTMDenseModel
    layer_nums = [0, 1, 2, 3]
    # Dataset choice is loop-invariant; resolve it once instead of per trial.
    dataset_name = available_datasets[0]
    dataset_params = params.get_dataset_params(dataset_name)
    for num in layer_nums:
        # Fresh model and hyperparams per trial so runs don't share state.
        text_match_model = ModelFactory.make_model(run_this_model)
        hyperparams = net_conf.get_hyperparams(run_this_model)
        hyperparams.bilstm_retseq_layer_num = num
        tools.train_model(text_match_model, hyperparams, dataset_params)
def tune_l2_lambda_SBLDModel():
    """Grid-search the four L2-regularization lambdas for the SBLD model.

    Trains one model per combination of kernel/recurrent/bias/activity
    lambdas (2*2*2*3 = 24 trials) on the first available dataset.
    """
    run_this_model = available_models[1]
    model_full_name = model_name_abbr_full[run_this_model]
    print('============ ' + model_full_name + ' tune l2 lambda ============')
    # RNMTPlusEncoderBiLSTMDenseModel | StackedBiLSTMDenseModel
    kernel_l2_lambdas = [1e-5, 1e-4]
    recurrent_l2_lambdas = [1e-5, 1e-4]
    bias_l2_lambdas = [1e-5, 1e-4]
    activity_l2_lambdas = [0, 1e-5, 1e-4]
    # Dataset choice is loop-invariant; resolve it once instead of per trial.
    dataset_name = available_datasets[0]
    dataset_params = params.get_dataset_params(dataset_name)
    # itertools.product flattens the 4-level nested loop over the full grid.
    for kernel_l2_lambda, recurrent_l2_lambda, bias_l2_lambda, activity_l2_lambda \
            in product(kernel_l2_lambdas, recurrent_l2_lambdas,
                       bias_l2_lambdas, activity_l2_lambdas):
        # Fresh model and hyperparams per trial so runs don't share state.
        text_match_model = ModelFactory.make_model(run_this_model)
        hyperparams = net_conf.get_hyperparams(run_this_model)
        hyperparams.kernel_l2_lambda = kernel_l2_lambda
        hyperparams.recurrent_l2_lambda = recurrent_l2_lambda
        hyperparams.bias_l2_lambda = bias_l2_lambda
        hyperparams.activity_l2_lambda = activity_l2_lambda
        tools.train_model(text_match_model, hyperparams, dataset_params)
def tune_state_dim_SBLDModel():
    """Grid-search the LSTM hidden-state dimension for the SBLD model.

    Trains one model per candidate state dim on the first available dataset.
    """
    run_this_model = available_models[1]
    model_full_name = model_name_abbr_full[run_this_model]
    print('============ ' + model_full_name + ' tune hidden state dim num ============')
    # RNMTPlusEncoderBiLSTMDenseModel | StackedBiLSTMDenseModel
    # The hidden state dim of LSTM should have a certain relationship with the word emb dim.
    # Information will be lost if dim is set to small.
    state_dims = [100, 200, 300, 400, 500, 600, 700]
    # Dataset choice is loop-invariant; resolve it once instead of per trial.
    dataset_name = available_datasets[0]
    dataset_params = params.get_dataset_params(dataset_name)
    for state_dim in state_dims:
        # Fresh model and hyperparams per trial so runs don't share state.
        text_match_model = ModelFactory.make_model(run_this_model)
        hyperparams = net_conf.get_hyperparams(run_this_model)
        hyperparams.state_dim = state_dim
        tools.train_model(text_match_model, hyperparams, dataset_params)
def tune_dropout_rate_REBLDModel():
    """Grid-search a shared dropout rate for the REBLD model.

    Unlike the SBLD sweep, the same rate is applied to both the LSTM and the
    dense layers in each trial.
    """
    # NOTE(review): index 3 presumably selects the REBLD model — verify
    # against configs.net_conf.available_models.
    model_name = available_models[3]
    model_full_name = model_name_abbr_full[model_name]
    print('============ ' + model_full_name + ' tune dropout rate ============')
    p_dropouts = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    # Dataset choice is loop-invariant; resolve it once instead of per trial.
    dataset_name = available_datasets[0]
    dataset_params = params.get_dataset_params(dataset_name)
    for p_dropout in p_dropouts:
        # Fresh model and hyperparams per trial so runs don't share state.
        text_match_model = ModelFactory.make_model(model_name)
        hyperparams = net_conf.get_hyperparams(model_name)
        hyperparams.lstm_p_dropout = p_dropout
        hyperparams.dense_p_dropout = p_dropout
        tools.train_model(text_match_model, hyperparams, dataset_params)
def tune_enc_layer_num_TEBLDModel():
    """Grid-search the encoder layer count for the TEBLD model.

    Trains one model per candidate encoder depth on the first available
    dataset.
    """
    # NOTE(review): index 2 presumably selects the TEBLD model — verify
    # against configs.net_conf.available_models.
    run_this_model = available_models[2]
    model_full_name = model_name_abbr_full[run_this_model]
    print('============ ' + model_full_name + ' tune enc layer num ============')
    enc_layer_nums = [1, 2, 3, 4, 5, 6]
    # Dataset choice is loop-invariant; resolve it once instead of per trial.
    dataset_name = available_datasets[0]
    dataset_params = params.get_dataset_params(dataset_name)
    for layer_num in enc_layer_nums:
        # Fresh model and hyperparams per trial so runs don't share state.
        text_match_model = ModelFactory.make_model(run_this_model)
        hyperparams = net_conf.get_hyperparams(run_this_model)
        hyperparams.layers_num = layer_num
        tools.train_model(text_match_model, hyperparams, dataset_params)
def tune_dropout_rate_TEBLDModel():
    """Grid-search a shared dropout rate for the TEBLD model.

    The same rate is applied to the generic, LSTM, and dense dropout
    hyperparameters in each trial.
    """
    run_this_model = available_models[2]
    model_full_name = model_name_abbr_full[run_this_model]
    print('============ ' + model_full_name + ' tune dropout rate ============')
    dropout_rates = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    # Dataset choice is loop-invariant; resolve it once instead of per trial.
    dataset_name = available_datasets[0]
    dataset_params = params.get_dataset_params(dataset_name)
    for p_dropout in dropout_rates:
        # Fresh model and hyperparams per trial so runs don't share state.
        text_match_model = ModelFactory.make_model(run_this_model)
        hyperparams = net_conf.get_hyperparams(run_this_model)
        hyperparams.p_dropout = p_dropout
        hyperparams.lstm_p_dropout = p_dropout
        hyperparams.dense_p_dropout = p_dropout
        tools.train_model(text_match_model, hyperparams, dataset_params)
if __name__ == '__main__':
    # Entry point: exactly one tuning sweep is enabled at a time, since each
    # call launches a full series of training runs. Uncomment the sweep you
    # want and comment out the rest.
    # tune_dropout_rate_SBLDModel()
    # tune_layer_num_SBLDModel()
    # tune_l2_lambda_SBLDModel()
    # tune_state_dim_SBLDModel()
    # tune_dropout_rate_REBLDModel()
    tune_dropout_rate_TEBLDModel()