# genome_neural_network.py
# numpy and matplotlib are used directly in this module; they are imported explicitly here
# rather than relying on the star imports below to re-export them
import numpy as np
import matplotlib.pyplot as plt

from deconstruct_genome import DeconstructGenome
from neural_network import *
from genome import *
from gene import *
from neural_network_components import *


class GenomeNeuralNetwork:
def __init__(self, genome, x_train, y_train, create_weights_bias_from_genome, activation_type, learning_rate=0.0001,
num_epochs=1000, batch_size=64, show_connections=False):
"""
:param genome: The genome class from which the the neural network will be made from
:param x_train: The x_training data in the shape (num_examples, num_features)
:param y_train: The y_training data in the shape (num_examples, 1) only one column indicating which category
:param create_weights_bias_from_genome: True or False if you want the neural network to initialise with the weights
from the genome connections and the biases from the genome nodes
:param activation_type: Specify the activation type for each layer excluding the last layer. This will be used
for every layer apart from the last.
:param learning_rate: Learning rate
:param num_epochs: Number of iterations to run
:param batch_size: Batch size since it is coded with stochastic gradient descent
:param show_connections: True or false if the connections for the neural network AFTER it has been unpack from
the genome are needed to be shown
"""
self.genome = genome
# This is a matrix containing 1's and 0's depending on if there is a connection or not for each weight matrix
# in the neural network
self.connection_matrices_per_layer = genome.connection_matrices_per_layer
        # Similar to above: a matrix per layer marking the nodes which should never have a bias applied (because
        # they are dummy nodes)
self.no_activations_matrix_per_layer = genome.no_activations_matrix_per_layer
# A list of connections which will always have a constant 1 connection value
self.constant_weight_connections = genome.constant_weight_connections
# The number of nodes for each layer
self.nodes_per_layer = genome.nodes_per_layer
        # A dictionary giving each node's position within its respective layer
self.node_map = genome.node_map
# Dictionary with the connections for each layer (so weight matrix can be made)
self.layer_connections_dict = genome.layer_connections_dict
        # An updated list of the nodes, including dummy nodes
self.updated_nodes = genome.updated_nodes
# A dictionary with the nodes for each layer
self.layer_nodes = genome.layer_nodes
# A dict containing which layer each node is on
self.node_layers = genome.node_layers
# Key: input_node for a connection that spans multiple layers, value: the last dummy node of the set of connections to reach the final node in the connection
self.last_dummy_related_to_connection = genome.last_dummy_related_to_connection
self.x_train = x_train
self.y_train = y_train
self.batch_size = batch_size
# Where weights are saved
self.weights_dict = {}
# Where bias for every layer is saved
self.bias_dict = {}
# Dictionary of activation functions for each layer
self.activation_function_dict = {}
self.learning_rate = learning_rate
self.num_epochs = num_epochs
# Minus one because we include the data input as a layer in 'nodes_per_layer'
self.num_layers = max(self.nodes_per_layer) - 1
# So far there have only been two implementations for the activation type
assert (activation_type in ('relu', 'sigmoid'))
# Set the activation functions for each layer
self.initialise_activation_functions(activation_type=activation_type)
self.initialise_parameters(have_bias=True,
create_weight_bias_matrix_from_genome=create_weights_bias_from_genome)
self.x_train = self.modify_x_data_for_sources()
# This is to check that there is an activation function specified for each layer
assert (len(self.connection_matrices_per_layer) == len(self.activation_function_dict))
# The activation function for the last layer should be a sigmoid due to how the gradients were calculated
assert (self.activation_function_dict[len(self.connection_matrices_per_layer)] == ActivationFunctions.sigmoid)
if self.x_train.shape[1] != self.weights_dict[1].shape[0]:
raise Exception('The training data has not been correctly modified to fit the connections')
if show_connections:
for key, value in self.layer_connections_dict.items():
print('Layer: {}'.format(key), 'Connections: {}'.format(value), '\n')
def initialise_activation_functions(self, activation_type):
for layer in range(1, self.num_layers):
self.activation_function_dict[
layer] = ActivationFunctions.relu if activation_type == 'relu' else ActivationFunctions.sigmoid
# The last activation should always be a sigmoid because that's how the gradients are calculated
self.activation_function_dict[self.num_layers] = ActivationFunctions.sigmoid
def ensure_correct_weights(self):
"""
        Goes through the weights dict to ensure that the weight is set to zero wherever there is no connection, and
        that connections which should carry a constant 1 are set as such.
"""
for layer in self.weights_dict:
            # Zeroes out the weights where there is no connection
self.weights_dict[layer] *= self.connection_matrices_per_layer[layer]
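            # Worked example with hypothetical values: applying the mask [[1, 0], [1, 1]] to weights
            # [[0.3, 0.8], [0.1, 0.5]] leaves [[0.3, 0.0], [0.1, 0.5]]; the loop below then pins any
            # constant connection back to exactly 1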
            # Set the connections which should carry a constant weight of one
for connection in self.constant_weight_connections[layer]:
# Need to convert to their position in their layer
# Minus one because of python indexing
input_position_within_layer = self.node_map[connection.input_node] - 1
output_position_within_layer = self.node_map[connection.output_node] - 1
# Set the weight to one
self.weights_dict[layer][input_position_within_layer, output_position_within_layer] = 1
def ensure_correct_bias(self):
"""
Zeroes out the bias values where the node is a dummy node
"""
for layer in range(1, self.num_layers + 1):
            # Multiply by the mask matrix so that the bias values for dummy nodes are zeroed out
self.bias_dict[layer] *= self.no_activations_matrix_per_layer[layer]
@staticmethod
def xavier_initalizer(num_inputs, num_outputs):
"""
        NOTE: this scales by sqrt(2 / num_inputs) (He initialisation, suited to ReLU); classic Xavier would use a
        factor of 1 instead of 2
"""
np.random.seed(7)
weights = np.random.randn(num_inputs, num_outputs) * np.sqrt(2 / num_inputs)
return weights
def create_weight_bias_matrix_from_genome(self):
for layer in range(1, self.num_layers + 1):
for connection in self.layer_connections_dict[layer]:
if connection.enabled:
# They both need to be set if we're going to create the weight matrix from them
if connection.weight is None:
raise ValueError('You have not set a weight for this connection')
if self.updated_nodes[connection.output_node].bias is None:
                        raise ValueError("The node doesn't have a bias value, please set one in the nodes list")
# Get their relative position inside their respective layer. Minus one for python indexing
input_node_position = self.node_map[connection.input_node] - 1
output_node_position = self.node_map[connection.output_node] - 1
self.weights_dict[layer][input_node_position, output_node_position] = connection.weight
self.bias_dict[layer][0, output_node_position] = self.updated_nodes[connection.output_node].bias
def initialise_parameters(self, create_weight_bias_matrix_from_genome, have_bias=False):
"""
:param create_weight_bias_matrix_from_genome: A boolean of whether we should use the weights already given to each gene
        :param have_bias: Indicates whether to initialise a bias parameter as well
"""
# Initialise parameters
for layer in range(1, self.num_layers + 1):
            # ensure_correct_weights (called below) zeroes out the weights where there is no connection between
            # nodes. nodes_per_layer counts the data inputs as the first layer, so nodes_per_layer[1] gives the
            # number of features in the training set and nodes_per_layer[layer + 1] gives this layer's outputs.
self.weights_dict[layer] = self.xavier_initalizer(
num_inputs=self.nodes_per_layer[layer],
num_outputs=self.nodes_per_layer[layer + 1])
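            # E.g. (hypothetical) with nodes_per_layer = {1: 3, 2: 4, 3: 1}, layer 1 gets a (3, 4) weight
            # matrix: three input features feeding four first-layer nodes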
if have_bias:
# Shape is (1, num_outputs) for the layer
self.bias_dict[layer] = np.zeros((1, self.nodes_per_layer[layer + 1]))
if create_weight_bias_matrix_from_genome:
self.create_weight_bias_matrix_from_genome()
# Zeroes out connections where there aren't any and sets the constant connections to one
self.ensure_correct_weights()
# Zeroes out the bias values for each layer where there is a dummy node
self.ensure_correct_bias()
def run_one_pass(self, input_data, labels=None, return_cost_only=False, return_prediction_only=False):
"""
        One pass counts as one forward propagation and one backward propagation, including the optimisation of
        the parameters
:param return_prediction_only: Boolean on whether just to return the predictions
:param labels: The correct labels
:param input_data: the input data used to train
        :param return_cost_only: True if you only want the cost returned, instead of optimising as well
:return: The cost for the current step
"""
n_examples = input_data.shape[0]
        # We have to modify the input data in case one of the features isn't present in the genome connections (i.e. it was removed by a mutation)
# This automatically gets done to the data which is passed to the GenomeNeuralNetwork initialisation, but since
# we allow custom input data for this function we must do the check again
if self.x_train.shape[1] != input_data.shape[1]:
input_data = self.modify_x_data_for_sources(x_data=input_data)
prediction, layer_input_dict = ForwardProp.genome_forward_prop(num_layers=self.num_layers,
initial_input=input_data,
layer_weights=self.weights_dict,
layer_biases=self.bias_dict,
layer_activation_functions=self.activation_function_dict,
keep_constant_connections=self.constant_weight_connections,
node_map=self.node_map)
if return_prediction_only:
return prediction
# Because labels is optional, if the function hasn't already returned, labels should exist.
assert (labels is not None)
# Asserting that the prediction gives the same number of outputs as expected
assert (labels.shape[0] == prediction.shape[0])
        # Define the cost function (binary cross-entropy); the small epsilon inside the logs guards against log(0)
        loss = -((labels * np.log(prediction + 1e-8)) + ((1 - labels) * np.log(1 - prediction + 1e-8)))
        cost = (1 / n_examples) * np.sum(loss, axis=0)
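        # E.g. a single example with label 1 and prediction 0.9 contributes a loss of -ln(0.9) ~= 0.105,
        # while a confident wrong prediction of 0.1 would contribute -ln(0.1) ~= 2.303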
# We optimise in the case where we're not just interested in the cost
if not return_cost_only:
            # Compute the weight and bias gradients through backpropagation
weight_gradients, bias_gradients = BackProp.back_prop(num_layers=self.num_layers,
layer_inputs=layer_input_dict,
layer_weights=self.weights_dict,
layer_activation_functions=self.activation_function_dict,
expected_y=labels, predicted_y=prediction)
self.optimise_parameters(weight_gradients=weight_gradients, bias_gradients=bias_gradients)
return cost[0]
def optimise_parameters(self, weight_gradients, bias_gradients=None):
"""
:param weight_gradients: Dictionary containing weight gradients for each layer
:param bias_gradients: Dictionary containing bias gradients for each layer
"""
for layer_number in weight_gradients:
self.weights_dict[layer_number] = self.weights_dict[layer_number] - (
self.learning_rate * weight_gradients[layer_number])
if bias_gradients is not None:
self.bias_dict[layer_number] = self.bias_dict[layer_number] - (
self.learning_rate * bias_gradients[layer_number])
# Zeroes out connections where there aren't any and sets the constant connections to one
self.ensure_correct_weights()
# Zeroes out the bias values for each layer where there is a dummy node
self.ensure_correct_bias()
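        # Re-applying the masks after every update makes this effectively projected gradient descent: each
        # SGD step is projected back onto the set of weight matrices that respect the genome's topology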
def update_genes(self):
"""
Here we are updating all the connection weights with the optimised weights, after the neural network for the
genome has run
:return:
"""
for connection in self.genome.connections.values():
if connection.enabled:
input_node_layer = self.node_layers[connection.input_node]
output_node_layer = self.node_layers[connection.output_node]
layer_span = output_node_layer - input_node_layer
if layer_span == 1:
# Minus one because of python indexing
input_node_position = self.node_map[connection.input_node] - 1
output_node_position = self.node_map[connection.output_node] - 1
connection_weight = self.weights_dict[output_node_layer - 1][
input_node_position, output_node_position]
# TODO: Improve implementation
                    # I'm not sure why the layer connections and the connections in a genome are separate. They should be pointing to the same connection instance.
# This is just a workaround for now.
for layer_connection in self.genome.layer_connections_dict[input_node_layer]:
if (layer_connection.input_node, layer_connection.output_node) == (
connection.input_node, connection.output_node):
layer_connection.weight = connection_weight
break
                    # Minus one because node_layers counts the first layer as a layer whereas the weights dict doesn't
connection.weight = connection_weight
# If the connection spans multiple layers
else:
try:
                        # Find the last dummy node on the chain of connections created for this multi-layer connection
last_dummy_node = self.last_dummy_related_to_connection[
(connection.input_node, connection.output_node)]
# Minus one because of python indexing
last_dummy_node_position = self.node_map[last_dummy_node] - 1
# This is the position within the layer
output_node_position = self.node_map[connection.output_node] - 1
last_dummy_node_layer = self.node_layers[last_dummy_node]
connection_weight = self.weights_dict[output_node_layer - 1][
last_dummy_node_position, output_node_position]
# TODO: Improve implementation
                        # I'm not sure why the layer connections and the connections in a genome are separate. They should be pointing to the same connection instance.
# This is just a workaround for now.
for layer_connection in self.genome.layer_connections_dict[last_dummy_node_layer]:
if (layer_connection.input_node, layer_connection.output_node) == (
last_dummy_node, connection.output_node):
layer_connection.weight = connection_weight
break
                        # Minus one because node_layers counts the first layer as a layer whereas the weights dict doesn't
connection.weight = connection_weight
                    except KeyError as e:
                        raise Exception('No dummy node was recorded for the multi-layer connection: {}'.format(e))
for node in self.genome.nodes.values():
node_layer = self.node_layers.get(node.node_id)
            # Source nodes have no bias to modify, and if node_layer is not found it means the connection the node
            # is related to is not enabled
if node.node_type != 'source' and node_layer:
# These should be the same value since we're updating two instances of it due to poor implementation on my part
updated_node = self.genome.updated_nodes[node.node_id]
# Minus one for indexing
node_position = self.node_map[node.node_id] - 1
                # Minus one because node_layers counts the first layer as a layer whereas the bias dict doesn't
bias_value = self.bias_dict[node_layer - 1][0, node_position]
updated_node.bias = bias_value
node.bias = bias_value
def optimise(self, print_epoch, error_stop=None):
"""
Train the neural network
:param print_epoch: Boolean, print the cost every epoch or not
        :param error_stop: an error threshold; optimisation stops early if the cost drops below it
:return: a list of each epoch with the cost associated with it
"""
epoch_list = []
cost_list = []
for epoch in range(self.num_epochs):
for batch_start in range(0, self.x_train.shape[0], self.batch_size):
current_batch = self.x_train[batch_start:batch_start + self.batch_size, :]
current_labels = self.y_train[batch_start:batch_start + self.batch_size, :]
epoch_cost = self.run_one_pass(input_data=current_batch, labels=current_labels)
epoch_list.append(epoch)
cost_list.append(epoch_cost)
# Finish early if it is optimised to a certain error
if error_stop and epoch_cost < error_stop:
break
if print_epoch:
print('EPOCH:', epoch, 'Cost:', round(epoch_cost, 3))
first_cost = cost_list[0]
# Check progress quarter of the way through
if epoch == (self.num_epochs // 4):
quarter_cost = cost_list[self.num_epochs // 4]
percentage_increase_quarter = (1 - quarter_cost / first_cost) * 100
print('Beginning cost: {}'.format(round(first_cost, 3)))
print('Quarter cost: {}'.format(round(quarter_cost, 3)))
print(
'Percentage increase from beginning to quarter of the way: {}'.format(
round(percentage_increase_quarter, 1)))
if percentage_increase_quarter < 1:
print('STOPPING OPTIMISATION DUE TO POOR PERFORMANCE')
break
# Check progress halfway through
if epoch == (self.num_epochs // 2):
midway_cost = cost_list[self.num_epochs // 2]
percentage_increase_midway = (1 - midway_cost / first_cost) * 100
percentage_increase_between_quarter_and_midway = (1 - midway_cost / quarter_cost) * 100
print('Midway cost: {}'.format(round(midway_cost, 3)))
print('Percentage increase from beginning to halfway: {}'.format(round(percentage_increase_midway, 1)))
print('Percentage increase from quarter to halfway: {}'.format(
round(percentage_increase_between_quarter_and_midway, 1)))
if percentage_increase_quarter == percentage_increase_midway \
or percentage_increase_quarter > percentage_increase_midway \
or percentage_increase_between_quarter_and_midway < 1:
print('STOPPING OPTIMISATION DUE TO POOR PERFORMANCE')
break
# Check progress 3/4 of the way
if epoch == (self.num_epochs * 3 // 4):
three_quarters_cost = cost_list[self.num_epochs * 3 // 4]
percentage_increase_three_quarters = (1 - three_quarters_cost / first_cost) * 100
percentage_increase_between_midway_and_three_quarters = (1 - three_quarters_cost / midway_cost) * 100
print('Three quarters cost: {}'.format(round(three_quarters_cost, 3)))
                    print('Percentage increase from beginning to three quarters: {}'.format(
                        round(percentage_increase_three_quarters, 1)))
print('Percentage increase from midway to three quarters: {}'.format(
round(percentage_increase_between_midway_and_three_quarters, 1)))
if percentage_increase_midway == percentage_increase_three_quarters \
or percentage_increase_midway > percentage_increase_three_quarters \
or percentage_increase_between_midway_and_three_quarters < 1:
print('STOPPING OPTIMISATION DUE TO POOR PERFORMANCE')
break
print('Optimising 100% done...')
self.update_genes()
return epoch_list, cost_list
def modify_x_data_for_sources(self, x_data=None):
"""
Since not every genome will have a connection for all the source nodes, we must account for this in the
training data by removing features if there isn't a connection to it.
"""
if x_data is None:
x_data = self.x_train
source_nodes = {}
# Find the source nodes in the genome
for node in self.genome.nodes.values():
if node.node_type == 'source':
                if node.node_id not in source_nodes:
source_nodes[node.node_id] = 0
# Check how many connections there are for each source node
for connection in self.genome.connections.values():
if connection.enabled:
if connection.input_node in source_nodes:
source_nodes[connection.input_node] += 1
not_connection_sources = set()
has_connection_sources = set()
        # Separate the source nodes which have no enabled connections from those which do
for node_id, connections_amount in source_nodes.items():
if connections_amount == 0:
not_connection_sources.add(node_id)
else:
has_connection_sources.add(node_id)
# Keeps track of which positions (i.e. columns) will be deleted from x_data
node_positions_to_delete = []
# Account for the fact that a source node might be deleted
if len(source_nodes) != x_data.shape[1]:
possible_positions = set(range(x_data.shape[1]))
included_position_by_node = set()
for node_id in has_connection_sources:
included_position_by_node.add(self.node_map[node_id] - 1)
for node_position in possible_positions:
if node_position not in included_position_by_node:
node_positions_to_delete.append(node_position)
# Delete the columns for the source nodes which don't have connections
for node in not_connection_sources:
            # Get the position of the node inside its own layer. Minus 1 for python indexing
node_position = self.node_map[node] - 1
node_positions_to_delete.append(node_position)
        # Remove duplicates
node_positions_to_delete = list(set(node_positions_to_delete))
        # Delete the highest positions first so that earlier deletions don't shift the indices of the columns
        # still to be deleted
node_positions_to_delete.sort(reverse=True)
for node_position in node_positions_to_delete:
if node_position < x_data.shape[1]:
x_data = np.delete(x_data, node_position, axis=1)
return x_data
def main():
node_list = [NodeGene(node_id=1, node_type='source'),
NodeGene(node_id=2, node_type='source'),
NodeGene(node_id=3, node_type='hidden', bias=0.5),
NodeGene(node_id=4, node_type='hidden', bias=-1.5),
NodeGene(node_id=5, node_type='output', bias=1.5)]
connection_list = [ConnectionGene(input_node=1, output_node=5, innovation_number=1, enabled=True, weight=9),
ConnectionGene(input_node=1, output_node=3, innovation_number=2, enabled=True, weight=2),
ConnectionGene(input_node=2, output_node=3, innovation_number=3, enabled=True, weight=3),
ConnectionGene(input_node=2, output_node=4, innovation_number=4, enabled=True, weight=4),
ConnectionGene(input_node=2, output_node=5, innovation_number=5, enabled=True, weight=3),
ConnectionGene(input_node=3, output_node=5, innovation_number=6, enabled=True, weight=5),
ConnectionGene(input_node=4, output_node=5, innovation_number=7, enabled=True, weight=7)]
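    # This hand-built genome describes a 2-source, 2-hidden, 1-output network; connections such as 1 -> 5 span
    # more than one layer, so the genome deconstruction should insert dummy nodes to carry them through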
genome = Genome(nodes=node_list, connections=connection_list, key=1)
    # Training data
data_train, labels_train = create_data(n_generated=5000)
genome_nn = GenomeNeuralNetwork(genome=genome, x_train=data_train, y_train=labels_train, learning_rate=0.1,
create_weights_bias_from_genome=False, activation_type='sigmoid',
show_connections=True)
optimise = False
if optimise:
        epochs, cost = genome_nn.optimise(print_epoch=True, error_stop=0.09)
plt.plot(epochs, cost)
plt.xlabel('Epoch Number')
plt.ylabel('Error')
plt.title('Error vs Epoch Number')
plt.show()
if __name__ == "__main__":
main()