-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsandbox.py
114 lines (83 loc) · 4.44 KB
/
sandbox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
####################
# IMPORTS
####################
from tensorflow.contrib.layers import *
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import datetime
import time
import sys
from osim.env import *
from osim.http.client import Client
import argparse
import math
####################
# SOME FUNCTIONS
####################
def transform_obs(obs_value):
    """Return a rescaled copy of an observation vector.

    Entries from index 242 onward are divided by 5, then every entry is
    divided by 10 (so the tail ends up divided by 50 overall).

    Args:
        obs_value: 1-D array-like of numbers (list, tuple or ndarray).

    Returns:
        A new floating-point ``numpy.ndarray``; ``obs_value`` itself is
        never modified. Floating inputs keep their dtype, anything else is
        promoted to float64.
    """
    # np.array copies, so the slice update below cannot leak into the
    # caller's array (np.asarray would alias an ndarray argument).
    scaled = np.array(obs_value)
    # Integer storage would silently truncate the /5 result on write-back.
    if not np.issubdtype(scaled.dtype, np.inexact):
        scaled = scaled.astype(np.float64)
    scaled[242:] = scaled[242:] / 5
    return scaled / 10
def visualize_layer_responses(what_to_plot, title):
    """Display a density-normalized, 50-bin histogram of ``what_to_plot``.

    Args:
        what_to_plot: array exposing ``flatten()``; all of its values are
            pooled into a single histogram.
        title: text shown as the plot title.
    """
    flat_values = what_to_plot.flatten()
    plt.hist(flat_values, bins=50, density=True, facecolor='g', alpha=0.75)
    plt.title(title)
    # Opens the figure window via the active matplotlib backend.
    plt.show()
####################
# INITIAL SETUP
####################
# Command line parameters: --log_path is the directory the saved
# trajectories and the model checkpoint are read from (default: cwd).
parser = argparse.ArgumentParser(
    description='Train or test neural net motor controller'
)
parser.add_argument(
    '--log_path',
    action='store',
    default=".",
)
args = parser.parse_args()

# Load walking environment (no visualization) and reset it once.
env = L2M2019Env(visualize=False)
env.reset()

# One actor head is built per entry along the first action-space axis.
nb_actions = env.action_space.shape[0]

# Action settings.
# NOTE(review): only exponentially_decay_action is read in the visible
# code; the two constants presumably mirror the training script - confirm.
exponentially_decay_action = True
exp_action_decay_const = 0.9
exp_action_addition = 0.1
####################
# NETWORK CREATION
####################
# Placeholder for a batch of observations, flattened before the dense layers.
x = tf.placeholder(
    tf.float32,
    shape=(None,) + env.observation_space.shape,
    name='x',
)
flattened_x = tf.contrib.layers.flatten(x)

# Initializers shared by every layer below.
weights_init = tf.contrib.layers.variance_scaling_initializer()
bias_init = tf.constant_initializer(0.0)


def _dense(inputs, num_outputs, scope, activation_fn=tf.nn.relu):
    """Build one fully connected layer using the shared initializers."""
    return fully_connected(
        inputs=inputs,
        num_outputs=num_outputs,
        activation_fn=activation_fn,
        weights_initializer=weights_init,
        weights_regularizer=None,
        biases_initializer=bias_init,
        scope=scope
    )


# ACTOR(s): three 444-unit ReLU layers feeding one softmax head per action.
a_h1 = _dense(flattened_x, 444, 'a_h1')
a_h2 = _dense(a_h1, 444, 'a_h2')
a_h3 = _dense(a_h2, 444, 'a_h3')

actor_heads = []
actor_heads_logits = []

# Both settings currently select two discrete options per action.
num_options = 2 if exponentially_decay_action else 2
action_choices = np.arange(num_options)/(num_options - 1)

for i in range(nb_actions):
    head_logits = _dense(a_h3, num_options, 'a' + str(i), activation_fn=None)
    actor_heads_logits.append(head_logits)
    actor_heads.append(tf.nn.softmax(head_logits))

# CRITIC: separate three-layer trunk ending in a single linear output.
v_h1 = _dense(flattened_x, 444, 'v_h1')
v_h2 = _dense(v_h1, 444, 'v_h2')
v_h3 = _dense(v_h2, 444, 'v_h3')
critic = _dense(v_h3, 1, 'vf', activation_fn=None)

# Created after all layers so it covers every variable defined above.
saver = tf.train.Saver()
####################
# NETWORK TESTING
####################
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Load the saved observations and shape them into a batch for `x`.
obs_batch_shape = (-1,) + env.observation_space.shape
loaded_obs = np.load(args.log_path + "/saved_trajectories.npy")
obs_batch = np.reshape(loaded_obs, obs_batch_shape)


def _show_responses(layer_titles, probas_title):
    """Plot histograms for the three actor layers, then the head outputs."""
    for layer, layer_title in zip((a_h1, a_h2, a_h3), layer_titles):
        responses = sess.run(layer, feed_dict={x: obs_batch})
        visualize_layer_responses(responses, layer_title)
    head_probas = np.asarray(sess.run(actor_heads, feed_dict={x: obs_batch}))
    visualize_layer_responses(head_probas, probas_title)


# First pass: freshly initialized weights.
_show_responses(("a_h1", "a_h2", "a_h3"), "probas")

# Second pass: same inputs after restoring the checkpoint.
saver.restore(sess, args.log_path + "/model.ckpt")
_show_responses(("a_h1_trained", "a_h2_trained", "a_h3_trained"), "probas_trained")