ddpg_keras_rl.py
# Derived from keras-rl
import argparse
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, concatenate
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

from osim.env import L2M2019Env
from osim.http.client import Client
# Command line parameters
parser = argparse.ArgumentParser(description='Train or test neural net motor controller')
parser.add_argument('--train', dest='train', action='store_true', default=True)
parser.add_argument('--test', dest='train', action='store_false')
parser.add_argument('--steps', dest='steps', action='store', default=100000, type=int)
parser.add_argument('--visualize', dest='visualize', action='store_true', default=False)
parser.add_argument('--model', dest='model', action='store', default="example.h5f")
parser.add_argument('--token', dest='token', action='store', required=False)
args = parser.parse_args()

# Path to the weights file read/written by load_weights/save_weights below
model_path = args.model
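# Example invocations (illustrative only; the flags are defined above, the model path is up to you):
#   python ddpg_keras_rl.py --train --model sample.h5f --steps 100000
#   python ddpg_keras_rl.py --test --model sample.h5f --visualize
#   python ddpg_keras_rl.py --test --model sample.h5f --token <your-crowdai-token>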
# Load walking environment
env = L2M2019Env(visualize=args.visualize)
env.reset()
nb_actions = env.action_space.shape[0]
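# nb_actions is the number of muscle excitations the environment expects
# (22 for L2M2019Env, one per muscle in the 3D musculoskeletal model)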
# Total number of steps in training
nallsteps = args.steps
# Create networks for DDPG
# Next, we build a very simple model.
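# The actor maps the flattened observation straight to nb_actions excitations;
# the final sigmoid keeps every output in [0, 1], the range the muscles accept.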
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(444, kernel_initializer='he_normal'))
actor.add(Activation('relu'))
actor.add(Dense(444, kernel_initializer='he_normal'))
actor.add(Activation('relu'))
actor.add(Dense(444, kernel_initializer='he_normal'))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))
print(actor.summary())
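# The critic estimates Q(observation, action): the action and the flattened
# observation are concatenated, passed through three hidden layers, and reduced
# to a single linear Q-value.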
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = concatenate([action_input, flattened_observation])
x = Dense(444, kernel_initializer='he_normal')(x)
x = Activation('relu')(x)
x = Dense(444, kernel_initializer='he_normal')(x)
x = Activation('relu')(x)
x = Dense(444, kernel_initializer='he_normal')(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())
# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.get_action_space_size())
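# SequentialMemory is the replay buffer (here the last 100000 transitions, with a
# window of a single observation per state). The Ornstein-Uhlenbeck process produces
# temporally correlated exploration noise that keras-rl adds to the actor's output
# during training.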
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
random_process=random_process, gamma=.99, target_model_update=1e-3,
delta_clip=1.)
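# With target_model_update=1e-3 the target networks track the online networks through
# soft (Polyak) updates; gamma is the discount factor and delta_clip=1. applies a
# Huber-style clip to the critic's TD error.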
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
# memory=memory, nb_steps_warmup=1000, random_process=random_process,
# gamma=.99, target_model_update=0.1)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
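# metrics=['mae'] only adds mean absolute error to the training logs; it does not
# change the loss. Adam here uses a 1e-3 learning rate with gradient norms clipped at 1.0.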
# Okay, now it's time to learn something! Visualization is disabled during training
# (it slows things down considerably). You can always safely abort training
# prematurely using Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=env.time_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(model_path, overwrite=True)
# If TEST and TOKEN, submit to crowdAI
if not args.train and args.token:
    agent.load_weights(model_path)

    # Settings
    remote_base = 'http://grader.crowdai.org:1729'
    client = Client(remote_base)

    # Create environment
    observation = client.env_create(args.token)

    # Step through the remote environment until the grader is done.
    # The grader runs 3 simulations of at most 1000 steps each. We stop after the last one.
    while True:
        v = np.array(observation).reshape((env.observation_space.shape[0]))
        action = agent.forward(v)
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break
    client.submit()
# If TEST and no TOKEN, run some test experiments
if not args.train and not args.token:
    agent.load_weights(model_path)
    # Finally, evaluate the trained policy for 10 episodes.
    agent.test(env, nb_episodes=10, visualize=False, nb_max_episode_steps=500)