run_exp.py
from env import Env
from ppo import Agent


def main(dic_agent_conf, dic_env_conf, dic_exp_conf, dic_path):
    env = Env(dic_env_conf)

    # The agent's input/output dimensions come from the environment.
    dic_agent_conf["ACTION_DIM"] = env.action_dim
    dic_agent_conf["STATE_DIM"] = (env.state_dim,)
    agent = Agent(dic_agent_conf, dic_path, dic_env_conf)

    for cnt_episode in range(dic_exp_conf["TRAIN_ITERATIONS"]):
        s = env.reset()
        r_sum = 0
        for cnt_step in range(dic_exp_conf["MAX_EPISODE_LENGTH"]):
            # Render only the last 10 training episodes.
            if cnt_episode > dic_exp_conf["TRAIN_ITERATIONS"] - 10:
                env.render()

            a = agent.choose_action(s)
            s_, r, done, _ = env.step(a)

            # Scale the reward down and accumulate the episode return.
            r /= 100
            r_sum += r
            # Penalize the terminal transition before storing it.
            if done:
                r = -1
            agent.store_transition(s, a, s_, r, done)

            # Update the networks once every BATCH_SIZE collected steps.
            if cnt_step % dic_agent_conf["BATCH_SIZE"] == 0 and cnt_step != 0:
                agent.train_network()

            s = s_
            if done:
                break

        # Log only episodes whose final step index is a multiple of 10.
        if cnt_step % 10 == 0:
            print("Episode:{}, step:{}, r_sum:{}".format(cnt_episode, cnt_step, r_sum))