Commit 71c9b1d: "upd final"
L-ED committed May 29, 2024, 1 parent 181d0e5
Showing 7 changed files with 855 additions and 102 deletions.
699 changes: 644 additions & 55 deletions devel.ipynb

Large diffs are not rendered by default.

@@ -105,15 +105,6 @@ def reset_buffers(self):
self.last_action = np.zeros(4)


-    # def reset(self, seed=None, options=None):
-    #     # action = self.create_initial_action()
-    #     # obs = self.drone.step(action, self)
-    #     obs, inf = super().reset()
-    #     # print('IM HERE')
-    #     return self.preprocess_observation(obs), inf



def preprocess_observation(self, observation):

pos = observation['FS_0'][0]
15 changes: 15 additions & 0 deletions gym_pybullet_drones/envs/single_agent_rl/hover/HoverFullState.py
@@ -181,6 +181,21 @@ def create_initial_action(self):
return np.zeros(4)


+    def reset_manual(self, pos):
+        # pb.resetSimulation(physicsClientId=self.client)
+        # self.init_sim()
+        self.step_idx = 0
+        state = self.create_initial_state()
+        # print(state.world.pos)
+        state.world.pos = pos.copy()
+        self.drone.reset_state(state)
+        action = self.create_initial_action()
+        obs = self.drone.step(action, self)
+        # print("DRONE", obs)
+        return self.preprocess_observation(obs), {}



def reward(self):

# safe_radius= 0.3
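Note: the new reset_manual hook added above is what the evaluation scripts introduced later in this commit drive. A minimal usage sketch (standalone; the visualize flag is mirrored from those scripts and is not part of this file):

    import numpy as np
    from gym_pybullet_drones.envs.single_agent_rl import HoverFullState

    env = HoverFullState(visualize=False)
    # Spawn the drone at an explicit world position and take the first observation.
    obs, info = env.reset_manual(np.array([0.0, 0.0, 3.0]))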
@@ -38,12 +38,12 @@ def __init__(
self.max_g = 2*9.8
self.max_ang_vel = 2
# self.max_radius = 1
-        self.max_radius = 4
+        self.max_radius = 2

self.alpha = 0.2

# self.target_pos = np.array([0, 0, 1])
-        self.target_pos = np.array([0, 0, 5])
+        self.target_pos = np.array([0, 0, 3])
self.last_action = np.zeros(4)
self.randomize = True
self.validation = False
@@ -89,7 +89,6 @@ def normalize_observation_space(self):
dtype=np.float32
)



def preprocess_action(self, action):
self.last_action = action.copy()
@@ -100,15 +99,6 @@ def reset_buffers(self):
self.last_action = np.zeros(4)


-    # def reset(self, seed=None, options=None):
-    #     # action = self.create_initial_action()
-    #     # obs = self.drone.step(action, self)
-    #     obs, inf = super().reset()
-    #     # print('IM HERE')
-    #     return self.preprocess_observation(obs), inf



def preprocess_observation(self, observation):

max_disp = self.max_radius
@@ -140,25 +130,6 @@ def preprocess_observation(self, observation):
]


-        # stats = [
-        #     pos,
-        #     ang,
-        #     world_ang_vel,
-        #     world_lin_vel,
-        #     # imu[:3],
-        #     # imu[3:],
-        #     # a_acc,
-        #     # acc,
-        #     targ_disp
-        # ]
-
-        # for i in range(len(stats)):
-        #     value = stats[i]
-        #     value_norm = np.linalg.norm(value)
-        #     if value_norm != 0:
-        #         value = value/value_norm
-        #     stats[i] = value

return np.concatenate(stats).reshape((1, self.elem_num))
# return np.concatenate(stats).reshape((1, 12))

@@ -205,7 +176,7 @@ def create_initial_state_(self):
def create_initial_state(self):
state = super().create_initial_state()
if self.randomize:
-            delta = (np.random.rand(3)* - 1)*self.max_radius*self.alpha
+            delta = (np.random.rand(3)*2 - 1)*self.max_radius*self.alpha
new_pos = self.target_pos + delta
else:
new_pos = np.zeros(3)
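The one-line change above fixes the spawn randomization: the old expression could only offset the drone in the negative direction on each axis. A sketch of the two ranges (illustrative only):

    import numpy as np

    rng = np.random.rand(3)      # components in [0, 1)
    old_delta = rng * -1         # components in (-1, 0]: one-sided offsets
    new_delta = rng * 2 - 1      # components in [-1, 1): symmetric around the target
    # both variants are then scaled by max_radius * alpha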
@@ -246,11 +217,9 @@ def reward(self):
# closenes_reward +=1
dir_reward=1

-        # print(state.world.ang_vel, state.local.ang_vel)
-        angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.3)
+        angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.1)
# print(closenes_reward)
reward = closenes_reward*angles_reward
-        # reward = closenes_reward + angles_reward + dir_reward*0.1

return reward

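The reward edit above softens the angular-velocity penalty coefficient from 0.3 to 0.1 while keeping the multiplicative form, so a fast-spinning drone earns little reward even when it sits on the target. A self-contained sketch of the shaping (closenes_reward is defined above the shown hunk; an exponential-in-distance form is assumed here):

    import numpy as np

    def shaped_reward(dist_to_target, world_ang_vel, k_dist=1.0, k_ang=0.1):
        # Assumed closeness term; its real definition is outside the shown hunk.
        closeness = np.exp(-k_dist * dist_to_target)
        # Spin penalty: 1.0 at zero angular velocity, decaying with spin speed.
        spin_factor = np.exp(-k_ang * np.linalg.norm(world_ang_vel))
        return closeness * spin_factor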
94 changes: 94 additions & 0 deletions gym_pybullet_drones/examples/hover/hover_test_auto.py
@@ -0,0 +1,94 @@
from stable_baselines3 import PPO, SAC, TD3
from gym_pybullet_drones.envs.single_agent_rl import HoverIMU, HoverGPS, HoverFullState
import time
# import torch
import os
import pybullet as pb
from scipy.spatial.transform import Rotation as R
import numpy as np


def fly_to_point(delta, env, agent, max_timestep=500):

    target_point = np.array([0., 0., 10])
    state, _ = env.reset_manual(target_point + delta)
    # env.drone.reset_state(state)

    term = False
    success = False

    while not term:

        env.target_pos = target_point

        action, _ = agent.predict(
            state.reshape(1, -1),
            deterministic=True
        )

        state, reward, terminated, truncated, info = env.step(action)

        pos = env.drone.state.world.pos.copy()
        if pos[2] < 0.2 or env.step_idx > max_timestep:  # crashed or ran out of time
            term = True

        if np.sum((pos - target_point)**2) < 0.2:  # squared distance, i.e. within ~0.45 m
            success = True
            term = True

        time.sleep(env.timestep)

    return success


def main(test=True):

    savedir = '/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/hover/multienv/'
    savepath = os.path.join(
        savedir,
        # 'PPO_35'
        # 'PPO_39' #best
        # 'PPO_43' #bestb
        'PPO_43'
        # 'curriculum/PPO_2'
    )
    trainer = PPO
    # trainer = SAC

    env_class = HoverFullState

    env = env_class(visualize=True)
    env.randomize = False
    env.validation = True

    agent = trainer.load(
        os.path.join(savepath, 'best_model'),
        env=env)

    # sample 26 unit directions: the two poles plus a 3 (pitch) x 8 (yaw) grid
    radius = np.linspace(0.5, 3, 6)  # overridden below

    points = [
        np.array([0., 0., 1.]),
        np.array([0., 0., -1.])
    ]
    for pitch in np.linspace(180/4, 180 - 180/4, 3):
        for yaw in np.linspace(0.0, 360 - 360/8, 8):
            rot = R.from_euler("xyz", np.array([0, pitch, yaw]), degrees=True)
            points.append(rot.apply([0, 0, 1]))

    points = np.array(points)

    # radius = np.linspace(0.5, 3, 6)
    radius = np.linspace(1, 10, 19)  # radii 1 m to 10 m in 0.5 m steps

    for r in radius:
        success_num = 0
        for point in points*r:
            success_num += fly_to_point(point, env=env, agent=agent)
        print("Radius", r, "success rate", success_num / len(points))


if __name__ == '__main__':
    main()
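The point set built in main() is two poles plus a 3 (pitch) by 8 (yaw) grid, i.e. 26 unit directions that are scaled by each test radius. A quick standalone check of that construction:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    points = [np.array([0., 0., 1.]), np.array([0., 0., -1.])]
    for pitch in np.linspace(45, 135, 3):        # 180/4 .. 180 - 180/4
        for yaw in np.linspace(0.0, 315, 8):     # 0 .. 360 - 360/8
            rot = R.from_euler("xyz", [0, pitch, yaw], degrees=True)
            points.append(rot.apply([0, 0, 1]))
    points = np.array(points)

    assert points.shape == (26, 3)
    assert np.allclose(np.linalg.norm(points, axis=1), 1.0)  # rotations preserve length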
94 changes: 94 additions & 0 deletions gym_pybullet_drones/examples/hover/hover_test_auto2.py
@@ -0,0 +1,94 @@
from stable_baselines3 import PPO, SAC, TD3
from gym_pybullet_drones.envs.single_agent_rl import HoverIMU, HoverGPS, HoverFullState
import time
# import torch
import os
import pybullet as pb
from scipy.spatial.transform import Rotation as R
import numpy as np


def fly_to_point(delta, env, agent, max_timestep=500):

    target_point = np.array([0., 0., 10])
    state, _ = env.reset_manual(target_point + delta)
    # env.drone.reset_state(state)

    term = False
    success = False

    while not term:

        env.target_pos = target_point

        action, _ = agent.predict(
            state.reshape(1, -1),
            deterministic=True
        )

        state, reward, terminated, truncated, info = env.step(action)

        pos = env.drone.state.world.pos.copy()
        if pos[2] < 0.2 or env.step_idx > max_timestep:  # crashed or ran out of time
            term = True

        if np.sum((pos - target_point)**2) < 0.2:  # squared distance, i.e. within ~0.45 m
            success = True
            term = True

        time.sleep(env.timestep)

    return success


def main(test=True):

    savedir = '/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/hover/multienv/'
    savepath = os.path.join(
        savedir,
        # 'PPO_35'
        # 'PPO_39' #best
        # 'PPO_43' #bestb
        'PPO_43'
        # 'curriculum/PPO_2'
    )
    trainer = PPO
    # trainer = SAC

    env_class = HoverFullState

    env = env_class(visualize=True)
    env.randomize = False
    env.validation = True

    agent = trainer.load(
        os.path.join(savepath, 'best_model'),
        env=env)

    # sample 26 unit directions: the two poles plus a 3 (pitch) x 8 (yaw) grid
    radius = np.linspace(0.5, 3, 6)  # overridden below

    points = [
        np.array([0., 0., 1.]),
        np.array([0., 0., -1.])
    ]
    for pitch in np.linspace(180/4, 180 - 180/4, 3):
        for yaw in np.linspace(0.0, 360 - 360/8, 8):
            rot = R.from_euler("xyz", np.array([0, pitch, yaw]), degrees=True)
            points.append(rot.apply([0, 0, 1]))

    points = np.array(points)

    # radius = np.linspace(0.5, 3, 6)
    radius = np.linspace(1, 10, 19)  # radii 1 m to 10 m in 0.5 m steps

    for r in radius:
        success_num = 0
        for point in points*r:
            success_num += fly_to_point(point, env=env, agent=agent)
        print("Radius", r, "success rate", success_num / len(points))


if __name__ == '__main__':
    main()
7 changes: 4 additions & 3 deletions gym_pybullet_drones/examples/hover/hover_test_gui.py
@@ -13,7 +13,8 @@ def main(test=True):
savedir,
# 'PPO_35'
# 'PPO_39' #best
-        'PPO_43'
+        'PPO_43' #bestb
+        # 'PPO_44'
# 'curriculum/PPO_2'
)
trainer = PPO
@@ -27,8 +28,8 @@ def main(test=True):
env.randomize = False
env.validation = True

-    x = pb.addUserDebugParameter('x', -1, 10, 0.)
-    y = pb.addUserDebugParameter('y', -1, 10, 0.)
+    x = pb.addUserDebugParameter('x', -10, 10, 0.)
+    y = pb.addUserDebugParameter('y', -10, 10, 0.)
z = pb.addUserDebugParameter('z', 0.2, 20, 1.)
reset = pb.addUserDebugParameter('reset', 1, 0, 1)

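For reference, pybullet debug sliders are read back with pb.readUserDebugParameter each control step; a minimal sketch of how the widened x/y ranges would be consumed (the script's actual loop sits below the shown hunk and is assumed similar):

    import pybullet as pb

    # Assumes a GUI client is connected and x, y, z are the parameter ids above.
    target_pos = [pb.readUserDebugParameter(pid) for pid in (x, y, z)]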
