Commit 71c9b1d: "upd final"
L-ED committed May 29, 2024, 1 parent 181d0e5
Showing 7 changed files with 855 additions and 102 deletions.
699 changes: 644 additions & 55 deletions devel.ipynb

Large diffs are not rendered by default.

@@ -105,15 +105,6 @@ def reset_buffers(self):
self.last_action = np.zeros(4)


-    # def reset(self, seed=None, options=None):
-    #     # action = self.create_initial_action()
-    #     # obs = self.drone.step(action, self)
-    #     obs, inf = super().reset()
-    #     # print('IM HERE')
-    #     return self.preprocess_observation(obs), inf



def preprocess_observation(self, observation):

pos = observation['FS_0'][0]
15 changes: 15 additions & 0 deletions gym_pybullet_drones/envs/single_agent_rl/hover/HoverFullState.py
@@ -181,6 +181,21 @@ def create_initial_action(self):
return np.zeros(4)


+    def reset_manual(self, pos):
+        # pb.resetSimulation(physicsClientId=self.client)
+        # self.init_sim()
+        self.step_idx = 0
+        state = self.create_initial_state()
+        # print(state.world.pos)
+        state.world.pos = pos.copy()
+        self.drone.reset_state(state)
+        action = self.create_initial_action()
+        obs = self.drone.step(action, self)
+        # print("DRONE", obs)
+        return self.preprocess_observation(obs), {}



def reward(self):

# safe_radius= 0.3
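Note: the new reset_manual hook added above is what the evaluation scripts introduced later in this commit drive. A minimal usage sketch (standalone; the visualize flag is mirrored from those scripts and is not part of this file):

    import numpy as np
    from gym_pybullet_drones.envs.single_agent_rl import HoverFullState

    env = HoverFullState(visualize=False)
    # Spawn the drone at an explicit world position and take the first observation.
    obs, info = env.reset_manual(np.array([0.0, 0.0, 3.0]))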
@@ -38,12 +38,12 @@ def __init__(
self.max_g = 2*9.8
self.max_ang_vel = 2
# self.max_radius = 1
-        self.max_radius = 4
+        self.max_radius = 2

self.alpha = 0.2

# self.target_pos = np.array([0, 0, 1])
-        self.target_pos = np.array([0, 0, 5])
+        self.target_pos = np.array([0, 0, 3])
self.last_action = np.zeros(4)
self.randomize = True
self.validation = False
@@ -89,7 +89,6 @@ def normalize_observation_space(self):
dtype=np.float32
)



def preprocess_action(self, action):
self.last_action = action.copy()
@@ -100,15 +99,6 @@ def reset_buffers(self):
self.last_action = np.zeros(4)


-    # def reset(self, seed=None, options=None):
-    #     # action = self.create_initial_action()
-    #     # obs = self.drone.step(action, self)
-    #     obs, inf = super().reset()
-    #     # print('IM HERE')
-    #     return self.preprocess_observation(obs), inf



def preprocess_observation(self, observation):

max_disp = self.max_radius
@@ -140,25 +130,6 @@ def preprocess_observation(self, observation):
]


-        # stats = [
-        #     pos,
-        #     ang,
-        #     world_ang_vel,
-        #     world_lin_vel,
-        #     # imu[:3],
-        #     # imu[3:],
-        #     # a_acc,
-        #     # acc,
-        #     targ_disp
-        # ]
-
-        # for i in range(len(stats)):
-        #     value = stats[i]
-        #     value_norm = np.linalg.norm(value)
-        #     if value_norm != 0:
-        #         value = value/value_norm
-        #     stats[i] = value

return np.concatenate(stats).reshape((1, self.elem_num))
# return np.concatenate(stats).reshape((1, 12))

@@ -205,7 +176,7 @@ def create_initial_state_(self):
def create_initial_state(self):
state = super().create_initial_state()
if self.randomize:
-            delta = (np.random.rand(3)* - 1)*self.max_radius*self.alpha
+            delta = (np.random.rand(3)*2 - 1)*self.max_radius*self.alpha
new_pos = self.target_pos + delta
else:
new_pos = np.zeros(3)
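The one-line change above fixes the spawn randomization: the old expression could only offset the drone in the negative direction on each axis. A sketch of the two ranges (illustrative only):

    import numpy as np

    rng = np.random.rand(3)      # components in [0, 1)
    old_delta = rng * -1         # components in (-1, 0]: one-sided offsets
    new_delta = rng * 2 - 1      # components in [-1, 1): symmetric around the target
    # both variants are then scaled by max_radius * alpha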
@@ -246,11 +217,9 @@ def reward(self):
# closenes_reward +=1
dir_reward=1

-        # print(state.world.ang_vel, state.local.ang_vel)
-        angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.3)
+        angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.1)
# print(closenes_reward)
reward = closenes_reward*angles_reward
-        # reward = closenes_reward + angles_reward + dir_reward*0.1

return reward

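The reward edit above softens the angular-velocity penalty coefficient from 0.3 to 0.1 while keeping the multiplicative form, so a fast-spinning drone earns little reward even when it sits on the target. A self-contained sketch of the shaping (closenes_reward is defined above the shown hunk; an exponential-in-distance form is assumed here):

    import numpy as np

    def shaped_reward(dist_to_target, world_ang_vel, k_dist=1.0, k_ang=0.1):
        # Assumed closeness term; its real definition is outside the shown hunk.
        closeness = np.exp(-k_dist * dist_to_target)
        # Spin penalty: 1.0 at zero angular velocity, decaying with spin speed.
        spin_factor = np.exp(-k_ang * np.linalg.norm(world_ang_vel))
        return closeness * spin_factor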
94 changes: 94 additions & 0 deletions gym_pybullet_drones/examples/hover/hover_test_auto.py
@@ -0,0 +1,94 @@
from stable_baselines3 import PPO, SAC, TD3
from gym_pybullet_drones.envs.single_agent_rl import HoverIMU, HoverGPS, HoverFullState
import time
# import torch
import os
import pybullet as pb
from scipy.spatial.transform import Rotation as R
import numpy as np


def fly_to_point(delta, env, agent, max_timestep=500):

    target_point = np.array([0., 0., 10])
    state, _ = env.reset_manual(target_point + delta)
    # env.drone.reset_state(state)

    term = False
    success = False

    while not term:

        env.target_pos = target_point

        action, _ = agent.predict(
            state.reshape(1, -1),
            deterministic=True
        )

        state, reward, terminated, truncated, info = env.step(action)

        pos = env.drone.state.world.pos.copy()
        if pos[2] < 0.2 or env.step_idx > max_timestep:  # crashed or ran out of time
            term = True

        if np.sum((pos - target_point)**2) < 0.2:  # squared distance, i.e. within ~0.45 m
            success = True
            term = True

        time.sleep(env.timestep)

    return success


def main(test=True):

    savedir = '/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/hover/multienv/'
    savepath = os.path.join(
        savedir,
        # 'PPO_35'
        # 'PPO_39' #best
        # 'PPO_43' #bestb
        'PPO_43'
        # 'curriculum/PPO_2'
    )
    trainer = PPO
    # trainer = SAC

    env_class = HoverFullState

    env = env_class(visualize=True)
    env.randomize = False
    env.validation = True

    agent = trainer.load(
        os.path.join(savepath, 'best_model'),
        env=env)

    # sample 26 unit directions: the two poles plus a 3 (pitch) x 8 (yaw) grid
    radius = np.linspace(0.5, 3, 6)  # overridden below

    points = [
        np.array([0., 0., 1.]),
        np.array([0., 0., -1.])
    ]
    for pitch in np.linspace(180/4, 180 - 180/4, 3):
        for yaw in np.linspace(0.0, 360 - 360/8, 8):
            rot = R.from_euler("xyz", np.array([0, pitch, yaw]), degrees=True)
            points.append(rot.apply([0, 0, 1]))

    points = np.array(points)

    # radius = np.linspace(0.5, 3, 6)
    radius = np.linspace(1, 10, 19)  # radii 1 m to 10 m in 0.5 m steps

    for r in radius:
        success_num = 0
        for point in points*r:
            success_num += fly_to_point(point, env=env, agent=agent)
        print("Radius", r, "success rate", success_num / len(points))


if __name__ == '__main__':
    main()
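The point set built in main() is two poles plus a 3 (pitch) by 8 (yaw) grid, i.e. 26 unit directions that are scaled by each test radius. A quick standalone check of that construction:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    points = [np.array([0., 0., 1.]), np.array([0., 0., -1.])]
    for pitch in np.linspace(45, 135, 3):        # 180/4 .. 180 - 180/4
        for yaw in np.linspace(0.0, 315, 8):     # 0 .. 360 - 360/8
            rot = R.from_euler("xyz", [0, pitch, yaw], degrees=True)
            points.append(rot.apply([0, 0, 1]))
    points = np.array(points)

    assert points.shape == (26, 3)
    assert np.allclose(np.linalg.norm(points, axis=1), 1.0)  # rotations preserve length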
94 changes: 94 additions & 0 deletions gym_pybullet_drones/examples/hover/hover_test_auto2.py
@@ -0,0 +1,94 @@
from stable_baselines3 import PPO, SAC, TD3
from gym_pybullet_drones.envs.single_agent_rl import HoverIMU, HoverGPS, HoverFullState
import time
# import torch
import os
import pybullet as pb
from scipy.spatial.transform import Rotation as R
import numpy as np


def fly_to_point(delta, env, agent, max_timestep=500):

    target_point = np.array([0., 0., 10])
    state, _ = env.reset_manual(target_point + delta)
    # env.drone.reset_state(state)

    term = False
    success = False

    while not term:

        env.target_pos = target_point

        action, _ = agent.predict(
            state.reshape(1, -1),
            deterministic=True
        )

        state, reward, terminated, truncated, info = env.step(action)

        pos = env.drone.state.world.pos.copy()
        if pos[2] < 0.2 or env.step_idx > max_timestep:  # crashed or ran out of time
            term = True

        if np.sum((pos - target_point)**2) < 0.2:  # squared distance, i.e. within ~0.45 m
            success = True
            term = True

        time.sleep(env.timestep)

    return success


def main(test=True):

    savedir = '/home/led/robotics/engines/Bullet_sym/gym-pybullet-drones/gym_pybullet_drones/results/hover/multienv/'
    savepath = os.path.join(
        savedir,
        # 'PPO_35'
        # 'PPO_39' #best
        # 'PPO_43' #bestb
        'PPO_43'
        # 'curriculum/PPO_2'
    )
    trainer = PPO
    # trainer = SAC

    env_class = HoverFullState

    env = env_class(visualize=True)
    env.randomize = False
    env.validation = True

    agent = trainer.load(
        os.path.join(savepath, 'best_model'),
        env=env)

    # sample 26 unit directions: the two poles plus a 3 (pitch) x 8 (yaw) grid
    radius = np.linspace(0.5, 3, 6)  # overridden below

    points = [
        np.array([0., 0., 1.]),
        np.array([0., 0., -1.])
    ]
    for pitch in np.linspace(180/4, 180 - 180/4, 3):
        for yaw in np.linspace(0.0, 360 - 360/8, 8):
            rot = R.from_euler("xyz", np.array([0, pitch, yaw]), degrees=True)
            points.append(rot.apply([0, 0, 1]))

    points = np.array(points)

    # radius = np.linspace(0.5, 3, 6)
    radius = np.linspace(1, 10, 19)  # radii 1 m to 10 m in 0.5 m steps

    for r in radius:
        success_num = 0
        for point in points*r:
            success_num += fly_to_point(point, env=env, agent=agent)
        print("Radius", r, "success rate", success_num / len(points))


if __name__ == '__main__':
    main()
7 changes: 4 additions & 3 deletions gym_pybullet_drones/examples/hover/hover_test_gui.py
@@ -13,7 +13,8 @@ def main(test=True):
savedir,
# 'PPO_35'
# 'PPO_39' #best
-        'PPO_43'
+        'PPO_43' #bestb
+        # 'PPO_44'
# 'curriculum/PPO_2'
)
trainer = PPO
@@ -27,8 +28,8 @@ def main(test=True):
env.randomize = False
env.validation = True

-    x = pb.addUserDebugParameter('x', -1, 10, 0.)
-    y = pb.addUserDebugParameter('y', -1, 10, 0.)
+    x = pb.addUserDebugParameter('x', -10, 10, 0.)
+    y = pb.addUserDebugParameter('y', -10, 10, 0.)
z = pb.addUserDebugParameter('z', 0.2, 20, 1.)
reset = pb.addUserDebugParameter('reset', 1, 0, 1)

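For reference, pybullet debug sliders are read back with pb.readUserDebugParameter each control step; a minimal sketch of how the widened x/y ranges would be consumed (the script's actual loop sits below the shown hunk and is assumed similar):

    import pybullet as pb

    # Assumes a GUI client is connected and x, y, z are the parameter ids above.
    target_pos = [pb.readUserDebugParameter(pid) for pid in (x, y, z)]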
