Commit 181d0e5: stable hover

L-ED committed May 6, 2024
1 parent 585f005
Showing 16 changed files with 1,373 additions and 71 deletions.
149 changes: 145 additions & 4 deletions devel.ipynb
@@ -65,23 +65,164 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
"array([1., 2., 3.])"
]
},
"execution_count": 3,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"max(np.array([1, 2, 3]))"
"a = np.array([1., 2., 3.])\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from scipy.spatial.transform import Rotation as R\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"rot = R.from_euler(\"ZYX\", np.array([0, 180, 0]), degrees=True)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1.2246468e-16, 0.0000000e+00, -1.0000000e+00])"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rot.apply([0, 0, 1])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.11022302e-16, -1.00000000e+00, -1.11022302e-16],\n",
" [ 8.66025404e-01, 1.11022302e-16, -5.00000000e-01],\n",
" [ 5.00000000e-01, 0.00000000e+00, 8.66025404e-01]])"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rot.as_matrix()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"45.091584"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"4*1.9571e-08*(24000**2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_command(alpha, max_vel):\n",
" new_pos = np.array([0, 0, 20])\n",
" command = (np.random.rand(4)*2 - 1)*alpha\n",
" command[0]=0\n",
" rot = R.from_euler(\"ZYX\", command[:3]*180, degrees=True)\n",
" command[:3] = rot.apply([0, 0, 1])\n",
"\n",
" # command[3] *= self.drone.max_speed/(2-self.alpha)\n",
" command[3] = max_vel*(0.5 + 0.5*command[3])\n",
" return command"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.830339193428011e-172"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp(-400)*200"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.66666667, 2. , 3.33333333])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a *=2/3\n",
"a"
]
},
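A note on the scratch cells above: they sanity-check the SciPy Euler convention used in the environment changes below and estimate peak thrust from a thrust coefficient. A self-contained sketch of both checks (reading 1.9571e-08 as a per-motor thrust coefficient in N/RPM^2 is an assumption; the constant and the 24000 RPM figure are taken from the notebook):

import numpy as np
from scipy.spatial.transform import Rotation as R

# A 180-degree rotation about Y (the middle axis of the intrinsic "ZYX"
# sequence) flips the unit z-axis to [0, 0, -1], matching the cell output.
rot = R.from_euler("ZYX", [0.0, 180.0, 0.0], degrees=True)
assert np.allclose(rot.apply([0.0, 0.0, 1.0]), [0.0, 0.0, -1.0])

# Total thrust of four motors at full speed: 4 * kf * rpm**2 ~= 45.09 N.
kf, max_rpm = 1.9571e-08, 24000
print(4 * kf * max_rpm**2)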
4 changes: 2 additions & 2 deletions gym_pybullet_drones/envs/single_agent_rl/__init__.py
@@ -4,5 +4,5 @@
from gym_pybullet_drones.envs.single_agent_rl.AimAviary import AimAviary

from gym_pybullet_drones.envs.single_agent_rl.BaseRL import BaseRL
- from gym_pybullet_drones.envs.single_agent_rl.hover import HoverIMU, HoverGPS, HoverFullState
- from gym_pybullet_drones.envs.single_agent_rl.flight import FlightFullState#, FlightGPS, FlightIMU
+ from gym_pybullet_drones.envs.single_agent_rl.hover import HoverIMU, HoverGPS, HoverFullState, HoverFullStateCurriculum
+ from gym_pybullet_drones.envs.single_agent_rl.flight import FlightFullState, FlightFullStateBuffer#, FlightGPS, FlightIMU
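Downstream code can then import the two newly exported classes at the package level; a minimal usage sketch (constructor arguments are outside this diff):

from gym_pybullet_drones.envs.single_agent_rl import (
    HoverFullStateCurriculum,
    FlightFullStateBuffer,
)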
97 changes: 84 additions & 13 deletions gym_pybullet_drones/envs/single_agent_rl/flight/FlightFullState.py
@@ -1,5 +1,6 @@
from gym_pybullet_drones.envs.single_agent_rl import BaseRL
from gymnasium import spaces
from scipy.spatial.transform import Rotation as R
import numpy as np
import pybullet as pb

@@ -31,19 +32,25 @@ def __init__(
self.rank = rank
self.set_seed(seed)

- self.elem_num = 17
+ self.elem_num = 17 - 1

self.max_step = max_step

self.max_g = 2*9.8
self.max_ang_vel = 2 #10
self.max_vel=40

self.alpha = 0.01
# self.max_speed = deepcopy(drone.max_speed)/2
self.command = np.array(
- [1, 0, 0, 1]
+ # [1, 0, 0, 1]
+ [0, 0, 20]
)

# self.prev_lin_vel
# self.prev_lin_vel
self.history = []

self.last_action = np.zeros(4)
self.randomize = True

@@ -89,7 +96,6 @@ def normalize_observation_space(self):
)



def preprocess_action(self, action):
self.last_action = action.copy()
return self.drone.max_rpm/(1+np.exp(-action))
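preprocess_action squashes the raw policy output through a logistic into the open interval (0, max_rpm), so any real-valued action maps to a valid rotor speed. A standalone sketch of the same mapping (the max_rpm default is illustrative; the real value comes from the drone model):

import numpy as np

def squash_to_rpm(action, max_rpm=24000.0):
    # Logistic squash: -inf -> 0, 0 -> max_rpm/2, +inf -> max_rpm.
    return max_rpm / (1.0 + np.exp(-action))

print(squash_to_rpm(np.zeros(4)))  # [12000. 12000. 12000. 12000.]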
@@ -148,37 +154,102 @@ def check_truncation(self):
return trunc


- def create_initial_state(self):
+ def create_initial_state_(self):
state = super().create_initial_state()
new_pos = np.array([0, 0, 20])
- command = np.random.rand(4)
- command[3] *= self.drone.max_speed/(2-self.alpha)
- command[:2] *= self.alpha
- command[:3] /= np.linalg.norm(command[:3])
+ command = (np.random.rand(4)*2 - 1)*self.alpha
+ command[0]=0
+ rot = R.from_euler("zxy", command[:3]*180, degrees=True)
+ command[:3] = rot.apply([0, 0, 1])

# command[3] *= self.max_vel/(2-self.alpha)
command[3] = self.max_vel*(0.5 + 0.5*command[3])
self.command = command
- # print(self.command)
+ print(self.command)
state.world.pos = new_pos
return state


def create_initial_state(self):
state = super().create_initial_state()
new_pos = np.array([0, 0, 20])
command = (np.random.rand(4)*2 - 1)*self.alpha
command[0]=0
rot = R.from_euler("zxy", command[:3]*180, degrees=True)
command[:3] = rot.apply([0, 0, 1])

# command[3] *= self.max_vel/(2-self.alpha)
command[3] = self.max_vel*(0.5 + 0.5*command[3])
self.command = command[:3]*command[3]
print(self.command)
state.world.pos = new_pos
return state
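Both initializers sample the velocity command the same way: draw four uniforms in [-alpha, alpha], zero the first (yaw) entry, tilt the unit z-axis by the next two values (interpreted as Euler angles in fractions of 180 degrees), and map the last into a speed near max_vel/2; the active version then stores direction times speed as a 3-vector. A self-contained sketch of that sampling (defaults mirror the values set in __init__):

import numpy as np
from scipy.spatial.transform import Rotation as R

def sample_velocity_command(alpha=0.01, max_vel=40.0, rng=None):
    rng = rng if rng is not None else np.random.default_rng()
    raw = (rng.random(4) * 2.0 - 1.0) * alpha   # uniforms in [-alpha, alpha]
    raw[0] = 0.0                                # zero the first (yaw) entry
    # Tilt the unit z-axis by small Euler angles (fractions of 180 degrees).
    direction = R.from_euler("zxy", raw[:3] * 180.0, degrees=True).apply([0.0, 0.0, 1.0])
    speed = max_vel * (0.5 + 0.5 * raw[3])      # close to max_vel / 2
    return direction * speed                    # world-frame velocity command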


def create_initial_action(self):
return np.zeros(4)


- def reward(self):
+ def reward_(self):

state = deepcopy(self.drone.state)

comm = self.command.copy()
vel = state.world.vel
flight_mag = np.linalg.norm(vel)
flight_dir = vel/flight_mag

dir_reward = np.dot(flight_dir, self.command[:3])
mag_diff = np.exp(-(flight_mag - self.command[3])**2)

reward = dir_reward*mag_diff #+ dir_reward*0.3

# mag_abs_diff = np.abs(flight_mag - self.command[3])/self.command[3]
# mag_sq_diff = (flight_mag - self.command[3])**2
mag_sq_diff = ((flight_mag - self.command[3])/self.command[3])**2
mag_diff = np.exp(-mag_sq_diff)
self.save_stats(dir_reward.copy(), mag_sq_diff.copy())
angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.1)

# reward = (dir_reward + mag_diff)*angles_reward
# reward = (dir_reward + mag_sq_diff)*angles_reward
reward = np.exp(-np.sum(((vel-comm[:3]*comm[3])/self.command[3])**2))
# reward = (1+dir_reward)*mag_diff
# print(mag_sq_diff, mag_diff, flight_mag, self.command[3])

return reward


def reward(self):

state = deepcopy(self.drone.state)

comm = self.command.copy()
vel = state.world.vel
flight_mag = np.linalg.norm(vel)
comm_mag = np.linalg.norm(comm)
flight_dir = vel/flight_mag

dir_reward = np.dot(flight_dir, self.command[:3]/comm_mag)

# mag_abs_diff = np.abs(flight_mag - self.command[3])/self.command[3]
# mag_sq_diff = (flight_mag - self.command[3])**2
mag_sq_diff = ((flight_mag - comm_mag)/comm_mag)**2
mag_diff = np.exp(-mag_sq_diff)
self.save_stats(dir_reward.copy(), mag_sq_diff.copy())
angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.1)

# reward = (dir_reward + mag_diff)*angles_reward
# reward = (dir_reward + mag_sq_diff)*angles_reward
reward = np.exp(-np.sum(((vel-comm)/comm_mag)**2))
# reward = (1+dir_reward)*mag_diff
# print(mag_sq_diff, mag_diff, flight_mag, self.command[3])

return reward
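The reward kept here is a Gaussian-shaped velocity-tracking term: the squared error between the actual and commanded world-frame velocity, normalized by the command magnitude and passed through exp, so it is 1.0 at perfect tracking and decays smoothly with the error. A minimal sketch of just that term:

import numpy as np

def velocity_tracking_reward(vel, command):
    # Scale-invariant tracking error; command is the desired velocity vector.
    comm_mag = np.linalg.norm(command)
    return np.exp(-np.sum(((vel - command) / comm_mag) ** 2))

print(velocity_tracking_reward(np.array([0.0, 0.0, 20.0]), np.array([0.0, 0.0, 20.0])))  # 1.0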


def save_stats(self, flight_err, mag_err):
if self.step_idx > 0.3*self.max_step:
self.history.append([flight_err, mag_err])
else:
self.history = []
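save_stats only records (direction reward, magnitude error) pairs once 30% of the episode has elapsed, so the history reflects steady-state tracking rather than the launch transient. How the history feeds back into alpha lives in set_alpha, whose body is collapsed in this view; a purely hypothetical consumer of it (illustrative only, not this repo's logic) could widen the command distribution once tracking is good:

import numpy as np

def maybe_widen_alpha(history, alpha, threshold=0.05, factor=1.5):
    # Hypothetical curriculum step: if the mean magnitude error over the
    # recorded tail of the episode is small, sample harder commands.
    if history and np.mean(np.array(history)[:, 1]) < threshold:
        alpha = min(1.0, alpha * factor)
    return alpha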


def set_alpha(self, alpha):