Commit 181d0e5: stable hover

L-ED committed May 6, 2024
1 parent 585f005
Showing 16 changed files with 1,373 additions and 71 deletions.
149 changes: 145 additions & 4 deletions devel.ipynb
@@ -65,23 +65,164 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3"
"array([1., 2., 3.])"
]
},
"execution_count": 3,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"max(np.array([1, 2, 3]))"
"a = np.array([1., 2., 3.])\n",
"a"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from scipy.spatial.transform import Rotation as R\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"rot = R.from_euler(\"ZYX\", np.array([0, 180, 0]), degrees=True)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1.2246468e-16, 0.0000000e+00, -1.0000000e+00])"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rot.apply([0, 0, 1])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 1.11022302e-16, -1.00000000e+00, -1.11022302e-16],\n",
" [ 8.66025404e-01, 1.11022302e-16, -5.00000000e-01],\n",
" [ 5.00000000e-01, 0.00000000e+00, 8.66025404e-01]])"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rot.as_matrix()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"45.091584"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"4*1.9571e-08*(24000**2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_command(alpha, max_vel):\n",
" new_pos = np.array([0, 0, 20])\n",
" command = (np.random.rand(4)*2 - 1)*alpha\n",
" command[0]=0\n",
" rot = R.from_euler(\"ZYX\", command[:3]*180, degrees=True)\n",
" command[:3] = rot.apply([0, 0, 1])\n",
"\n",
" # command[3] *= self.drone.max_speed/(2-self.alpha)\n",
" command[3] = max_vel*(0.5 + 0.5*command[3])\n",
" return command"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.830339193428011e-172"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.exp(-400)*200"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.66666667, 2. , 3.33333333])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a *=2/3\n",
"a"
]
},
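A note on the scratch cells above: they sanity-check the SciPy Euler convention used in the environment changes below and estimate peak thrust from a thrust coefficient. A self-contained sketch of both checks (reading 1.9571e-08 as a per-motor thrust coefficient in N/RPM^2 is an assumption; the constant and the 24000 RPM figure are taken from the notebook):

import numpy as np
from scipy.spatial.transform import Rotation as R

# A 180-degree rotation about Y (the middle axis of the intrinsic "ZYX"
# sequence) flips the unit z-axis to [0, 0, -1], matching the cell output.
rot = R.from_euler("ZYX", [0.0, 180.0, 0.0], degrees=True)
assert np.allclose(rot.apply([0.0, 0.0, 1.0]), [0.0, 0.0, -1.0])

# Total thrust of four motors at full speed: 4 * kf * rpm**2 ~= 45.09 N.
kf, max_rpm = 1.9571e-08, 24000
print(4 * kf * max_rpm**2)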
4 changes: 2 additions & 2 deletions gym_pybullet_drones/envs/single_agent_rl/__init__.py
@@ -4,5 +4,5 @@
from gym_pybullet_drones.envs.single_agent_rl.AimAviary import AimAviary

from gym_pybullet_drones.envs.single_agent_rl.BaseRL import BaseRL
- from gym_pybullet_drones.envs.single_agent_rl.hover import HoverIMU, HoverGPS, HoverFullState
- from gym_pybullet_drones.envs.single_agent_rl.flight import FlightFullState#, FlightGPS, FlightIMU
+ from gym_pybullet_drones.envs.single_agent_rl.hover import HoverIMU, HoverGPS, HoverFullState, HoverFullStateCurriculum
+ from gym_pybullet_drones.envs.single_agent_rl.flight import FlightFullState, FlightFullStateBuffer#, FlightGPS, FlightIMU
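Downstream code can then import the two newly exported classes at the package level; a minimal usage sketch (constructor arguments are outside this diff):

from gym_pybullet_drones.envs.single_agent_rl import (
    HoverFullStateCurriculum,
    FlightFullStateBuffer,
)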
97 changes: 84 additions & 13 deletions gym_pybullet_drones/envs/single_agent_rl/flight/FlightFullState.py
@@ -1,5 +1,6 @@
from gym_pybullet_drones.envs.single_agent_rl import BaseRL
from gymnasium import spaces
from scipy.spatial.transform import Rotation as R
import numpy as np
import pybullet as pb

@@ -31,19 +32,25 @@ def __init__(
self.rank = rank
self.set_seed(seed)

- self.elem_num = 17
+ self.elem_num = 17 - 1

self.max_step = max_step

self.max_g = 2*9.8
self.max_ang_vel = 2 #10
self.max_vel=40

self.alpha = 0.01
# self.max_speed = deepcopy(drone.max_speed)/2
self.command = np.array(
- [1, 0, 0, 1]
+ # [1, 0, 0, 1]
+ [0, 0, 20]
)

# self.prev_lin_vel
# self.prev_lin_vel
self.history = []

self.last_action = np.zeros(4)
self.randomize = True

@@ -89,7 +96,6 @@ def normalize_observation_space(self):
)



def preprocess_action(self, action):
self.last_action = action.copy()
return self.drone.max_rpm/(1+np.exp(-action))
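preprocess_action squashes the raw policy output through a logistic into the open interval (0, max_rpm), so any real-valued action maps to a valid rotor speed. A standalone sketch of the same mapping (the max_rpm default is illustrative; the real value comes from the drone model):

import numpy as np

def squash_to_rpm(action, max_rpm=24000.0):
    # Logistic squash: -inf -> 0, 0 -> max_rpm/2, +inf -> max_rpm.
    return max_rpm / (1.0 + np.exp(-action))

print(squash_to_rpm(np.zeros(4)))  # [12000. 12000. 12000. 12000.]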
@@ -148,37 +154,102 @@ def check_truncation(self):
return trunc


- def create_initial_state(self):
+ def create_initial_state_(self):
state = super().create_initial_state()
new_pos = np.array([0, 0, 20])
- command = np.random.rand(4)
- command[3] *= self.drone.max_speed/(2-self.alpha)
- command[:2] *= self.alpha
- command[:3] /= np.linalg.norm(command[:3])
+ command = (np.random.rand(4)*2 - 1)*self.alpha
+ command[0]=0
+ rot = R.from_euler("zxy", command[:3]*180, degrees=True)
+ command[:3] = rot.apply([0, 0, 1])

# command[3] *= self.max_vel/(2-self.alpha)
command[3] = self.max_vel*(0.5 + 0.5*command[3])
self.command = command
- # print(self.command)
+ print(self.command)
state.world.pos = new_pos
return state


def create_initial_state(self):
state = super().create_initial_state()
new_pos = np.array([0, 0, 20])
command = (np.random.rand(4)*2 - 1)*self.alpha
command[0]=0
rot = R.from_euler("zxy", command[:3]*180, degrees=True)
command[:3] = rot.apply([0, 0, 1])

# command[3] *= self.max_vel/(2-self.alpha)
command[3] = self.max_vel*(0.5 + 0.5*command[3])
self.command = command[:3]*command[3]
print(self.command)
state.world.pos = new_pos
return state
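Both initializers sample the velocity command the same way: draw four uniforms in [-alpha, alpha], zero the first (yaw) entry, tilt the unit z-axis by the next two values (interpreted as Euler angles in fractions of 180 degrees), and map the last into a speed near max_vel/2; the active version then stores direction times speed as a 3-vector. A self-contained sketch of that sampling (defaults mirror the values set in __init__):

import numpy as np
from scipy.spatial.transform import Rotation as R

def sample_velocity_command(alpha=0.01, max_vel=40.0, rng=None):
    rng = rng if rng is not None else np.random.default_rng()
    raw = (rng.random(4) * 2.0 - 1.0) * alpha   # uniforms in [-alpha, alpha]
    raw[0] = 0.0                                # zero the first (yaw) entry
    # Tilt the unit z-axis by small Euler angles (fractions of 180 degrees).
    direction = R.from_euler("zxy", raw[:3] * 180.0, degrees=True).apply([0.0, 0.0, 1.0])
    speed = max_vel * (0.5 + 0.5 * raw[3])      # close to max_vel / 2
    return direction * speed                    # world-frame velocity command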


def create_initial_action(self):
return np.zeros(4)


- def reward(self):
+ def reward_(self):

state = deepcopy(self.drone.state)

comm = self.command.copy()
vel = state.world.vel
flight_mag = np.linalg.norm(vel)
flight_dir = vel/flight_mag

dir_reward = np.dot(flight_dir, self.command[:3])
mag_diff = np.exp(-(flight_mag - self.command[3])**2)

reward = dir_reward*mag_diff #+ dir_reward*0.3

# mag_abs_diff = np.abs(flight_mag - self.command[3])/self.command[3]
# mag_sq_diff = (flight_mag - self.command[3])**2
mag_sq_diff = ((flight_mag - self.command[3])/self.command[3])**2
mag_diff = np.exp(-mag_sq_diff)
self.save_stats(dir_reward.copy(), mag_sq_diff.copy())
angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.1)

# reward = (dir_reward + mag_diff)*angles_reward
# reward = (dir_reward + mag_sq_diff)*angles_reward
reward = np.exp(-np.sum(((vel-comm[:3]*comm[3])/self.command[3])**2))
# reward = (1+dir_reward)*mag_diff
# print(mag_sq_diff, mag_diff, flight_mag, self.command[3])

return reward


def reward(self):

state = deepcopy(self.drone.state)

comm = self.command.copy()
vel = state.world.vel
flight_mag = np.linalg.norm(vel)
comm_mag = np.linalg.norm(comm)
flight_dir = vel/flight_mag

dir_reward = np.dot(flight_dir, self.command[:3]/comm_mag)

# mag_abs_diff = np.abs(flight_mag - self.command[3])/self.command[3]
# mag_sq_diff = (flight_mag - self.command[3])**2
mag_sq_diff = ((flight_mag - comm_mag)/comm_mag)**2
mag_diff = np.exp(-mag_sq_diff)
self.save_stats(dir_reward.copy(), mag_sq_diff.copy())
angles_reward = np.exp(-np.linalg.norm(state.world.ang_vel)*0.1)

# reward = (dir_reward + mag_diff)*angles_reward
# reward = (dir_reward + mag_sq_diff)*angles_reward
reward = np.exp(-np.sum(((vel-comm)/comm_mag)**2))
# reward = (1+dir_reward)*mag_diff
# print(mag_sq_diff, mag_diff, flight_mag, self.command[3])

return reward
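The reward kept here is a Gaussian-shaped velocity-tracking term: the squared error between the actual and commanded world-frame velocity, normalized by the command magnitude and passed through exp, so it is 1.0 at perfect tracking and decays smoothly with the error. A minimal sketch of just that term:

import numpy as np

def velocity_tracking_reward(vel, command):
    # Scale-invariant tracking error; command is the desired velocity vector.
    comm_mag = np.linalg.norm(command)
    return np.exp(-np.sum(((vel - command) / comm_mag) ** 2))

print(velocity_tracking_reward(np.array([0.0, 0.0, 20.0]), np.array([0.0, 0.0, 20.0])))  # 1.0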


def save_stats(self, flight_err, mag_err):
if self.step_idx > 0.3*self.max_step:
self.history.append([flight_err, mag_err])
else:
self.history = []
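save_stats only records (direction reward, magnitude error) pairs once 30% of the episode has elapsed, so the history reflects steady-state tracking rather than the launch transient. How the history feeds back into alpha lives in set_alpha, whose body is collapsed in this view; a purely hypothetical consumer of it (illustrative only, not this repo's logic) could widen the command distribution once tracking is good:

import numpy as np

def maybe_widen_alpha(history, alpha, threshold=0.05, factor=1.5):
    # Hypothetical curriculum step: if the mean magnitude error over the
    # recorded tail of the episode is small, sample harder commands.
    if history and np.mean(np.array(history)[:, 1]) < threshold:
        alpha = min(1.0, alpha * factor)
    return alpha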


def set_alpha(self, alpha):