-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathtask.py
125 lines (101 loc) · 4.05 KB
/
task.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
# | MAPIR. University of Malaga. 2016 |
# +-----------------------------------------------+
""" Task template v1.1 """
import numpy as np
import agent
import robot
# Task Parameters:
NAME = "Wander_1024s_9a"
DESCRIPTION = "Wander avoiding obstacles. 5 lasers. 4 ranges each. 9 actions"
ROBOT = "Pioneer 3dx with 8-point laser"
ENVIRONMENT = "VREP_SIM: square 6x6m with obstacles"
ENVIRONMENT_DETAIL = "SpeedX3. Threaded Render. Sens:Buffer, Act:Streaming"
AGENT_ELEMENTS = ["MOBILE_BASE", "DISTANCE_SENSOR"]
ENV_ELEMENTS = []
# AGENT_ELEMENTS: "MOBILE_BASE","DISTANCE_SENSOR","GOAL","ARM"
# ENV_ELEMENTS: "GOAL_OBJECT"
# Physical Parameters:
STEP_TIME = 1 # s
MOTOR_SPEED = 1 # rad/s (pioneer 3dx: 1 rad/s: ~ 0.1m/s)
RANGE_OBSTACLES = 0.80 # m
RANGE_DISPLACEMENT = 0.07 # m
RANGE_DAMAGE = 0.05 # m
# STATES. Input variables:
# Dictionary -> "variable name": (discrete intervals). Example:
# "X": (0, 5) 2 ranges: s(X)=0 if X<=5, =1 if X>5.
# "X": (0, 2, 4) 3 ranges: s(X)=0 if X<=2, =1 if X<4, =2 if X>4
# For multiple linear intervals we suggest np.linspace
INPUT_VARIABLES = {
"laser_front": np.linspace(0, RANGE_OBSTACLES, 4),
"laser_front_left": np.linspace(0, RANGE_OBSTACLES, 4),
"laser_left": np.linspace(0, RANGE_OBSTACLES, 4),
"laser_right": np.linspace(0, RANGE_OBSTACLES, 4),
"laser_front_right": np.linspace(0, RANGE_OBSTACLES, 4),
}
OUTPUT_VARIABLES = {
"left_wheel": np.linspace(-MOTOR_SPEED, MOTOR_SPEED, 3),
"right_wheel": np.linspace(-MOTOR_SPEED, MOTOR_SPEED, 3),
}
INITIAL_STATE = 0 # (usually overwritten by the fist observation)
INITIAL_POLICY = 0
def execute_action(actuator):
"""Send the commands to the actuators of the robot.
input: vector of actuator values: e.g. [2.0,-2.0] rad/s"""
assert len(actuator) == len(out_data), " Check output variables"
v_left, v_right = actuator[0], actuator[1]
# Two changes were made to improve the learning process:
# 1- backward movement replaced by forward movement at double speed
if v_left < 0 and v_right < 0:
v_left = v_right = MOTOR_SPEED * 2
# 2- One wheel stopped and the other moving backward replaced by no motion
elif (v_left == 0 and v_right < 0) or (v_left < 0 and v_right == 0):
v_left = v_right = 0
robot.move_wheels(v_left, v_right) # left wheel, right_wheel speeds (rad/s)
# TASK DEFINITION: REWARDS ----------------------------------------------------
REWARDS = np.array([-10.0, -2.0, -0.02, 10.0])
def get_reward(): # abstract s,a,sp pointless here
"""Return the reward from s,a,sp or the environment (recommended)"""
# Sensors values already updated in robot.py when the state was observed
distance_fl = robot.sensor["laser_front_left"]
distance_fr = robot.sensor["laser_front_right"]
distance_f = robot.sensor["laser_front"]
displacement = robot.mobilebase_displacement2d
n_collisions = (
int(distance_fl < RANGE_DAMAGE)
+ int(distance_fr < RANGE_DAMAGE)
+ int(distance_f < RANGE_DAMAGE)
)
r = REWARDS[2]
if n_collisions > 1: # big penalty
r = min(REWARDS)
elif n_collisions == 1: # small penalty
r = REWARDS[1]
elif displacement > RANGE_DISPLACEMENT:
r = max(REWARDS)
return r
# ------------------------------------------------------------------------------
def get_input_data(): # -- no modification needed --
"""Ask for sensory data to the robot and returns a vector with the values.
Relate Input Variables with robot sensors"""
global in_data
for idx, item in enumerate(in_names):
in_data[idx] = robot.sensor[item]
return in_data
def setup():
"""task module setup is performed in agent"""
agent.setup_task()
n_inputs = int
in_values = [None]
in_names = [None]
in_sizes = [int]
n_states = int
in_data = [None]
n_outputs = int
out_values = [None]
out_names = [None]
out_sizes = [int]
n_actions = int
out_data = [None]