-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathFRIQ_example_cartpole_setup.m
161 lines (145 loc) · 8.64 KB
/
FRIQ_example_cartpole_setup.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
function FRIQ_example_cartpole_setup()
% FRIQ_EXAMPLE_CARTPOLE_SETUP  Problem definition for the CartPole example
% (FRI-based Reinforcement Learning).
%
% Takes no arguments and returns nothing: every setting is written to a
% global FRIQ_param_* variable (state/action partitions, universes, labels,
% rule-base construction/reduction switches, learning parameters and
% callback handles).
%
% FRIQ-learning framework v0.70
% https://github.com/szaguldo-kamaz/
%
% Author: David Vincze <david.vincze@uni-miskolc.hu>
% Copyright (c) 2013-2022 by David Vincze
%

    %% Global parameters written by this function
    global FRIQ_param_appname FRIQ_param_apptitle
    global FRIQ_param_FIVE_UD ...
           FRIQ_param_states FRIQ_param_states_default ...
           FRIQ_param_statedivs FRIQ_param_states_steepness
    global FRIQ_param_actions FRIQ_param_actiondiv
    global FRIQ_param_qdiff_pos_boundary FRIQ_param_qdiff_neg_boundary ...
           FRIQ_param_qdiff_final_tolerance FRIQ_param_reward_good_above ...
           FRIQ_param_reduction_reward_tolerance FRIQ_param_reduction_rule_distance ...
           FRIQ_param_reduction_allow_better_reward_above_tolerance
    global FRIQ_param_norandom FRIQ_param_drawsim FRIQ_param_maxsteps ...
           FRIQ_param_alpha FRIQ_param_gamma FRIQ_param_epsilon ...
           FRIQ_param_maxepisodes
    global FRIQ_param_construct_rb FRIQ_param_measure_constructed_rb_usage ...
           FRIQ_param_reduce_rb FRIQ_param_measure_reduced_rb_usage
    global FRIQ_param_reduction_strategy FRIQ_param_reduction_strategy_secondary ...
           FRIQ_param_remove_unnecessary_membership_functions
    global FRIQ_param_reduction_kmeans_rng FRIQ_param_reduction_kmeans_distancemetric
    global FRIQ_param_doactionfunc FRIQ_param_rewardfunc ...
           FRIQ_param_drawfunc FRIQ_param_quantize_observationsfunc
    global FRIQ_param_antecedent_terms FRIQ_param_antecedent_names

    %% Reduction-strategy constants
    % Only FRIQ_const_reduction_strategy__ALL is read below; the rest are
    % declared so that the alternative strategies can be selected by editing
    % the configuration section.
    global FRIQ_const_reduction_strategy__MIN_Q ...
           FRIQ_const_reduction_strategy__MAX_Q ...
           FRIQ_const_reduction_strategy__HALF_GROUP_REMOVAL ...
           FRIQ_const_reduction_strategy__BUILD_MINANDMAXQ ...
           FRIQ_const_reduction_strategy__ANTECEDENT_REDUNDANCY
    global FRIQ_const_reduction_strategy__ELIMINATE_DUPLICATED__FIRST ...
           FRIQ_const_reduction_strategy__ELIMINATE_DUPLICATED__MINQ ...
           FRIQ_const_reduction_strategy__ELIMINATE_DUPLICATED__MAXQ ...
           FRIQ_const_reduction_strategy__ELIMINATE_DUPLICATED__MERGE_MEAN
    global FRIQ_const_reduction_strategy__ELIMINATE_SIMILAR__FIRST ...
           FRIQ_const_reduction_strategy__ELIMINATE_SIMILAR__MINQ ...
           FRIQ_const_reduction_strategy__ELIMINATE_SIMILAR__MAXQ ...
           FRIQ_const_reduction_strategy__ELIMINATE_SIMILAR__MERGE_MEAN
    global FRIQ_const_reduction_strategy__CLUSTER__HIERARCHICAL ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_REMOVE_ONE ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_REMOVE_MANY ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_REPLACE_ONE ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_REPLACE_MANY ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_BUILD_CENTROID ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_BUILD_MINANDMAXQ ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_BUILD_MAXABSQ ...
           FRIQ_const_reduction_strategy__CLUSTER__KMEANS_BUILD_MINABSQ
    global FRIQ_const_reduction_strategy__ALL ...
           FRIQ_const_reduction_strategy__ALL_CLUSTER_KMEANS

    %% Application identity
    FRIQ_param_apptitle = 'FRIQ-learning example: CartPole';
    FRIQ_param_appname  = 'example_cartpole';

    %% State descriptors
    % States, in order: x1 = x, x2 = x dot, x3 = theta, x4 = theta dot.
    %
    % Step size of each state descriptor. Formula-based alternatives:
    %   x1: (2-(-2)) / 3.0
    %   x2: (0.1-(-0.1)) / 2.0
    %   x3: (deg2rad(12)-(deg2rad(-12))) / 8.0
    %   x4: (deg2rad(10)-(deg2rad(-10))) / 2.0
    cartPosStep   = 2;
    cartVelStep   = 1;
    poleAngleStep = 0.0524;
    poleVelStep   = 2;

    % Descriptor values of each state. Range-based alternatives:
    %   x1: -2:step:2                    x2: -0.5:step:0.5
    %   x3: deg2rad(-12):step:deg2rad(12)
    %   x4: deg2rad(-10):step:deg2rad(10)
    cartPosTerms   = [-1, 1];
    cartVelTerms   = [-1, 0, 1];
    poleAngleTerms = [-0.2094, -0.1571, -0.1047, -0.0524, 0, ...
                       0.0524,  0.1047,  0.1571,  0.2094];
    poleVelTerms   = [-1, 1];

    FRIQ_param_statedivs = {cartPosStep, cartVelStep, poleAngleStep, poleVelStep};
    FRIQ_param_states    = {cartPosTerms, cartVelTerms, poleAngleTerms, poleVelTerms};

    % Steepness of the triangles defining the state membership functions.
    % The third value is the literal form of 1/(deg2rad(12)/4.5).
    FRIQ_param_states_steepness = [1, 1, 21.485917317405871, 1];

    % Default state vector (alternative that was tried: [0 0 0 0.01]).
    FRIQ_param_states_default = [1, 0, 0, 0];

    %% Actions
    FRIQ_param_actiondiv = 0.1;
    FRIQ_param_actions   = -1.0:0.1:1.0;

    %% Universes for FIVE FRI (one row per state, last row for the action)
    % A coarser set was used earlier: -2:0.1:2 for x1, x2 and x4,
    % deg2rad(-15):deg2rad(15)/20:deg2rad(15) for x3 and -1:.05:1 for the action.
    % The theta universe is the literal form of
    % deg2rad(-90):deg2rad(90)/500:deg2rad(90).
    % To dump a universe for inspection, e.g.:
    %   dlmwrite('us1.txt', <row>, 'precision', '%.20f', 'delimiter', '\n');
    cartPosUniverse   = -8:0.016:8;      % state x1 - x
    cartVelUniverse   = -16:0.032:16;    % state x2 - x dot
    poleAngleUniverse = -1.5707963267948966:0.0031415926535897933:1.5707963267948966; % state x3 - theta
    poleVelUniverse   = -8:0.016:8;      % state x4 - theta dot
    actionUniverse    = -4:0.008:4;      % action
    % All rows must have the same number of columns for the stacking to work.
    FRIQ_param_FIVE_UD = vertcat(cartPosUniverse, cartVelUniverse, ...
                                 poleAngleUniverse, poleVelUniverse, ...
                                 actionUniverse);

    %% Linguistic labels (four states followed by the action)
    cartPosLabels   = {'LEFT', 'RIGHT'};
    cartVelLabels   = {'LEFT', 'STOPPED', 'RIGHT'};
    poleAngleLabels = {'MAX LEFT', 'LEFT', 'LITTLE LEFT', 'BIT LEFT', 'STANDING', ...
                       'BIT RIGHT', 'LITTLE RIGHT', 'RIGHT', 'MAX RIGHT'};
    poleVelLabels   = {'LEFT', 'RIGHT'};
    forceLabels     = {'LEFT10', 'LEFT9', 'LEFT8', 'LEFT7', 'LEFT6', ...
                       'LEFT5', 'LEFT4', 'LEFT3', 'LEFT2', 'LEFT1', ...
                       'STOP', ...
                       'RIGHT1', 'RIGHT2', 'RIGHT3', 'RIGHT4', 'RIGHT5', ...
                       'RIGHT6', 'RIGHT7', 'RIGHT8', 'RIGHT9', 'RIGHT10'};
    % Column cell array: one nested cell of labels per antecedent.
    FRIQ_param_antecedent_terms = {cartPosLabels; cartVelLabels; poleAngleLabels; ...
                                   poleVelLabels; forceLabels};

    % NOTE(review): the second name says ACCELERATION although state x2 is
    % described above as "x dot" - confirm whether the label is intentional.
    FRIQ_param_antecedent_names = {'CART POSITION'; 'CART ACCELERATION'; ...
                                   'POLE POSITION'; 'POLE FALLING'; ...
                                   'FORCE TO APPLY'};

    %% Configuration switches
    FRIQ_param_norandom                     = 1;
    FRIQ_param_construct_rb                 = 1;
    FRIQ_param_measure_constructed_rb_usage = 1;
    FRIQ_param_reduce_rb                    = 0;
    FRIQ_param_measure_reduced_rb_usage     = 0;
    FRIQ_param_remove_unnecessary_membership_functions = 0;

    % Rule-base reduction strategy.
    % Alternatives: an empty secondary strategy ([]), or
    % FRIQ_const_reduction_strategy__ANTECEDENT_REDUNDANCY as primary with
    % FRIQ_const_reduction_strategy__ELIMINATE_DUPLICATED__MERGE_MEAN as secondary.
    % NOTE(review): presumably the FRIQ_const_* globals are initialised before
    % this function runs; otherwise both assignments below yield [] - verify.
    FRIQ_param_reduction_strategy           = FRIQ_const_reduction_strategy__ALL;
    FRIQ_param_reduction_strategy_secondary = FRIQ_const_reduction_strategy__ALL;
    FRIQ_param_reduction_kmeans_distancemetric = 'sqeuclidean'; % 'sqeuclidean' | 'cityblock' | 'cosine' | 'correlation'
    FRIQ_param_reduction_kmeans_rng            = 3;

    % Whether to display the graphical interface (set to false to disable).
    FRIQ_param_drawsim  = true;
    % Maximum number of steps per episode.
    FRIQ_param_maxsteps = 1000;

    %% Q-difference boundaries and reward thresholds
    FRIQ_param_qdiff_neg_boundary    = -200.0;
    FRIQ_param_qdiff_pos_boundary    = 1.0;
    FRIQ_param_qdiff_final_tolerance = 250.0;
    FRIQ_param_reward_good_above     = 0.0;

    % Reward tolerance during reduction; set to 0.0 for zero tolerance.
    FRIQ_param_reduction_reward_tolerance = Inf;
    % Distance used for defining 'similar' rules.
    FRIQ_param_reduction_rule_distance = 0.1;
    % If the reward is better, but larger than reduction_reward_tolerance,
    % allow it or not?
    FRIQ_param_reduction_allow_better_reward_above_tolerance = true;

    %% Learning parameters
    FRIQ_param_alpha   = 0.3;    % learning rate
    FRIQ_param_gamma   = 1.0;    % discount factor
    FRIQ_param_epsilon = 0.001;  % probability of a random action selection
                                 % (overridden by FRIQ_param_norandom if necessary)

    %% External functions
    % Calculates the next state based on the selected action.
    FRIQ_param_doactionfunc = @FRIQ_example_cartpole_doaction;
    % Calculates the reward.
    FRIQ_param_rewardfunc   = @FRIQ_example_cartpole_getreward;
    % Visualizes a resulting step.
    FRIQ_param_drawfunc     = @FRIQ_example_cartpole_draw;
    % Quantizes observations (can be "built-in").
    FRIQ_param_quantize_observationsfunc = @FRIQ_example_cartpole_quantize_observations;