-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathFRIQ_mainloop.m
263 lines (221 loc) · 13.2 KB
/
FRIQ_mainloop.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
function FRIQ_mainloop()
%
% FRIQ-learning framework v0.70
% https://github.com/szaguldo-kamaz/
%
% Author:
% David Vincze <david.vincze@uni-miskolc.hu>
% Author of the FIVE FRI method:
% Szilveszter Kovacs <szkovacs@iit.uni-miskolc.hu>
% Additional reduction methods by: Tamas Tompa, Alex Toth
%
% Copyright (c) 2013-2022 by David Vincze
%
%% for accessing user config values defined in the setup file
global FRIQ_param_appname FRIQ_param_apptitle
global FRIQ_param_FIVE_UD FRIQ_param_states FRIQ_param_statedivs FRIQ_param_states_steepness FRIQ_param_states_default
global FRIQ_param_actions FRIQ_param_actiondiv
global FRIQ_param_qdiff_pos_boundary FRIQ_param_qdiff_neg_boundary FRIQ_param_qdiff_final_tolerance FRIQ_param_reward_good_above
global FRIQ_param_norandom FRIQ_param_drawsim FRIQ_param_maxsteps FRIQ_param_alpha FRIQ_param_gamma FRIQ_param_epsilon FRIQ_param_maxepisodes
global FRIQ_param_doactionfunc FRIQ_param_rewardfunc FRIQ_param_drawfunc
global FRIQ_param_construct_rb FRIQ_param_measure_constructed_rb_usage FRIQ_param_reduce_rb FRIQ_param_measure_reduced_rb_usage
global FRIQ_param_test_previous_rb FRIQ_param_test_previous_rb_filename
global FRIQ_param_reduction_strategy FRIQ_param_reduction_strategy_secondary FRIQ_const_reduction_strategies__all_cluster_kmeans
global FRIQ_const_reduction_strategy__names FRIQ_const_reduction_secondary_strategies
global FRIQ_const_reduction_strategy__ANTECEDENT_REDUNDANCY FRIQ_const_reduction_strategy__ALL FRIQ_const_reduction_strategy__ALL_CLUSTER_KMEANS
global FRIQ_param_reduction_kmeans_rng FRIQ_param_reduction_kmeans_distancemetric
%% Init
global FRIQ_octave FRIQ_matlab
if exist('OCTAVE_VERSION','builtin') > 0
FRIQ_octave=true;
FRIQ_matlab=false;
else
FRIQ_octave=false;
FRIQ_matlab=true;
end
FIVEpath=strcat(pwd,'/FIVE');
addpath(FIVEpath);
global U VE R stopappnow numofrules reduction_state
stopappnow = 0;
reduction_state = 0;
global numofstates numofactions Usize
numofstates = size(FRIQ_param_states, 2);
numofactions = length(FRIQ_param_actions);
Usize = size(FRIQ_param_FIVE_UD, 2);
if FRIQ_param_norandom == 1
FRIQ_param_epsilon = 0;
end
% 'possiblestates' are used for define points where new rules can be possibly inserted
global possiblestates possiblestates_epsilons
global possibleaction possibleaction_epsilon
possiblestates = FRIQ_param_states;
possibleaction = FRIQ_param_actions;
possiblestates_epsilons = FRIQ_param_statedivs;
possibleaction_epsilon = FRIQ_param_actiondiv;
% Generate initial data
[U, VE, R] = FRIQ_gen_FIVE_FRI_params(FRIQ_param_FIVE_UD, FRIQ_param_states, FRIQ_param_states_steepness, FRIQ_param_actions); % for FRIQ-learning - generate initial rule base and FIVE FRI specific data
[numofrules, ~] = size(R);
xpoints = [];
ypoints = [];
if FRIQ_matlab
set(gcf, 'BackingStore', 'off');
end
set(gco, 'Units', 'data');
set(gcf, 'name', FRIQ_param_apptitle);
set(gcf, 'Color', 'w');
if ~isempty(FRIQ_param_maxepisodes)
FRIQ_param_maxepisodes = 2000;
end
global filetimestamp
timetemp=clock;
filetimestamp=[num2str(timetemp(1)) '.' num2str(timetemp(2),'%0.2d') '.' num2str(timetemp(3),'%0.2d') '__' num2str(timetemp(4),'%0.2d') '_' num2str(timetemp(5),'%0.2d') '_' num2str(floor(timetemp(6)),'%0.2d')];
% check logs/rulebases dir
if ~isfolder('logs')
mkdir('logs');
end
if ~isfolder('rulebases')
mkdir('rulebases');
end
% open logfile
global logfile
logfile = fopen(['logs/FRIQ_' FRIQ_param_appname '_eplog__' filetimestamp '.txt'], 'w');
if logfile == -1
disp(['ERROR: Cannot open logs/FRIQ_' FRIQ_param_appname '_eplog__' filetimestamp '.txt !']);
ERROR
end
total_reward_friq = nan;
steps_friq = nan;
num_of_rules = nan;
%% FRIQ-learning main loop
global debug_on
debug_on = 0;
global epno
%% Simulation starts here
if FRIQ_param_test_previous_rb == 1
epno = 1;
R = dlmread(FRIQ_param_test_previous_rb_filename);
[total_reward_friq, steps_friq] = FRIQ_episode(FRIQ_param_maxsteps, FRIQ_param_alpha, FRIQ_param_gamma, FRIQ_param_epsilon);
[num_of_rules, ~] = size(R);
disp( ['FRIQ_episode: ' int2str(epno) ' FRIQ_steps: ' int2str(steps_friq) ' FRIQ_reward: ' num2str(total_reward_friq) ' epsilon: ' num2str(FRIQ_param_epsilon) ' rules: ' num2str(num_of_rules)])
fprintf(logfile, ['FRIQ_episode: ' int2str(epno) ' FRIQ_steps: ' int2str(steps_friq) ' FRIQ_reward: ' num2str(total_reward_friq) ' epsilon: ' num2str(FRIQ_param_epsilon) ' rules: ' num2str(num_of_rules) '\r\n']);
end
if FRIQ_param_construct_rb == 1
for epno = 1:FRIQ_param_maxepisodes
prevR = R;
prev_reward = total_reward_friq;
prev_steps = steps_friq;
prev_numru = num_of_rules;
[total_reward_friq, steps_friq] = FRIQ_episode(FRIQ_param_maxsteps, FRIQ_param_alpha, FRIQ_param_gamma, FRIQ_param_epsilon);
[num_of_rules, ~] = size(R);
disp( ['FRIQ_episode: ' int2str(epno) ' FRIQ_steps: ' int2str(steps_friq) ' FRIQ_reward: ' num2str(total_reward_friq) ' epsilon: ' num2str(FRIQ_param_epsilon) ' rules: ' num2str(num_of_rules)])
fprintf(logfile, ['FRIQ_episode: ' int2str(epno) ' FRIQ_steps: ' int2str(steps_friq) ' FRIQ_reward: ' num2str(total_reward_friq) ' epsilon: ' num2str(FRIQ_param_epsilon) ' rules: ' num2str(num_of_rules) '\r\n']);
if FRIQ_param_norandom == 0
FRIQ_param_epsilon = FRIQ_param_epsilon * 0.99;
else
FRIQ_param_epsilon = 0;
end
xpoints(epno) = epno - 1; %#ok<AGROW>
ypoints(epno) = steps_friq; %#ok<AGROW>
% Redraw the learning curve
subplot(2, 1, 1);
plot(xpoints, ypoints);
drawnow
if prev_numru == num_of_rules && prev_steps == steps_friq && total_reward_friq > FRIQ_param_reward_good_above && prev_reward == total_reward_friq
if max(abs(R(:, numofstates + 2) - prevR(:, numofstates + 2))) < FRIQ_param_qdiff_final_tolerance
dlmwrite(['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB__' filetimestamp '.csv'], R);
dlmwrite(['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB_steps__' filetimestamp '.txt'], steps_friq);
copyfile(['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB__' filetimestamp '.csv'], ['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB.csv']);
copyfile(['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB_steps__' filetimestamp '.txt'], ['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB_steps.txt']);
break
end
end
end
end
%% Measure rule usage of the constructed rule-base
if FRIQ_param_measure_constructed_rb_usage == 1
disp(['Measuring rule usage with this rule-base: rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB' ]);
fprintf(logfile, ['Measuring rule usage with this rule-base: rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB\r\n' ]);
FRIQ_measure_RB_usage(['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB']);
end
%% Humanize RB (preferrably with "usage")
if FRIQ_param_construct_rb == 1
FRIQ_humanize_RB(['rulebases/FRIQ_' FRIQ_param_appname '_incrementally_constructed_RB']);
end
%% Reduction of a previously constructed rule-base
if FRIQ_param_reduce_rb == 1
if FRIQ_param_reduction_strategy == FRIQ_const_reduction_strategy__ALL
redstrats_to_use=1:size(FRIQ_const_reduction_strategy__names,2);
elseif FRIQ_param_reduction_strategy == FRIQ_const_reduction_strategy__ALL_CLUSTER_KMEANS
redstrats_to_use=FRIQ_const_reduction_strategies__all_cluster_kmeans;
else
redstrats_to_use=FRIQ_param_reduction_strategy;
end
if FRIQ_param_reduction_strategy_secondary == FRIQ_const_reduction_strategy__ALL
secondredstrats_to_use=[ 0 FRIQ_const_reduction_secondary_strategies ];
else
secondredstrats_to_use=FRIQ_param_reduction_strategy_secondary;
end
for redstrat = redstrats_to_use
FRIQ_param_reduction_strategy=redstrat;
if redstrat == FRIQ_const_reduction_strategy__ANTECEDENT_REDUNDANCY && ~isempty(secondredstrats_to_use)
for secondredstrat = secondredstrats_to_use
if secondredstrat == 0
FRIQ_param_reduction_strategy_secondary=[];
disp('Using ANTECEDENT_REDUNDANCY without secondary strategy.');
fprintf(logfile, 'Using ANTECEDENT_REDUNDANCY without secondary strategy.\r\n');
else
FRIQ_param_reduction_strategy_secondary=secondredstrat;
disp([ 'Using ANTECEDENT_REDUNDANCY with ' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy_secondary} ]);
fprintf(logfile, [ 'Using ANTECEDENT_REDUNDANCY with ' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy_secondary} '\r\n' ]);
end
FRIQ_reduction();
% fallbacks can overwrite this in FRIQ_reduction(), so reset it every time before calling FRIQ_reduction()
FRIQ_param_reduction_strategy=FRIQ_const_reduction_strategy__ANTECEDENT_REDUNDANCY;
reduced_rb_basefilename = [ 'rulebases/FRIQ_' FRIQ_param_appname '_reduced_RB_with_' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy} ];
if ~isempty(FRIQ_param_reduction_kmeans_distancemetric)
reduced_rb_basefilename = [ reduced_rb_basefilename '_distmetric_' num2str(FRIQ_param_reduction_kmeans_distancemetric) ];
end
if ~isempty(FRIQ_param_reduction_kmeans_rng)
reduced_rb_basefilename = [ reduced_rb_basefilename '_withrng_' num2str(FRIQ_param_reduction_kmeans_rng) ];
end
if ~isempty(FRIQ_param_reduction_strategy_secondary)
reduced_rb_basefilename = [ reduced_rb_basefilename '_and_' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy_secondary} ];
end
% Measure rule usage of the reduced rule-base
if FRIQ_param_measure_reduced_rb_usage == 1
disp(['Measuring rule usage with this rule-base: ' reduced_rb_basefilename ]);
fprintf(logfile, ['Measuring rule usage with this rule-base: ' reduced_rb_basefilename '\r\n' ]);
FRIQ_measure_RB_usage(reduced_rb_basefilename);
end
FRIQ_humanize_RB(reduced_rb_basefilename);
end
else
FRIQ_param_reduction_strategy_secondary=[];
disp([ 'Using ' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy} ]);
fprintf(logfile, [ 'Using ' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy} '\r\n' ]);
FRIQ_reduction();
% fallbacks can overwrite this in FRIQ_reduction(), so reset it every time before calling FRIQ_reduction()
FRIQ_param_reduction_strategy=redstrat;
reduced_rb_basefilename = [ 'rulebases/FRIQ_' FRIQ_param_appname '_reduced_RB_with_' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy} ];
if ~isempty(FRIQ_param_reduction_kmeans_distancemetric)
reduced_rb_basefilename = [ reduced_rb_basefilename '_distmetric_' num2str(FRIQ_param_reduction_kmeans_distancemetric) ];
end
if ~isempty(FRIQ_param_reduction_kmeans_rng)
reduced_rb_basefilename = [ reduced_rb_basefilename '_withrng_' num2str(FRIQ_param_reduction_kmeans_rng) ];
end
if ~isempty(FRIQ_param_reduction_strategy_secondary)
reduced_rb_basefilename = [ reduced_rb_basefilename '_and_' FRIQ_const_reduction_strategy__names{FRIQ_param_reduction_strategy_secondary} ];
end
% Measure rule usage of the reduced rule-base
if FRIQ_param_measure_reduced_rb_usage == 1
disp(['Measuring rule usage with this rule-base: ' reduced_rb_basefilename ]);
fprintf(logfile, ['Measuring rule usage with this rule-base: ' reduced_rb_basefilename '\r\n' ]);
FRIQ_measure_RB_usage(reduced_rb_basefilename);
end
FRIQ_humanize_RB(reduced_rb_basefilename);
end
end
end
%% DeInit
rmpath(FIVEpath);
fclose(logfile);