-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathFRIQ_update_RB.m
118 lines (96 loc) · 4.84 KB
/
FRIQ_update_RB.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
function FRIQ_update_RB(state, action, reward, state_p, action_p, alpha, gamma)
% FRIQ_update_RB: FRIQ-learning framework: Update the fuzzy rule-base
%
% FRIQ-learning framework v0.70
% https://github.com/szaguldo-kamaz/
%
% Author: David Vincze <david.vincze@uni-miskolc.hu>
% Copyright (c) 2013-2022 by David Vincze
%
% Computes
%     Qdiff = alpha * (reward + gamma * Q(state_p, action_p) - Q(state, action))
% where both Q values are obtained from FIVEVagConcl_fixres on the current
% rule-base, and then either
%   * appends a new rule to R (when Qdiff lies outside the interval
%     [FRIQ_param_qdiff_neg_boundary, FRIQ_param_qdiff_pos_boundary] and no
%     rule with the same antecedents exists yet), or
%   * overwrites the consequents of the existing rules whose weight, as
%     returned by FIVEVagConclWeight_fixres for [state, action], exceeds 0.05.
%
% Inputs:
%   state     - current state; concatenated as [state, action], so it is
%               used as a row vector. length(state) is taken as the number
%               of state dimensions.
%   action    - current action (presumably a scalar: it occupies a single
%               antecedent column - confirm against callers)
%   reward    - reward term of the update
%   state_p   - next state, same layout as state
%   action_p  - next action, same layout as action
%   alpha     - factor scaling the whole update term (see Qdiff above)
%   gamma     - factor applied to Q(state_p, action_p)
%
% Globals read:    FRIQ_param_qdiff_pos_boundary, FRIQ_param_qdiff_neg_boundary,
%                  U, VE, R, numofrules, possiblestates,
%                  possiblestates_epsilons, possibleaction,
%                  possibleaction_epsilon, debug_on
% Globals written: R, numofrules, possiblestates, possibleaction
%
% Rule-base layout used here: each row of R is one rule; columns
% 1..length(state) hold the state antecedents, the next column the action
% antecedent and the last used column (RBcolumns) the consequent (Q value).
%
% Errors:
%   FRIQ:update_RB:possibleStateNotFound  - FRIQ_check_possible_states
%       returned inf for one of the state dimensions
%   FRIQ:update_RB:possibleActionNotFound - FRIQ_check_possible_states
%       returned inf for the action

    global FRIQ_param_qdiff_pos_boundary FRIQ_param_qdiff_neg_boundary
    global U VE R numofrules
    global possiblestates possiblestates_epsilons
    global possibleaction possibleaction_epsilon
    global debug_on

    FRIQ_check_universes('q_now', state, action); % just to be on the safe side
    Q_now = FIVEVagConcl_fixres(U, VE, R, [state, action]); % Q(s,a)

    FRIQ_check_universes('q_p', state_p, action_p); % just to be on the safe side
    Q_p = FIVEVagConcl_fixres(U, VE, R, [state_p, action_p]); % Q(sp,ap)

    Qdiff = alpha * (reward + gamma * Q_p - Q_now);

    if debug_on == 1
        disp(['state: ' num2str(state, '%.18f') ' action: ' num2str(action, '%.18f')]);
        disp(['state_p: ' num2str(state_p, '%.18f') ' action_p: ' num2str(action_p, '%.18f')]);
        disp(['Q_now: ' num2str(Q_now, '%.18f') ' Q_p: ' num2str(Q_p, '%.18f') ' Qdiff: ' num2str(Qdiff, '%.18f')]);
    end

    numofstates = length(state);
    numofantecedents = numofstates + 1; % state dimensions + the action
    RBcolumns = numofantecedents + 1;   % antecedents + the consequent

    % insert new rule - if it does not exist
    if (Qdiff > FRIQ_param_qdiff_pos_boundary) || (Qdiff < FRIQ_param_qdiff_neg_boundary)

        % Map every state dimension through FRIQ_check_possible_states; its
        % second output is used as the antecedent value of the candidate rule.
        newpossiblestates = {};
        newrulestates = zeros(1, numofstates);
        for current_possible_state = 1:numofstates
            [newpossiblestates{current_possible_state}, newrulestates(current_possible_state)] = ...
                FRIQ_check_possible_states(state(current_possible_state), ...
                                           possiblestates{current_possible_state}, ...
                                           possiblestates_epsilons{current_possible_state});
            % inf is treated as a fatal result here (presumably "no matching
            % possible state value" - confirm in FRIQ_check_possible_states).
            if newrulestates(current_possible_state) == inf
                error('FRIQ:update_RB:possibleStateNotFound', ...
                      'FRIQ_check_possible_states returned inf for state dimension %d.', ...
                      current_possible_state);
            end
        end

        [newpossibleaction, newruleaction] = FRIQ_check_possible_states(action, possibleaction, possibleaction_epsilon);
        if newruleaction == inf
            error('FRIQ:update_RB:possibleActionNotFound', ...
                  'FRIQ_check_possible_states returned inf for the action.');
        end

        % The globals are only replaced after all checks above have passed.
        possibleaction = newpossibleaction;
        possiblestates = newpossiblestates;

        % Consequent of the candidate rule: the currently interpolated Q at
        % the candidate's antecedents, shifted by Qdiff.
        Q_newrule = FIVEVagConcl_fixres(U, VE, R, [ newrulestates newruleaction ]); % Q(s,a)
        newrule = [ newrulestates newruleaction Q_newrule+Qdiff ];

        % check whether the proposed new rule already exists or not
        % (exact, element-wise match of all antecedent columns)
        numofrules = size(R, 1);
        rulefound = double(ismember(newrule(1:numofantecedents), R(:, 1:numofantecedents), 'rows'));

        if debug_on == 1
            disp(['rulefound: ' int2str(rulefound)]);
        end

        if rulefound == 0 % append new rule to the end of the existing rulebase

            R(size(R, 1) + 1, 1:RBcolumns) = newrule;
            % numofrules is intentionally NOT incremented here, so the global
            % rule count does not yet include the rule that was just added.
            % numofrules=numofrules+1; % TODO: make this an option, use the newly added rule on not in the next iteration
            if debug_on >= 1
                format long
                newrule
                format
            end

        else % or update existing rules

            %- FIVEVagConclWeight_fixres - not weighted with the conclusion!
            rulestoupdate = FIVEVagConclWeight_fixres(U, VE, R, [state, action]);
            for rule = 1:numofrules
                if rulestoupdate(rule) > 0.05
                    % The new consequent is based on Q_now (the value at
                    % [state, action]), not on the rule's previous consequent.
                    newq = Q_now + Qdiff * rulestoupdate(rule);
                    if debug_on == 1
                        disp(['updaterule ' int2str(rule) '. weight: ' num2str(rulestoupdate(rule)) ': oldQ: ' num2str(R(rule, RBcolumns), 18) ' -> newq: ' num2str(newq, 18)]);
                    end
                    R(rule, RBcolumns) = newq;
                end
            end

        end

    else % if deltaq is not so big then just update existing rules

        % [Q_now Q_p Qdiff ]
        rulestoupdate = FIVEVagConclWeight_fixres(U, VE, R, [state, action]);
        % Iterates over the global numofrules, which is not recomputed in
        % this branch.
        for rule = 1:numofrules
            if debug_on == 1
                disp([ 'rule ' num2str(rule) ' weight ' num2str(rulestoupdate(rule),18) ]);
            end
            if rulestoupdate(rule) > 0.05
                % The new consequent is based on Q_now (the value at
                % [state, action]), not on the rule's previous consequent.
                newq = Q_now + Qdiff * rulestoupdate(rule);
                if debug_on == 1
                    [rule R(rule, RBcolumns) newq]
                    disp(['updating rule ' num2str(rule) ' from ' num2str(R(rule, RBcolumns), 18) ' to ' num2str(newq, 18)]);
                end
                R(rule, RBcolumns) = newq;
            end
        end

    end