-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathqlearn.c
333 lines (267 loc) · 7.45 KB
/
qlearn.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
#include "qlearn.h"
#include <stdlib.h>
#include <pthread.h>
#include <stdio.h>
#include <math.h>
// Global constants: array capacities and the default hyper-parameters
// that ql_init() loads into the tunable globals below
#define MAXSTA 100 // max # of states (row capacity of Q and Q_copy)
#define MAXACT 100 // max # of actions (column capacity of Q and Q_copy)
#define ALPHA0 0.1 // default learning rate
#define EPSINI 0.9 // initial exploration factor
#define EPSFIN 0.01 // final exploration factor
#define GAMMA0 0.9 // default discount factor
#define DECAY0 0.95 // default epsilon decay rate
// Mutexes
// mux_Qmatrix: held by ql_copy_Q() and ql_get_Q() while they access Q_copy
static pthread_mutex_t mux_Qmatrix = PTHREAD_MUTEX_INITIALIZER;
// mux_eps: held by ql_get_epsilon(), ql_reduce_exploration() and
// ql_egreedy_policy() while they access epsilon
static pthread_mutex_t mux_eps = PTHREAD_MUTEX_INITIALIZER;
// Global variables (all set to their defaults by ql_init())
static int nsta; // actual # of states
static int nact; // actual number of actions
static float alpha; // learning rate
static float gam; // discount factor
static float epsilon; // actual exploration probability
static float decay; // decay rate for epsilon
static float eps_norm; // normalized exploration probability: starts at 1.0
// and is multiplied by decay in ql_reduce_exploration()
static float eps_ini; // initial exploration probability
static float eps_fin; // final exploration probability
static float Q[MAXSTA][MAXACT]; // Q matrix, indexed as Q[state][action]
static float Q_copy[MAXSTA][MAXACT]; // Q matrix for graphic task, used for
// avoiding useless critical sections: it is filled by ql_copy_Q() and
// read back by ql_get_Q()
//-----------------------------------------------------
// The following Function initializes all the variables
// needed to implement the QLearning Algorithm
//-----------------------------------------------------
// Initializes the learner for `ns` states and `na` actions.
//   ns: number of states, must be in [1, MAXSTA]
//   na: number of actions, must be in [1, MAXACT]
// On an out-of-range argument an error is printed and the process
// exits with status 1. Otherwise every hyper-parameter is reset to its
// compile-time default and the used part of the Q matrix is zeroed.
void ql_init(int ns, int na){
    int s, a;

    if (ns > MAXSTA) {
        printf("Number of states too big\n");
        exit(1);
    }
    if (na > MAXACT) {
        printf("Number of actions too big\n");
        exit(1);
    }
    // A non-positive action count would later make `rand() % nact` in
    // ql_egreedy_policy() a division by zero, so reject it up front.
    if (ns < 1 || na < 1) {
        printf("Number of states and actions must be positive\n");
        exit(1);
    }

    nsta = ns;
    nact = na;

    alpha = ALPHA0;
    gam = GAMMA0;
    eps_norm = 1.0;
    eps_ini = EPSINI;
    eps_fin = EPSFIN;
    decay = DECAY0;
    epsilon = EPSINI;

    for (s = 0; s < nsta; s++) {
        for (a = 0; a < nact; a++) {
            Q[s][a] = 0;
        }
    }
}
//-----------------------------------------------------
// The following Functions manage Q matrix and its copy
//-----------------------------------------------------
// Snapshots the used nsta x nact region of Q into Q_copy.
// The whole copy runs with mux_Qmatrix held.
void ql_copy_Q(){
    int row, col;

    pthread_mutex_lock(&mux_Qmatrix);
    for (row = 0; row < nsta; ++row) {
        const float *src = Q[row];
        float *dst = Q_copy[row];

        for (col = 0; col < nact; ++col) {
            dst[col] = src[col];
        }
    }
    pthread_mutex_unlock(&mux_Qmatrix);
}
// Copies the snapshot matrix Q_copy into the caller's flat buffer.
//   dest: buffer of at least nsta * nact floats, filled in row-major
//         order, i.e. dest[s * nact + a] = Q_copy[s][a].
//         A NULL pointer is ignored.
// The row stride is nact (not a fixed 4), matching the flat index
// used by ql_Q_to_file(), so rows never overlap when nact != 4.
// The copy runs with mux_Qmatrix held.
void ql_get_Q(float *dest){
    int s, a;

    if (dest == NULL) {
        return;
    }

    pthread_mutex_lock(&mux_Qmatrix);
    for (s = 0; s < nsta; s++) {
        for (a = 0; a < nact; a++) {
            dest[s * nact + a] = Q_copy[s][a];
        }
    }
    pthread_mutex_unlock(&mux_Qmatrix);
}
//-----------------------------------------------------
// The following Functions set the parameters
// used in the QLearning Algorithm
//-----------------------------------------------------
// Stores `a` as the learning rate (alpha) that ql_updateQ() uses to
// scale the TD error.
void ql_set_learning_rate(float a)
{
    alpha = a;
}
// Stores `g` as the discount factor (gamma) that ql_updateQ() applies
// to the best Q value of the next state.
void ql_set_discount_factor(float g)
{
    gam = g;
}
// Sets both ends of the exploration range at once.
//   e_ini: initial exploration probability
//   e_fin: final exploration probability
// epsilon itself is not touched here; it is re-derived from these
// bounds on the next ql_reduce_exploration() call.
void ql_set_expl_range(float e_ini, float e_fin)
{
    eps_fin = e_fin;
    eps_ini = e_ini;
}
// Stores `e_ini` as the initial exploration probability.
void ql_set_epsini(float e_ini)
{
    eps_ini = e_ini;
}
// Stores `e_fin` as the final exploration probability.
void ql_set_epsfin(float e_fin)
{
    eps_fin = e_fin;
}
void ql_set_epsilon(float e){
epsilon = e;
}
// Stores `d` as the factor by which ql_reduce_exploration() multiplies
// the normalized exploration probability on each call.
void ql_set_expl_decay(float d)
{
    decay = d;
}
//-----------------------------------------------------
// The following Functions get the parameters
// used in the QLearning Algorithm
//-----------------------------------------------------
// Returns the current learning rate (alpha).
float ql_get_learning_rate()
{
    return alpha;
}
// Returns the current discount factor (gamma).
float ql_get_discount_factor()
{
    return gam;
}
// Returns the initial exploration probability.
float ql_get_epsini()
{
    return eps_ini;
}
// Returns the final exploration probability.
float ql_get_epsfin()
{
    return eps_fin;
}
// Returns the current exploration probability.
// The value is read with mux_eps held and returned after the mutex
// has been released.
float ql_get_epsilon()
{
    float current;

    pthread_mutex_lock(&mux_eps);
    current = epsilon;
    pthread_mutex_unlock(&mux_eps);

    return current;
}
// Returns the decay rate applied to the exploration probability.
float ql_get_expl_decay()
{
    return decay;
}
//-----------------------------------------------------
// The following Function reduces exploration factor
// epsilon, depending on decay, initial and final
// epsilon values
//-----------------------------------------------------
// Applies one decay step to the exploration probability:
//   eps_norm <- decay * eps_norm
//   epsilon  <- eps_fin + eps_norm * (eps_ini - eps_fin)
// Since ql_init() starts eps_norm at 1.0, epsilon starts at eps_ini
// and (for 0 < decay < 1) approaches eps_fin as calls accumulate.
// Both updates happen with mux_eps held, so the pair is applied as
// one unit with respect to the other users of that mutex.
void ql_reduce_exploration(){
    pthread_mutex_lock(&mux_eps);
    eps_norm = decay * eps_norm;
    epsilon = eps_fin + eps_norm * (eps_ini - eps_fin);
    pthread_mutex_unlock(&mux_eps);
}
//-----------------------------------------------------
// The following Function computes the Maximum Q
// value in a given state s
//-----------------------------------------------------
// Returns the largest Q value among the first nact actions of state `s`.
// `s` is used directly as a row index of Q; no range check is made.
float ql_maxQ(int s){
    const float *row = Q[s];
    float best = row[0];   // action 0 is the starting candidate
    int i = 1;

    while (i < nact) {
        if (row[i] > best) {
            best = row[i];
        }
        ++i;
    }
    return best;
}
//-----------------------------------------------------
// The following Function computes the best action
// in a given state s
//-----------------------------------------------------
// Returns the index of the action with the largest Q value in state `s`.
// On ties the lowest index wins, because only a strictly greater value
// replaces the current candidate.
// NOTE(review): the result is an action index but the declared return
// type is float; the signature is kept as-is for callers - confirm
// against qlearn.h whether int was intended.
float ql_best_action(int s){
    const float *row = Q[s];
    int winner = 0;          // action 0 is the starting candidate
    float top = row[0];
    int i = 1;

    while (i < nact) {
        if (row[i] > top) {
            top = row[i];
            winner = i;
        }
        ++i;
    }
    return (float)winner;
}
//-----------------------------------------------------
// The following Function computes Epsilon-greedy
// policy in a given state s
//-----------------------------------------------------
// Picks an action for state `s` with an epsilon-greedy rule:
// with probability epsilon a uniformly drawn action index in
// [0, nact) is returned, otherwise the action from ql_best_action(s).
// Both candidates and the random draw are always computed, in this
// order, so the sequence of rand() calls does not depend on the outcome.
// epsilon is read with mux_eps held.
int ql_egreedy_policy (int s){
    const int greedy = (int)ql_best_action(s);
    const int random_action = rand() % nact;
    const float draw = frand(0, 1);
    float threshold;

    pthread_mutex_lock(&mux_eps);
    threshold = epsilon;
    pthread_mutex_unlock(&mux_eps);

    return (draw < threshold) ? random_action : greedy;
}
//-----------------------------------------------------
// The following Function updates Q value
//-----------------------------------------------------
// Applies one Q-learning update for the transition (s, a, r, snew).
//   target = r + gam * max over actions of Q[snew][.]
//   Q[s][a] moves toward target by the fraction alpha of the TD error.
// Returns the absolute value of the TD error (target - old Q[s][a]).
float ql_updateQ(int s, int a, int r, int snew){
    const float target = r + gam * ql_maxQ(snew);
    const float td_error = target - Q[s][a];

    Q[s][a] += alpha * td_error;
    return fabsf(td_error);
}
//-----------------------------------------------------
// The following Function shows the Q matrix
//-----------------------------------------------------
// Prints the used nsta x nact region of Q to stdout, one state per
// line, each value followed by a space.
void ql_print_Qmatrix()
{
    int state = 0;

    printf("Q matrix:\n");
    while (state < nsta) {
        const float *row = Q[state];
        int action;

        for (action = 0; action < nact; ++action) {
            printf("%f ", row[action]);
        }
        printf("\n");
        ++state;
    }
}
//-----------------------------------------------------
// The following Function saves Q-Matrix in the file
// named "filename"; if this file doesn't exist, it's
// created.
//-----------------------------------------------------
// Writes the used nsta x nact region of Q to `filename` (created or
// truncated), one "<flat index> <value>" pair per line, where the flat
// index is state * nact + action.
// Failures are reported on stdout: one message if the file cannot be
// opened, another if any write or the final fclose() fails, so a full
// disk no longer produces a silently truncated file.
void ql_Q_to_file(char* filename)
{
    int i, j;
    int failed = 0;
    FILE *fptr;

    fptr = fopen(filename, "w");
    if (fptr == NULL) {
        printf("QLEARN: Error in opening file\n");
        return;
    }

    for (i = 0; i < nsta && !failed; i++) {
        for (j = 0; j < nact; j++) {
            if (fprintf(fptr, "%d %f\n", i*nact + j, Q[i][j]) < 0) {
                failed = 1;
                break;
            }
        }
    }

    // fclose() flushes buffered data, so it can be the call that
    // actually reports the write error.
    if (fclose(fptr) != 0) {
        failed = 1;
    }
    if (failed) {
        printf("QLEARN: Error in writing file\n");
    }
}
//-----------------------------------------------------
// The following Function loads Q-Matrix from the file
// named "filename"; if this doesn't exist, the function
// prints an Error Message.
//-----------------------------------------------------
// Loads nsta * nact "<index> <value>" pairs from `filename` into Q,
// in row-major order. The index column is parsed but not used.
// Returns 1 on success. Returns 0 (after printing a message) when the
// file cannot be opened or when a pair cannot be parsed, e.g. because
// the file is truncated; in the latter case the entries read before
// the failure have already been stored in Q.
int ql_Q_from_file(char* filename)
{
    int i, j, index;
    int ok = 1;
    FILE *fptr;

    fptr = fopen(filename, "r");
    if (fptr == NULL) {
        printf("QLEARN: Error, file: %s doesn't exist!\n", filename);
        return 0;
    }

    for (i = 0; ok && i < nsta; i++) {
        for (j = 0; j < nact; j++) {
            float value;

            // fscanf returns the number of converted items; anything
            // other than 2 means a missing or malformed entry.
            if (fscanf(fptr, "%d %f\n", &index, &value) != 2) {
                ok = 0;
                break;
            }
            Q[i][j] = value;
        }
    }
    fclose(fptr);

    if (!ok) {
        printf("QLEARN: Error, file: %s is truncated or malformed!\n", filename);
    }
    return ok;
}
//-----------------------------------------------------
// The following Function computes a pseudo-random number
// between xmin and xmax
//-----------------------------------------------------
// Returns a pseudo-random float between xmin and xmax, obtained by
// scaling one rand() draw by (xmax - xmin) / RAND_MAX and offsetting
// it by xmin.
float frand(float xmin, float xmax)
{
    const float span = xmax - xmin;
    const float draw = (float)rand();

    return xmin + span * draw / RAND_MAX;
}