QlearningEMS.m
% DESCRIPTION:
% Energy Management System based on machine learning (reinforcement
% learning) for a DC grid. The main power source is a fuel cell, and a
% battery is connected to the grid to supply fast load changes (which the
% slow dynamics of the FC cannot follow).
% The aim of machine learning is to make the grid more efficient (for
% example by minimizing the losses in the converters and the battery) and
% to increase the lifetime of the system by reducing stress on components.
%
% Goal 1: Ensure the power supply at any time (done by the hardware, not
% related to ML)
% Goal 2: Maintain the state of charge of the battery in [SOCmin;SOCmax].
% Goal 3: Minimize the fuel consumption of the grid.
% Goal 3bis: Minimize stress on components (increase lifetime)
% NOTE:
% For reusability, the whole script is written in p.u.
% The conversion to international system (SI) values is done in a
% dedicated block of the Simulink model.
% INPUT:
% Structure containing the simulation parameters (see description in the
% calling script).
% STATUS:
% The function is working fine (no harmful errors detected)
% To improve:
% - Avoid the use of global variables and set up the parallel computing
% framework
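%
% USAGE (illustrative sketch; the field names below are those read from
% simParam in this function, while the values and folder names are
% hypothetical and the actual calling script may differ):
%   simParam.model         = 'gridModel';  % hypothetical Simulink model name
%   simParam.maxEpi        = 50;           % number of episodes
%   simParam.totalTime     = 600;          % s, duration of one episode
%   simParam.iterationTime = 2;            % s, length of one iteration
%   simParam.epsilon       = 0.5;          % initial exploration rate
%   simParam.epsilonDecay  = 0.9995;       % decay factor per iteration
%   simParam.discount      = 0.999;        % discount factor
%   simParam.successRate   = 1;            % 1 = no noise
%   simParam.weightSOC     = 1;            % must be non-zero
%   simParam.weightP_FC    = 0.5;
%   simParam.weightP_batt  = 0.5;
%   simParam.weightSteady  = 0.5;
%   simParam.parentFolder  = 'results';    % hypothetical output folder
%   simParam.subFolder     = 'run1';
%   QlearningEMS(simParam);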
function QlearningEMS(simParam)
global inputsFromWS %...Should find solution to avoid it...
% Having a global variable is not good practice, but it saved time here.
% A global variable also prevents parallel computing, which is a significant
% limitation for this script.
% Create a diary to track only the Simulink errors (the script itself is
% assumed to work properly)
diary
diary off
% #########################################################################
% ################ STATES ######################
% #########################################################################
% The states can be weighted from the calling script. If a weight equals
% zero, the corresponding state is not considered.
% Initialize all the states as if they were not used:
% The suffix _Q is added to emphasize that this is the state used in the
% Q-learning calculation
iter_steady_Q = [0];
P_batt_Q = [0];
P_FC_Q = [0];
% SOC_Q = [0];
% Booleans for rewarding (initialized to false)
isTimeSteadyConsidered = 0; % Reward a constant FC power reference
isP_battConsidered = 0; % To minimize the power supplied by the battery
isP_FCConsidered = 0; % To operate the FCS at the highest efficiency
if simParam.weightSteady ~= 0
iter_steady_Q = [5 8]; % For how many consecutive iterations has the input stayed the same?
isTimeSteadyConsidered = 1;
probability_Forced_Constant_Sequence = 0.15; % Helps the agent learn to take more constant actions during the decay phase, in [0;1]
else
probability_Forced_Constant_Sequence = 0;
end
if simParam.weightP_batt ~= 0
P_batt_Q = [-1 -0.8 -0.55 -0.25 0.25 0.55 0.8 1];
isP_battConsidered = 1;
end
if simParam.weightP_FC ~= 0
P_FC_Q = [0.7 0.9]; % Centered on 0.8: a mean P_FC below 0.8 is good, above is bad
isP_FCConsidered = 1;
end
if simParam.weightSOC ~= 0
SOC_Q = single(linspace(0.4,1,13)); % Battery state of charge
% The SOC is always considered
else
error('Error: SOC must be controlled, weight cannot be equal to 0');
end
% Generate a state list:
% 4-column matrix of all possible combinations of the discretized states.
Q_states=zeros(length(iter_steady_Q)*length(P_batt_Q)*length(P_FC_Q)*length(SOC_Q),4,'single');
% This part doesn't need to be optimized for simulation time (executed only
% once)
index=1;
for j=1:length(iter_steady_Q)
for k = 1:length(P_batt_Q)
for l = 1:length(P_FC_Q)
for m = 1:length(SOC_Q)
Q_states(index,1)=iter_steady_Q(j);
Q_states(index,2)=P_batt_Q(k);
Q_states(index,3)=P_FC_Q(l);
Q_states(index,4)=SOC_Q(m);
index=index+1;
end
end
end
end
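% An equivalent vectorized construction (illustrative sketch, not used here;
% note that ndgrid enumerates the combinations in a different row order than
% the nested loops above):
% [A,B,C,D] = ndgrid(iter_steady_Q,P_batt_Q,P_FC_Q,SOC_Q);
% Q_states_alt = single([A(:) B(:) C(:) D(:)]);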
% Assign the Q-state matrix in the base WS (for user analysis, not used for
% ML calculation)
assignin('base','Q_states',Q_states);
% #########################################################################
% ############### ACTIONS ######################
% #########################################################################
% The only action the EMS takes on the grid is a change of the FC power reference.
dP_FC_ref_Q = 0.15; %p.u.
actions = [0 -dP_FC_ref_Q dP_FC_ref_Q];
% NB: Must be consistent with the number of columns in the Q-matrix
% #########################################################################
% ############### AGENT ######################
% #########################################################################
% Q matrix:
% Rows: states | columns: actions (same number as above)
% Each state is associated with the 3 possible actions.
Q = zeros(size(Q_states,1),3,'single');
% The Q matrix can also be loaded here from previous simulations with the
% load() function
% .....
% Matrix to keep track of the actions taken.
% Used for the adaptive learning rate calculation ("average Q factor"
% method)
Q_visited = zeros(size(Q));
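% With this counter, the learning rate used in the Q-update below is
% alpha = 1/(Q_visited(s,a)+1): each state-action pair starts with alpha = 1
% and its updates are averaged over the number of visits.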
% #########################################################################
% ############### Q-learning SETTINGS ####################
% #########################################################################
% How many episodes of training? (i.e. how many runs does the system go through?)
maxEpi = simParam.maxEpi;
% Choose model
model = simParam.model;
% Set the (approximate) duration of one episode:
totalTime = simParam.totalTime;
% Set the length of one iteration in the simulink model
iterationTime = simParam.iterationTime;
% Exploration vs exploitation
epsilon = simParam.epsilon; % Initial value, generally equal to 0.5
epsilonDecay = simParam.epsilonDecay; % Decay factor per iteration
% Future vs present value
% Should be close to 1 (e.g. 0.999) for the maximum learning time horizon
discount = simParam.discount;
% Inject some noise?
successRate = simParam.successRate; % 1 = no noise
% Where to store the results
resultPath = [simParam.parentFolder '\' simParam.subFolder '\'];
mkdir(resultPath);
% #########################################################################
% ################ INITIALIZE THE SIMULATION #####################
% #########################################################################
% Calculate the number of iterations (must be an integer)
maxit = floor(totalTime/iterationTime);
% Load-profile buffer.
% When the system is off for longer than tStopLearning, the learning should
% stop to avoid modifying the Q-matrix during this period.
% This buffer contains the load values, and the system is considered to be
% off when the buffer is filled only with zeros (and the SOC is not low).
tStopLearning = 6; % Stop the learning 6 s after the system turns off.
loadBufferLength = floor(tStopLearning/iterationTime);
loadBuffer = ones(loadBufferLength,1)'; % Initialized to 1 so that learning is active at start-up.
loadBufferIdle = zeros(loadBufferLength,1)'; % Reference buffer to test equality against.
% Empty structure containing the discretized data for one episode (filled
% with the end value of each iteration)
systemStatesTab = struct(...
'time',transpose(0:iterationTime:maxit*iterationTime)...
,'P_FC_out',zeros(maxit+1,1)...
,'P_Batt',zeros(maxit+1,1)...
,'SOC_battery',zeros(maxit+1,1)...
,'Load_profile',zeros(maxit+1,1)...
,'P_FC_ref',zeros(maxit+1,1)...
,'isExploitationAction',zeros(maxit+1,1)...
,'Stack_efficiency',zeros(maxit+1,1)...
,'reward_SOC',zeros(maxit+1,1)...
,'reward_P_FC',zeros(maxit+1,1)...
,'reward_P_batt',zeros(maxit+1,1)...
,'reward_Steady',zeros(maxit+1,1)...
,'reward',zeros(maxit+1,1));
% NOTE: This structure is overwritten at each iteration.
% Each field has one more element than the number of iterations, to include
% the initial state and allow rates of change to be computed (a feature
% used in previous versions).
% Empty structure containing the continuous data for one episode (filled
% with data collected all along the iteration)
continuousData = struct(...
'time',[]...
,'P_FC_out',[]...
,'P_Batt',timeseries()...
,'SOC_battery',timeseries()...
,'Load_profile',timeseries()...
,'Stack_efficiency',timeseries());
% The size of this structure is unknown (it depends on the Simulink
% variable-step solver)
% Initialize a .txt file containing the relevant data
delete([resultPath 'results.txt']);
resultsReport = fopen([resultPath 'results.txt'],'w');
fprintf(resultsReport,[datestr(now) '\r\n']);
fprintf(resultsReport,['Model used: ' model '\r\n']);
fprintf(resultsReport,'Epsilon start: %2.2f, epsilonDecay %2.7f\r\n',epsilon,epsilonDecay);
fprintf(resultsReport,'Discount: %3.3f\r\n', discount);
fprintf(resultsReport,'Number of episodes planned: %i\r\n', maxEpi);
fprintf(resultsReport,'Weight SOC: %3.3f\r\n',simParam.weightSOC);
fprintf(resultsReport,'Weight FC power: %3.3f\r\n',simParam.weightP_FC);
fprintf(resultsReport,'Weight Battery power: %3.3f\r\n',simParam.weightP_batt);
fprintf(resultsReport,'Weight Steady power: %3.3f\r\n',simParam.weightSteady);
fprintf(resultsReport,'Total time per episode: %5.1fs, Iteration time: %3.2fs\r\n',totalTime,iterationTime);
fprintf(resultsReport,'_______________\r\n\r\n');
% #########################################################################
% ############# START LEARNING ##################
% #########################################################################
for episodes = 1:maxEpi
% Reinitialize the time vector for continuous data at each episode:
continuousData.time = [];
continuousData.P_FC_out = [];
continuousData.P_Batt = timeseries();
continuousData.SOC_battery = timeseries();
continuousData.Load_profile = timeseries();
continuousData.Stack_efficiency = timeseries();
% Has the episode finished properly?
% Feature currently not useful, but might be necessary later
completed = false; % Boolean checking the completion of the episode
while ~completed
% $$$$$$$$$$$$$$$ INITIALIZE THE EPISODE $$$$$$$$$$$$$$$$$$$$$
% Measure the simulation time
t_SimulinkTotal = 0; % For the time running in Simulink
t_LearningStart = cputime; % For the total time (Simulink + Learning)
% Select here the type of load
inputArray(2) = 10; % Realistic load
% Set here the initial SOC
m = mod(episodes,5);
switch m
case 0
SOC_init = 0.3;
case 1
SOC_init = 0.7;
case 2
SOC_init = 0.85;
case 3
SOC_init = 0.55;
case 4
SOC_init = 0.98;
end
inputArray(3) = SOC_init;
% Set here the start value for the FC power
P_FC_init = 0.6;
inputArray(1) = P_FC_init;
% Load the input for the initial time: inputArray
% (the input cannot be calculated at the initial time)
% Column 1: FC reference power
% Column 2: Load profile (code for each profile)
% Column 3: Initial SOC
inputsFromWS = Simulink.Parameter(inputArray);
inputsFromWS.StorageClass='ExportedGlobal';
%...should find how to avoid global...
% Open the model at the beginning of the episode
load_system(model);
% Initialize the variable finalTime giving the end time of the
% iterations
finalTime = 0;
% Initialize the value recording the number of constant actions
% taken in a row (to fill the state):
iterSteady = 0;
% Starting point
Q_state_struct = struct(...
'iter_steady',iterSteady,...
'P_batt',0,...
'P_FC_out',P_FC_init,...
'SOC',SOC_init);
% Convert the structure to array for use in the Q-learning calculation
Q_state_array = transpose(cell2mat(struct2cell(Q_state_struct)));
% Number of exploitation actions (non-random actions)
% For result analysis (not for ML calculation)
nExploitation = 0;
% Initialize the boolean for the case SOC < 10% (which causes a crash in Simulink)
lowSOC = 0;
% Initialize the number of "forced" constant actions
% Used during the decay phase to teach the agent how to take
% sequences of constant inputs
steadyCounter = 0;
% % % % try % Allow error during episode without compromising the next episodes
% Go for one episode of maxit iterations
for h = 1:maxit
g = h + 1; % Do not write into the first row (reserved for the initial values)
fprintf('Episode n.%i, iteration n.%i/%i\n',episodes,h,maxit);
% $$$$$$$$$$$$$$$$$$ Pick an action $$$$$$$$$$$$$$$$$$$$$$$
% Interpolate the state within our discretization (ONLY for
% choosing the action. We do not actually change the state by doing
% this!)
[~,sIdx] = min(sum((Q_states - repmat(Q_state_array,[size(Q_states,1),1])).^2,2));
% sIdx is the row index of the state matrix that best matches the
% current state.
% $$$$$$$$$$$$$$$$$ Choose an action $$$$$$$$$$$$$$$$$$$$$$$$
rng('shuffle'); % Avoid repeated sequences of random numbers
if steadyCounter <= 0 % Are we in a sequence of forced constant actions? Zero or negative means no
% EITHER 1) pick the best action according the Q matrix (EXPLOITATION).
if rand()>min(1,epsilon)... % Probability of taking an exploitation action according to the decay
&& rand()<=successRate... % Fail the check if our action doesn't succeed (i.e. simulating noise)
&& ((Q(sIdx,1)~=Q(sIdx,2)) && (Q(sIdx,1)~=Q(sIdx,3))) % Take a random action when all the action values are equal
[~,aIdx_fc] = max(Q(sIdx,:)); % Pick the action (for the FC power reference) the Q matrix thinks is best
systemStatesTab.isExploitationAction(g) = 0.2; % For displaying only
nExploitation = nExploitation + 1;
% OR 2) Pick a random action (EXPLORATION)
else
rng('shuffle'); % Avoid repeated sequences of random numbers
if rand()<(1-probability_Forced_Constant_Sequence) % Take a random action following the normal process
rng('shuffle'); % Avoid repeated sequences of random numbers
aIdx_fc = randi(size(actions,2),1); % Random action for FC!
systemStatesTab.isExploitationAction(g) = 0; % For displaying only
else % Trigger a sequence of n consecutive constant actions (i.e. help the system learn to keep a constant input)
steadyCounter = 8; % The length of the sequence of constant actions we want to force
systemStatesTab.isExploitationAction(g) = -0.2; % For displaying only
steadyCounter = steadyCounter - 1;
aIdx_fc = 1;
end
end
else % Continue the sequence of consecutive constant actions
systemStatesTab.isExploitationAction(g) = -0.2; % For displaying only
steadyCounter = steadyCounter - 1;
aIdx_fc = 1;
end
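% Summary of the action selection above:
% 1) if a forced constant sequence is running (steadyCounter > 0), keep
%    the input constant (aIdx_fc = 1);
% 2) otherwise exploit the Q-matrix with probability ~(1-epsilon), subject
%    to successRate and to the Q-values not being all equal;
% 3) otherwise explore: either a uniformly random action, or (with
%    probability probability_Forced_Constant_Sequence) start a forced
%    sequence of constant actions.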
% Count for how many consecutive iterations the input has been constant,
% used for rewarding (in both exploration and exploitation).
if aIdx_fc == 1
iterSteady = iterSteady + 1; % Time means number of iterations
else
iterSteady = 0;
end
% $$$$$$$$$$$$$$$$$ Run the model $$$$$$$$$$$$$$$$$$$$$$$$$$$
% New input for the model:
dP_FC_ref_Q = actions(1,aIdx_fc);
inputArray(1) = inputArray(1) + dP_FC_ref_Q;
% Keep the FC power reference in bounds (redundant with the limiters in
% the Simulink model, but accelerates convergence)
if inputArray(1)<0.1
inputArray(1)=0.1;
elseif inputArray(1)>1
inputArray(1)=1;
end
inputsFromWS.Value = inputArray;
% Run the Simulink model for iterationTime
if h ~= 1 % From second iteration and more
set_param(model,'LoadInitialState','on');
currentState = finalState;
initTime = currentState.snapshotTime;
finalTime = initTime + iterationTime; % Calculate the final time of the simulation
assignin('base','finalTime',finalTime);
assignin('base','currentState',currentState);
t_SimulinkIterationStart = cputime;
diary on
simOut = sim(model,'StopTime','finalTime','InitialState','currentState');
diary off
t_SimulinkTotal = t_SimulinkTotal + cputime - t_SimulinkIterationStart;
finalState = simOut.myOperPoint; % Update the model state at the end of the simulation
else % First iteration, i.e. no initial state
set_param(model,'SaveFinalState','on','FinalStateName','myOperPoint',...
'SaveCompleteFinalSimState','on','LoadInitialState','off');
set_param(model,'SimulationMode','accelerator');
set_param(model,'FastRestart','off');
initTime = 0;
finalTime = initTime + iterationTime; % Calculate the final time of the iteration
assignin('base','finalTime',finalTime);
t_SimulinkIterationStart = cputime;
diary on
simOut = sim(model,'StopTime','finalTime');
diary off
t_SimulinkTotal = t_SimulinkTotal + cputime - t_SimulinkIterationStart;
set_param(model,'FastRestart','on');
finalState = simOut.myOperPoint; % Update the model state at the end of the simulation
end
% Collect the results of the iteration (last value returned by the model at the end of the iteration):
systemStatesTab.P_FC_out(g) = simOut.outputsToWS.P_FC_out.Data(end);
systemStatesTab.P_Batt(g) = simOut.outputsToWS.P_batt.Data(end);
systemStatesTab.SOC_battery(g) = simOut.outputsToWS.SOC.Data(end);
systemStatesTab.Stack_efficiency(g) = simOut.outputsToWS.Stack_efficiency.Data(end);
systemStatesTab.P_FC_ref(g) = inputArray(1);
systemStatesTab.Load_profile(g) = simOut.outputsToWS.Load_profile.Data(end);
systemOn = 1;
loadBuffer(mod(g,loadBufferLength)+1) = systemStatesTab.Load_profile(g);
if isequal(loadBuffer,loadBufferIdle) && simOut.outputsToWS.P_batt.Data(end) < 0.7 % Stop learning when there is no load and the battery is not undercharged.
systemOn = 0;
end
% Store the raw continuous data collected all along the iteration
continuousData.time = [continuousData.time simOut.tout'];
continuousData.P_FC_out = [continuousData.P_FC_out simOut.outputsToWS.P_FC_out.Data'];
continuousData.P_Batt = append(continuousData.P_Batt,simOut.outputsToWS.P_batt);
continuousData.SOC_battery = append(continuousData.SOC_battery,simOut.outputsToWS.SOC);
continuousData.Load_profile = append(continuousData.Load_profile,simOut.outputsToWS.Load_profile);
continuousData.Stack_efficiency = append(continuousData.Stack_efficiency,simOut.outputsToWS.Stack_efficiency);
% Fill the Q-learning state
Q_state_struct.iter_steady = iterSteady;
Q_state_struct.P_batt = mean(simOut.outputsToWS.P_batt.Data); % Average over the last iteration (acts as a low-pass filter for frequencies above f_learning)
Q_state_struct.P_FC_out = simOut.outputsToWS.P_FC_out.Data(end);
Q_state_struct.SOC = simOut.outputsToWS.SOC.Data(end);
% Convert the structure to array for use in the Q-learning calculation
Q_state_array = transpose(cell2mat(struct2cell(Q_state_struct)));
% $$$$$$$$$$$$$$$$ Calculate the reward $$$$$$$$$$$$$$$$$$$$
[rSOC,rP_FC,rP_batt,rSteady] = getReward(Q_state_struct,aIdx_fc);
reward = ...
simParam.weightSOC*rSOC +...
simParam.weightP_FC*isP_FCConsidered*rP_FC +...
simParam.weightP_batt*isP_battConsidered*rP_batt +...
simParam.weightSteady*isTimeSteadyConsidered*rSteady;
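% The composite reward is a weighted sum of the individual criteria:
% reward = wSOC*rSOC + wP_FC*rP_FC + wP_batt*rP_batt + wSteady*rSteady.
% The SOC term is always present; the other terms are additionally gated
% by the is*Considered booleans set at the top of the function.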
fprintf('SOC %3.3f\n',Q_state_struct.SOC);
systemStatesTab.reward(g) = reward;
% Save the rewards individually to evaluate the quality of the
% policy with respect to a single criterion:
systemStatesTab.reward_SOC(g) = rSOC;
systemStatesTab.reward_P_FC(g) = rP_FC;
systemStatesTab.reward_P_batt(g) = rP_batt;
systemStatesTab.reward_Steady(g) = rSteady;
% $$$$$$$$$$$$$$$$ Update the Q-matrix $$$$$$$$$$$$$$$$$$$$$$$
% Interpolate again to find the new state the system is closest to.
[~,snewIdx] = min(sum((Q_states - repmat(Q_state_array,[size(Q_states,1),1])).^2,2));
% Update Q
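% Standard Q-learning update with an adaptive learning rate:
% Q(s,a) <- Q(s,a) + alpha*( reward + discount*max_a' Q(s',a') - Q(s,a) ),
% with alpha = 1/(Q_visited(s,a)+1). The systemOn flag zeroes the update
% while the system is idle.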
Q(sIdx,aIdx_fc) = Q(sIdx,aIdx_fc) + (1/(Q_visited(sIdx,aIdx_fc)+1)) * systemOn * ( reward + discount*max(Q(snewIdx,:)) - Q(sIdx,aIdx_fc) ); % The line that makes everything !!!
fprintf('State index %i\n',sIdx);
fprintf('Reward %2.2f\n',reward);
fprintf('Q(sIdx,aIdx_fc) %3.2f\n',Q(sIdx,aIdx_fc));
% Make the action visited one more time
Q_visited(sIdx,aIdx_fc) = Q_visited(sIdx,aIdx_fc) + 1;
% Decay the odds of picking a random action vs picking the
% estimated "best" action. I.e. we're becoming more confident in
% our learned Q.
epsilon = epsilon*epsilonDecay;
% Break the iteration if SOC < 10%
if simOut.outputsToWS.SOC.Data(end) < 0.1
lowSOC = 1;
break
end
end % end iterations counting for single episode
% The episode finished properly if this point is reached
completed = 1;
% $$$$$$$$$$$$$$$$ PLOTTING $$$$$$$$$$$$$$$$$$$$$$$
% Analysis of the episode performance
if ~lowSOC
t_LearningTotal = cputime - t_LearningStart;
fprintf(resultsReport,'Episode %i: \r\n',episodes);
ratioExploitation = (nExploitation/maxit)*100;
fprintf(resultsReport,'Exploitation actions: %3.2f%% \r\n',ratioExploitation);
fprintf(resultsReport,'Epsilon (end of iteration): %2.3f \r\n',epsilon);
fprintf(resultsReport,'Simulink time: %5.1fs \r\n',t_SimulinkTotal);
fprintf(resultsReport,'Episode duration (Simulink + Q-process): %5.1fs \r\n',t_LearningTotal);
ratioTime = (t_SimulinkTotal/t_LearningTotal)*100;
fprintf(resultsReport,'Ratio Simulink/Total time for episode: %3.2f%% \r\n',ratioTime);
fprintf(resultsReport,'_______________\r\n\r\n');
% Resample the raw continuous data at f=1Hz
resampledData = struct(...
'time',[]...
,'P_FC_out',[]...
,'P_Batt',timeseries()...
,'SOC_battery',timeseries()...
,'Load_profile',timeseries()...
,'Stack_efficiency',timeseries());
tEnd = floor(continuousData.time(end));
resampledData.time = (0:tEnd)'; % Resample at a rate of 1 Hz (one value per second)
[x, index] = unique(continuousData.time);
resampledData.P_FC_out = interp1(x,continuousData.P_FC_out(index),resampledData.time);
resampledData.P_Batt = resample(continuousData.P_Batt,resampledData.time);
resampledData.SOC_battery = resample(continuousData.SOC_battery,resampledData.time);
resampledData.Load_profile = resample(continuousData.Load_profile,resampledData.time);
resampledData.Stack_efficiency = resample(continuousData.Stack_efficiency,resampledData.time);
% Plotting the result of the episode
fig = figure(episodes);
subplot(411);
h(1) = plot(systemStatesTab.time(2:end),systemStatesTab.SOC_battery(2:end),'.');
hold on
h(2) = bar(systemStatesTab.time(2:end),systemStatesTab.isExploitationAction(2:end));
h(3) = line([systemStatesTab.time(1),systemStatesTab.time(end)],[0.6,0.6],'Color','k','LineStyle',':');
h(4) = line([systemStatesTab.time(1),systemStatesTab.time(end)],[0.8,0.8],'Color','k','LineStyle',':');
h(5) = plot(resampledData.SOC_battery,'-');
legend(h([1 2 5]),'Processing point','Exploitation','SOC','Location','southwest');
subplot(412)
plot(systemStatesTab.time(2:end),systemStatesTab.reward(2:end),'*-');
hold on
plot(resampledData.P_Batt,'-');
plot(systemStatesTab.time(2:end),systemStatesTab.P_Batt(2:end),'.');
legend('Reward','P Batt','Location','southwest');
subplot(413);
plot(systemStatesTab.time(2:end),systemStatesTab.P_FC_ref(2:end),'.');
hold on
plot(resampledData.time,resampledData.P_FC_out,'-');
plot(resampledData.Load_profile,'-');
legend('P FC ref (p.u.)','P FC out(p.u.)','Load profile (p.u.)','Location','southwest');
subplot(414)
plot(systemStatesTab.time(2:end),systemStatesTab.reward_SOC(2:end),'.-');
hold on
plot(systemStatesTab.time(2:end),systemStatesTab.reward_P_FC(2:end),'.-');
plot(systemStatesTab.time(2:end),systemStatesTab.reward_P_batt(2:end),'.-');
plot(systemStatesTab.time(2:end),systemStatesTab.reward_Steady(2:end),'.-');
legend('rSOC','rP FC','rP batt','rSteady','Location','southwest');
drawnow
% Save the plots
saveas(fig,[resultPath 'episode' num2str(episodes) '.fig']);
saveas(fig,[resultPath 'episode' num2str(episodes) '.jpg']);
close(fig);
% Save the Q-matrix
save([resultPath 'Q_episode' num2str(episodes) '.mat'],'Q');
save([resultPath 'Q_visited_episode' num2str(episodes) '.mat'],'Q_visited');
% Save the data collected
save([resultPath 'Data_episode' num2str(episodes) '.mat'],'systemStatesTab','resampledData');
else
fprintf(resultsReport,'Episode %i: \r\n',episodes);
fprintf(resultsReport,'Failure, SOC too close to 0\r\n');
fprintf(resultsReport,'_______________\r\n\r\n');
end
% % % % catch
% % % % fprintf(resultsReport,'Episode %i: \r\n',episodes);
% % % % fprintf(resultsReport,'Error occurred, go to next episode\r\n');
% % % % fprintf(resultsReport,'_______________\r\n\r\n');
% % % % set_param(model,'FastRestart','off');
% % % % close_system(model,0);
% % % % load_system(model);
% % % % set_param(model,'SimulationCommand','update')
% % % % completed = 1;
% % % % end % end try catch
% % % %
end % end while episode not completed
% Close the model without saving it
%set_param(model,'FastRestart','off');
%close_system(model,0); % It seems that simulations take longer when restarting from an already opened model
end % end episodes counting
% Close the text file
fclose(resultsReport);