-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyStepFunction.m
157 lines (130 loc) · 5.51 KB
/
myStepFunction.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
function [NextObs,Reward,IsDone,StockSaved] = myStepFunction(Action,StockSaved,trainData,ActionVectors,isTrain)
%MYSTEPFUNCTION Advance the stock-trading environment by one step.
% Copyright 2020 The MathWorks, Inc.
%
%   Inputs
%     Action        - scalar row index into ActionVectors.
%     StockSaved    - struct carrying the environment state between steps.
%                     Fields read/written here: cash_in_hand, last7,
%                     stock_owned, prevBoughtPrices, cur_step, total_step,
%                     profits, State.
%     trainData     - table of prices; row k holds the prices at step k,
%                     one variable per stock.
%     ActionVectors - matrix with one row per action and one column per
%                     stock. Per-stock code: 0 = sell everything,
%                     2 = buy; any other value triggers no trade.
%     isTrain       - when false, a profit plot is drawn on the last step.
%
%   Outputs
%     NextObs    - row vector [stock_owned, diffPricesBought*100,
%                  cash_in_hand, Ind1*100, Ind2*100, Ind3*100, Ind4*100].
%     Reward     - +1 / 0 / -1 depending on the sign of the sell reward or
%                  of the portfolio value change; replaced by -bad_actions
%                  when at least one bad action occurred.
%     IsDone     - true when cur_step equals total_step.
%     StockSaved - updated state struct.
%
%   Raises an error when Action is not a valid row index of ActionVectors.

    %% Constants
    % Reference capital used for the baseline in the plot and for the
    % profit shown in the step printout.
    initialInvestment = 20000;

    %% Check action is correct
    % Validate against the number of ROWS (actions); length() would return
    % the largest dimension and accept out-of-range actions when there are
    % fewer actions than stocks.
    numActions = size(ActionVectors,1);
    if ~isscalar(Action) || ~ismember(Action,1:numActions)
        error('Action must be an action combo');
    end

    %% Init of needed variables
    cash_in_hand = StockSaved.cash_in_hand;
    lastweek = StockSaved.last7;
    Reward_action = 0;
    bad_actions = 0;
    Reward = 0;

    %% Updating base values of the new step
    % Portfolio value before the step, priced with the most recent buffered
    % prices (row 1 of the buffer).
    prev_val = sum(StockSaved.stock_owned.*lastweek(1,:)) + cash_in_hand;
    StockSaved.cur_step = StockSaved.cur_step + 1;
    price = trainData{StockSaved.cur_step, :};   % numeric row, one entry per stock
    numStocks = numel(price);
    diffPricesBought = zeros(1,numStocks);

    %% Updating indicators
    Ind1 = (price ./ lastweek(1,:)) - 1;     % change from newest buffered price
    Ind2 = (price ./ lastweek(2,:)) - 1;     % change from second buffered price
    Ind3 = (price ./ lastweek(end,:)) - 1;   % change from oldest buffered price
    % Per-stock average over all buffered rows. The mean must be taken
    % along dimension 1; averaging row 1 would mix different stocks.
    % NOTE(review): if last7 starts zero-filled, this mean is diluted until
    % the buffer is full - confirm how the caller seeds last7.
    Ind4 = (price ./ mean(lastweek,1)) - 1;
    % Division by a zero buffer entry yields Inf or NaN; neutralise both so
    % the observation stays finite.
    Ind1(~isfinite(Ind1)) = 0;
    Ind2(~isfinite(Ind2)) = 0;
    Ind3(~isfinite(Ind3)) = 0;
    Ind4(~isfinite(Ind4)) = 0;

    if StockSaved.cur_step > 7 % waiting for the buffer to be full
        %% Getting actions by stocks
        action_vec = ActionVectors(Action,:);
        sell_index = find(action_vec == 0);
        buy_index = find(action_vec == 2);

        %% Handling sell action (sell everything)
        if ~isempty(sell_index)
            % Bad action: trying to sell a stock that is not owned
            bad_actions = bad_actions + sum(StockSaved.stock_owned(sell_index) == 0);

            % Selling is rewarded when indicators show negative momentum
            % (falling price) and penalised when they are non-negative.
            IndR = [Ind1(sell_index),Ind2(sell_index),Ind3(sell_index),Ind4(sell_index)];
            Reward_action = Reward_action + sum(abs(IndR(IndR<0))) - sum(abs(IndR(IndR>=0)));

            % Relative difference to the recorded purchase price. Handled
            % element-wise: "if isinf(vector)" is only true when ALL
            % elements are Inf, which would let a single Inf slip through.
            soldDiff = price(sell_index)./StockSaved.prevBoughtPrices(sell_index) - 1;
            soldDiff(~isfinite(soldDiff)) = 0;
            diffPricesBought(sell_index) = soldDiff;

            % Selling action
            cash_in_hand = cash_in_hand + sum(price(sell_index).*StockSaved.stock_owned(sell_index));
            StockSaved.stock_owned(sell_index) = 0;

            % Reward calculation
            cur_val = sum(StockSaved.stock_owned.*price) + cash_in_hand;
            Reward_action = Reward_action*100; % scaling reward action
            Profit = cur_val - prev_val;
            Reward = Profit + Reward_action;   % total reward combines action quality and profit
        end

        %% Handling buy action (buy one of each indexed stock per round until one is unaffordable)
        if ~isempty(buy_index)
            % Bad action: trying to buy a stock that costs more than the available cash
            bad_actions = bad_actions + sum(price(buy_index) > cash_in_hand);

            boughtMask = false(1,numStocks);   % stocks actually purchased this step
            can_buy = true;
            while can_buy
                for jj = 1:numel(buy_index)
                    index = buy_index(jj);
                    % price > 0 guarantees cash strictly decreases, so the
                    % loop always terminates; >= matches the bad-action
                    % test above, which treats price == cash as affordable.
                    if price(index) > 0 && cash_in_hand >= price(index)
                        StockSaved.stock_owned(index) = StockSaved.stock_owned(index) + 1;
                        cash_in_hand = cash_in_hand - price(index);
                        boughtMask(index) = true;
                    else
                        can_buy = false;
                    end
                end
            end

            % Record the purchase price only for stocks that were really bought
            StockSaved.prevBoughtPrices(boughtMask) = price(boughtMask);
        end
    end

    %% Reward calculation when stock is held
    cur_val = sum(StockSaved.stock_owned.*price) + cash_in_hand;
    Profit = cur_val - prev_val;
    StockSaved.profits(end+1) = cur_val;
    % Clip to the sign: any positive signal wins over a negative one
    if Reward > 0 || Profit > 0
        Reward = 1;
    elseif Reward < 0 || Profit < 0
        Reward = -1;
    end

    %% Reward calculation if any bad action occurs
    % Any bad action is heavily penalised and overwrites the reward from
    % selling / holding.
    if bad_actions > 0
        Reward = -bad_actions;
    end

    %% Updating new state
    StockSaved.cash_in_hand = cash_in_hand;
    StockSaved.State = [StockSaved.stock_owned,diffPricesBought.*100,StockSaved.cash_in_hand,Ind1.*100,Ind2.*100,Ind3.*100,Ind4.*100];
    NextObs = StockSaved.State;

    %% Updating last day prices buffer
    % Shift every row down by one and store the current prices in row 1
    StockSaved.last7(2:end,:) = StockSaved.last7(1:end-1,:);
    StockSaved.last7(1,:) = price;

    %% Done condition
    IsDone = (StockSaved.cur_step == StockSaved.total_step);

    %% Profit plot
    if IsDone && ~isTrain
        % Buy-and-hold curves: each stock normalised to its first price and
        % scaled to the reference capital
        testDataR = table2array(trainData)./trainData{1,:};
        testDataR = initialInvestment*testDataR;
        stockLabels = arrayfun(@(k) sprintf('stock%d',k), 1:numStocks, 'UniformOutput', false);
        figure;
        plot(StockSaved.profits);hold on;
        plot(repelem(initialInvestment,numel(StockSaved.profits)));
        plot(testDataR);
        title('Profit on Test Data');
        legend([{'Current Value','Initial invest'},stockLabels],'Location','northwest');
        xlabel('Days');
        ylabel('Money');
        ytickformat('usd');
    end

    %% Displaying state of step
    disp('Step: '+string(StockSaved.cur_step)+'/'+string(StockSaved.total_step)+' Action: '+string(Action)+' Profit: '+string(cur_val-initialInvestment)+' Reward: '+string(Reward)+' Reward action: '+string(Reward_action));
end