# cornk.py
# This code is an implementation of CORN-K (CORrelation-driven Nonparametric learning, top-K variant).
# It is my interpretation after reading the paper and may therefore contain mistakes.
# Reference: B. Li, S. C. H. Hoi, and V. Gopalkrishnan, "CORN: Correlation-driven nonparametric learning approach for portfolio selection"
from datetime import date
import numpy as np
from numpy.lib import corrcoef
import pandas as pd
from matplotlib import pyplot as plt
from stockMarketReader import readDataSet
import math
from scipy.optimize import minimize
# Need to construct a set of experts as required by CORN
class Expert:
    """
    A single CORN expert, characterised by a window size and a correlation threshold.

    Attributes:
        windowSize: window size used by this expert.
        corrThresh: correlation threshold used by this expert.
        numStocks: number of stocks in a portfolio.
        weight: combination weight of the expert (0 until updateWeight is called).
        wealthAchieved: cumulative wealth of the expert, starting at 1.
        currPort: the expert's current portfolio (None until one is assigned).
        corrSimSet: last correlation-similar set assigned through assignCorrSet.
        portHistory: array of shape (numStocks, numDays); column d holds the
            portfolio recorded for day d through addPort.
    """

    def __init__(self, windowSize, corrThresh, numStocks, numDays):
        """
        Initialise a CORN expert with its unique parameters (window size and
        correlation threshold) and the standard parameters (number of stocks
        and number of days).
        """
        self.windowSize = windowSize
        self.corrThresh = corrThresh
        self.numStocks = numStocks
        self.weight = 0
        # initial wealth as based on page 12 note
        self.wealthAchieved = 1
        self.currPort = None
        # Defined up front so reading it before assignCorrSet() does not raise.
        self.corrSimSet = None
        # Zero-filled (rather than uninitialised) so unrecorded days read as 0.
        self.portHistory = np.zeros((numStocks, numDays))

    def updateWeight(self, numExperts):
        """
        Set this expert's weight to 1/numExperts, as it is part of the top-numExperts (K).
        """
        self.weight = 1 / numExperts

    def assignCorrSet(self, corrSet):
        """
        Attach a correlation-similar set to this expert, should it be needed.
        """
        self.corrSimSet = corrSet

    def addPort(self, portfolio, day):
        """
        Record the portfolio used on the given day index.

        portHistory is laid out as (numStocks, numDays), so the portfolio is
        written into column `day`. Only the first numStocks entries are used.
        """
        weights = np.asarray(portfolio, dtype=float)
        self.portHistory[:, day] = weights[:self.numStocks]

    def increaseWealth(self, priceVector):
        """
        Multiply the expert's wealth by the return of its current portfolio.

        priceVector is the price relative vector of the day (time t); the
        return is the dot product of currPort with it.
        """
        self.wealthAchieved = self.wealthAchieved * (self.currPort @ priceVector)
def getUniformPort():
    """
    Build the equal-weight portfolio.

    The number of stocks is read from the module-level `numStocks`; every
    entry of the returned vector is 1/numStocks.
    """
    equalShare = np.ones((numStocks))
    equalShare /= numStocks
    return equalShare
def objective(portfolio, days, setSize):
    """
    Negated cumulative return of `portfolio` over the given days.

    `days` must be a matrix with one row per stock and one column per day.
    `setSize` is accepted for the optimiser's argument tuple but is not used.
    The product of the daily returns is negated so that minimising this
    function maximises the cumulative return.
    """
    cumulative = 1
    # Iterating the transpose walks over the columns (days) of the matrix.
    for dayRelatives in days.T:
        cumulative = cumulative * (portfolio @ dayRelatives)
    return -cumulative
def constraintSumOne(portfolio):
    """
    Equality constraint for the optimiser: the weights must sum to one.

    Returns 1 minus the sum of the portfolio weights, which is zero exactly
    when the constraint is satisfied.
    """
    return 1.0 - np.sum(portfolio)
def boundsCreator():
    """
    Build the per-stock weight bounds for the optimiser.

    Returns a list with one (0.0, 1.0) pair for each of the module-level
    `numStocks` stocks.
    """
    weightRange = (0.0, 1.0)
    return [weightRange for _ in range(numStocks)]
def initialGuess(days, sizeSet):
    """
    Build a starting portfolio for the optimiser.

    `days` is a matrix with one row per stock and one column per day. For
    each of the first `sizeSet` columns the best performing stock receives
    one vote; the votes are then normalised so the weights sum to one.

    The number of stocks is taken from the number of rows of `days`, so the
    function does not depend on any module-level state. When `sizeSet` is
    zero (no days to vote) the uniform portfolio is returned instead of
    dividing by zero.
    """
    days = np.asarray(days)
    stockCount = days.shape[0]
    if sizeSet <= 0:
        return np.full(stockCount, 1.0 / stockCount)
    # Row index of the best stock in each considered day.
    winners = np.argmax(days[:, :sizeSet], axis=0)
    votes = np.bincount(winners, minlength=stockCount).astype(float)
    return votes / votes.sum()
def expertLearn(window, corrThresh, day, data):
    """
    Perform algorithm 1 from the CORN paper (the expert learning procedure).

    Parameters:
        window: window size of the expert.
        corrThresh: correlation threshold (rho) of the expert.
        day: index of the day (t) a portfolio is required for.
        data: price relative matrix, handed to marketWindow/dayReturn.

    Returns:
        The module-level `uniformPort` when there is not enough history
        (day <= window + 1) or no correlation-similar day exists. Otherwise
        the portfolio found by SLSQP that maximises the cumulative return
        over the correlation-similar days, or, if the optimiser reports
        failure, a portfolio fully invested in the single best stock.

    Relies on the module-level `dates`, `numStocks` and `uniformPort`.
    """
    if day <= window + 1:
        return uniformPort

    corrSimSet = _findCorrSimSet(window, corrThresh, day, data)
    if len(corrSimSet) == 0:
        return uniformPort

    print("I found this many in my corrSimSet: " + str(len(corrSimSet)))
    # One column per correlation-similar day, one row per stock.
    corrSimSetDays = np.empty((numStocks, len(corrSimSet)))
    for column, similarDay in enumerate(corrSimSet):
        corrSetDay = dayReturn(similarDay, dates, data)
        corrSimSetDays[:, column] = corrSetDay[:numStocks]

    initGuess = initialGuess(corrSimSetDays, len(corrSimSet))
    bnds = boundsCreator()
    cons = [{'type': 'eq', 'fun': constraintSumOne}]
    sol = minimize(objective, initGuess, args=(corrSimSetDays, len(corrSimSet)),
                   method='SLSQP', bounds=bnds, constraints=cons)
    if sol.success:
        return sol.x

    print("could not optimise so will return CORN PORT")
    return _bestSingleStockPort(corrSimSet, data)


def _findCorrSimSet(window, corrThresh, day, data):
    """Return the day indices whose preceding window correlates with the latest window by at least corrThresh."""
    # The latest window does not depend on the candidate day, so build it once.
    latestWindow = np.asarray(marketWindow(day - window, day - 1, dates, data)).flatten()
    latestStd = np.std(latestWindow)
    similarDays = []
    for i in range(window + 1, day):
        pastWindow = np.asarray(marketWindow(i - window, i - 1, dates, data)).flatten()
        if latestStd == 0 or np.std(pastWindow) == 0:
            # Correlation is undefined for a constant window; treat it as 0.
            pearsonCor = 0
        else:
            # Pearson correlation between the two flattened windows.
            pearsonCor = np.corrcoef(pastWindow, latestWindow)[0, 1]
        if pearsonCor >= corrThresh:
            similarDays.append(i)
    return similarDays


def _bestSingleStockPort(corrSimSet, data):
    """Return a portfolio fully invested in the stock with the highest price relative over the similar days."""
    bestRelative = -np.inf
    port = np.zeros((numStocks))
    for i in corrSimSet:
        priceRelative = dayReturn(i, dates, data)
        dayBest = priceRelative.max()
        if dayBest > bestRelative:
            # Remember the best value seen so far so later, worse days do not override it.
            bestRelative = dayBest
            port = np.zeros((numStocks))
            port[np.argmax(priceRelative, axis=0)] = 1
    return port
def dayReturn(day, dates, data):
    """
    Return the price relative vector of the given day.

    Parameters:
        day: index of the day; converted with int(), so a float index is accepted.
        dates: not used; kept so existing callers do not change.
        data: price relative matrix with one row per stock and one column per day.

    Returns:
        A new 1-D float array with one entry per row of `data`. Day 0 has no
        previous day, so a vector of ones is returned for it.

    Raises:
        IndexError: if `day` is negative or not a column of `data`. A negative
            index is rejected rather than wrapped around to the end of the data.
    """
    day = int(day)
    data = np.asarray(data)
    if day == 0:
        return np.ones((data.shape[0]))
    if not 0 < day < data.shape[1]:
        raise IndexError(
            "day index " + str(day)
            + " is outside the price relative matrix of shape " + str(data.shape)
        )
    # Copy the column so callers can modify the result without touching data.
    return np.array(data[:, day], dtype=float)
def getDatesVec(data):
    """
    Get the vector of dates that spans a given stock market data set.

    `data` must be a pandas dataframe with a 'Date' column. The result is
    the sorted array of distinct values of that column (over all tickers).

    NOTE: an earlier version also selected the rows of the first ticker on
    the first date, but that selection was never used for the result, so it
    has been removed; the returned dates are unchanged.
    """
    return np.unique(data['Date'].to_numpy())
def cornDataRead():
    """
    Ask for a data set name and load its price relative matrix.

    The prompt is repeated until one of the known names is entered; the
    matching text file is then loaded with np.loadtxt and returned.
    """
    priceRelativeFiles = {
        "BIS": "./Data Sets/PriceRelatives/BISPRICERELATIVES.txt",
        "BOV": "./Data Sets/PriceRelatives/BOVPRICERELATIVES.txt",
        "EUR": "./Data Sets/PriceRelatives/EURPRICERELATIVES.txt",
        "JSE": "./Data Sets/PriceRelatives/JSEPRICERELATIVES.txt",
        "NAS": "./Data Sets/PriceRelatives/NASPRICERELATIVES.txt",
        "SP5": "./Data Sets/PriceRelatives/SP5PRICERELATIVES.txt",
    }
    while True:
        name = input("Name of data set\n")
        if name in priceRelativeFiles:
            return np.loadtxt(priceRelativeFiles[name])
        print("ERROR INPUT CORRECT NAME")
# does not rely on using pandas dataframe
def generateHistoricalMarket(data, dates, numStocks, outputName="SP5PRICERELATIVES.txt"):
    """
    Generate the matrix of historical price relative vectors and save it.

    Parameters:
        data: pandas dataframe with 'Date' and 'Close' columns.
        dates: array of the dates to cover, in order.
        numStocks: number of stocks in the data set.
        outputName: file the matrix is written to with np.savetxt
            (defaults to the previously hard-coded name).

    Returns:
        Array of shape (numStocks, len(dates)). Column 0 is all ones. Column i
        is the close of day i divided by the close of day i-1. A day is an
        "error day" and gets a column of ones when either of the two days does
        not have exactly numStocks closing prices; this is checked explicitly
        so a short day is never silently broadcast against a full one.

    NOTE(review): rows are paired by their order within each date; this
    assumes the frame lists the tickers in the same order on every date.
    """
    numDays = len(dates)
    print(numDays)
    relatives = np.ones((numStocks, numDays))
    errorDays = []

    def closesOn(dateValue):
        # Closing prices of every row recorded for the given date.
        return data[data['Date'] == dateValue].Close.to_numpy()

    previousClose = closesOn(dates[0]) if numDays > 0 else None
    for i in range(1, numDays):
        todayClose = closesOn(dates[i])
        if todayClose.shape == (numStocks,) and previousClose.shape == (numStocks,):
            relatives[:, i] = todayClose / previousClose
        else:
            # Acknowledge the error; the column keeps its array of ones.
            errorDays.append(i)
        # Today's closes are tomorrow's "yesterday"; no need to filter twice.
        previousClose = todayClose
        if i % 100 == 0:
            percent = i / numDays
            statement = "Percentage: " + str(percent * 100) + "%, number of errors: " + str(len(errorDays))
            print(statement)

    for i in errorDays:
        print("Error at day: " + str(i))
    print(len(errorDays))

    print("Saving data as " + outputName)
    np.savetxt(outputName, relatives)
    print("Saved")
    # Read the file back to confirm what was written.
    content = np.loadtxt(outputName)
    print("Length of saved item was as follows(numStocks,length):" + str(relatives.shape))
    print("Loaded")
    print("Shape " + str(content.shape))
    return relatives
# relies on using day return a few times
def marketWindow(startDate, endDate, dates, data):
    """
    Collect the price relatives of the days startDate..endDate (both inclusive).

    startDate and endDate are day indices; dates and data are handed to
    dayReturn unchanged. A window of exactly one day is returned as the
    plain vector from dayReturn; any other window is a matrix with one row
    per stock (module-level `numStocks`) and one column per day.
    """
    width = endDate - startDate + 1
    if width == 1:
        return dayReturn(endDate, dates, data)

    # A window that can contain all stocks for every day in the range.
    market = np.empty((numStocks, width))
    for column, dayIndex in enumerate(range(startDate, endDate + 1)):
        dayRelatives = dayReturn(dayIndex, dates, data)
        market[:, column] = [dayRelatives[stock] for stock in range(numStocks)]
    return market
# only relies on dayReturn for the use of data
def calcReturns(portfolios, dates, data, initialCapital = 1):
    """
    Compute the return of each day's portfolio, scaled by the initial capital.

    portfolios[i] is read as the portfolio held on day i, with one weight per
    stock (module-level `numStocks`). Each portfolio is expected to lie on
    the simplex: non-negative weights that sum to 1. The result has one
    entry per date: the weighted sum of that day's price relatives
    multiplied by initialCapital.
    """
    dailyTotals = []
    for dayIndex in range(len(dates)):
        relatives = dayReturn(dayIndex, dates, data)
        weights = portfolios[dayIndex]
        total = 0
        for stock in range(numStocks):
            total += weights[stock] * relatives[stock]
        dailyTotals.append(total)
    return initialCapital * np.array(dailyTotals, dtype=float)
def initExperts(windowSize, numStocks, P):
    """
    Create the flat list of experts.

    One expert is built for every window size 1..windowSize-1 combined with
    every correlation threshold j/P for j = 0..P-1. Experts sharing a window
    size are adjacent in the list, ordered by increasing threshold. The
    number of days given to each expert is the length of the module-level
    `dates`.
    """
    return [
        Expert(window, step / P, numStocks, len(dates))
        for window in range(1, windowSize)
        for step in range(P)
    ]
def printExperts(experts, windowSize, P):
    """
    Print the position, window size and correlation threshold of every expert.

    `experts` is the flat list with P experts per window size and
    windowSize-1 window sizes, so the expert for window row i and threshold
    column j lives at flat index i*P + j.
    """
    for i in range(0, windowSize - 1):
        for j in range(0, P):
            flatIndex = i * P + j
            expert = experts[flatIndex]
            print("Expert at " + str(flatIndex) + " with characteristics:" + str(expert.windowSize) + "," + str(expert.corrThresh))
def findTopK(experts, k=None):
    """
    Find the indices of the top-K experts by achieved wealth.

    Parameters:
        experts: flat list of experts; each must expose `wealthAchieved`.
        k: how many experts to select. When omitted, the module-level `K`
            is used, which is what existing callers rely on.

    Returns:
        Integer array of indices into `experts`, ordered from highest to
        lowest wealth; ties keep the lower index first. At most
        len(experts) indices are returned, so an index never repeats.
        Experts whose wealth is NaN are ranked last.
    """
    if k is None:
        k = K
    expertsWealth = np.array([expert.wealthAchieved for expert in experts], dtype=float)
    # A NaN wealth carries no ranking information; push it to the bottom.
    expertsWealth[np.isnan(expertsWealth)] = -np.inf
    # Stable sort on the negated wealth gives descending order with
    # lower indices first among equal wealths.
    ranking = np.argsort(-expertsWealth, kind="stable")
    return ranking[:k]
# No reliance on dataframe data
def runCorn(dates, data, windowSize, P, startDate, startDateEarly):
    """
    Run the CORN-K algorithm on the data set.

    Parameters:
        dates: vector of dates; only its length is used here.
        data: price relative matrix handed to expertLearn/dayReturn.
        windowSize, P: the experts are created by initExperts(windowSize, numStocks, P).
        startDate: first day index on which the combined portfolio is traded.
        startDateEarly: first day index on which the experts start learning
            and accumulating wealth.

    Returns:
        Array of cumulative wealth: a leading 1 followed by one entry per
        traded day (day index >= startDate). The run stops after the day
        whose index equals the module-level `ENDDate`.

    For every day the experts first choose their portfolios. The top-K
    experts are then selected and combined using the wealth they achieved
    on the PREVIOUS days only; the day's price relatives are applied to the
    experts' wealth afterwards, so the combination never sees the return of
    the day it is trading.

    Relies on the module-level `numStocks`, `K` and `ENDDate`.
    """
    # create experts which is a flat (1D) list
    experts = initExperts(windowSize, numStocks, P)
    totReturn = 1
    # wealth before the first traded day
    returns = [1]
    for i in range(startDateEarly, len(dates)):
        print("i is: " + str(i))
        for expert in experts:
            expert.currPort = expertLearn(expert.windowSize, expert.corrThresh, i, data)
        day = dayReturn(i, dates, data)

        if i >= startDate:
            # Select on historical performance, before today's return is applied.
            topK = [int(x) for x in findTopK(experts)]
            # Experts outside the current top K must not keep an old weight.
            for expert in experts:
                expert.weight = 0
            for x in topK:
                experts[x].updateWeight(K)

            todayPortNumerator = np.zeros(numStocks)
            todayPortDenom = 0.0
            for x in topK:
                contribution = experts[x].weight * experts[x].wealthAchieved
                todayPortNumerator += contribution * experts[x].currPort
                todayPortDenom += contribution
            todayPort = todayPortNumerator / todayPortDenom

            val = day @ todayPort
            if not math.isnan(val):
                totReturn = totReturn * val
            else:
                print("NAN VALUE ENCOUNTERED AT DATE:" + str(i))
            print("TOTAL RETURN AT CURRENT IS: " + str(totReturn))
            returns.append(totReturn)

        # update the experts' individual wealths with today's price relatives
        for expert in experts:
            expert.increaseWealth(day)

        if i == ENDDate:
            break
    return np.array(returns, dtype=float)
# Raw market data (pandas dataframe) and the pre-computed price relative matrix.
data = readDataSet()
dataset = cornDataRead()
# A missing price relative is replaced by 1.
dataset[np.isnan(dataset)] = 1
print(dataset)

dates = getDatesVec(data)
print(len(dates))

# The number of stocks is the number of distinct tickers on the first date.
tempStartFind = data[data['Date'] == dates[0]]
tempTickersFind = np.unique(tempStartFind.Ticker.to_numpy())
numStocks = len(tempTickersFind)

# Quick look at one day's relatives and a one-day market window.
today = dayReturn(1, dates, dataset)
print("CURRENT TESTS")
print(today)
market = marketWindow(1, 1, dates, dataset)
print(market)

# Settings read as module-level names by the functions above.
uniformPort = np.ones((numStocks)) / numStocks
windowSize = 5
P = 10
K = 5
# Label of the exchange, used only in the output file name.
exchange = input()
startDateEarly = 608
startDate = 908
ENDDate = 1162

wealth = runCorn(dates, dataset, windowSize, P, startDate, startDateEarly)
print("Minimum value in wealth array: " + str(wealth.min()))
print("Maximum value in wealth array: " + str(wealth.max()))
outputPath = "./Data Sets/CORNK/{0}-Exchange-StartDate{1}-EndDate{2}.txt".format(exchange, startDate, ENDDate)
np.savetxt(outputPath, wealth)