-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathutils.py
417 lines (322 loc) · 15.5 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
from DeltaHedging.src.backtest import *
# load coin data from GetServerData/data/
def load_data(coin, cwd):
    """
    Load the option data collected for one coin and add derived columns.

    The feather file location is obtained by replacing the substring ``cwd``
    inside the absolute path of the current working directory with
    ``GetServerData/data/<coin>_option_data.ftr``.

    :param coin: 'btc' or 'eth'
    :param cwd: current working directory (the part of the absolute path that
        is swapped for the data file location)
    :return: pandas.DataFrame with all data collected plus the columns
        'strike', 'timestamp' (rounded to 5 minutes) and 't' (time to
        maturity expressed in years of 365 days)
    """
    # import modules
    import datetime as dt
    import os

    import pandas as pd
    import pyarrow.feather as feather

    # file path
    source_path = os.path.abspath(os.getcwd())
    source_path = source_path.replace(cwd, 'GetServerData/data/' + coin + '_option_data.ftr')

    # load data from GetServerData/data/...
    df = feather.read_feather(source_path)
    print(coin + ' data imported')

    # instrument names are split on '-': element 1 is parsed as the expiry
    # date and element 2 as the strike
    name_parts = df['instrument_name'].str.split('-')

    # pull strike
    df['strike'] = [int(parts[2]) for parts in name_parts]

    # pull maturity, shifted by 10 hours
    # NOTE(review): presumably the expiry hour expressed in the local time of
    # the machine that collected the data -- confirm
    maturity = pd.to_datetime([parts[1] for parts in name_parts]) + pd.DateOffset(hours=10)

    # pull date and time -- 5min round
    # The raw value is an epoch in milliseconds; datetime.fromtimestamp()
    # without a tz argument converts it to the machine's local time and the
    # strftime round trip drops the sub-second part.
    local_times = df['timestamp'].apply(
        lambda ms: dt.datetime.fromtimestamp(int(ms) / 1000).strftime('%Y-%m-%d %H:%M:%S'))
    df['timestamp'] = pd.DatetimeIndex(local_times)
    # use the datetime accessor: Series.round() is documented for numeric
    # decimals and its handling of a frequency string is version dependent
    df['timestamp'] = df['timestamp'].dt.round('5min')

    # pull time to maturity: days until maturity divided by 365
    days_to_maturity = (maturity - df['timestamp']) / pd.to_timedelta(1, unit='D')
    df['t'] = days_to_maturity / 365
    print('additional metrics added')
    return df
# evaluate the performance of all the strategies
def all_strategies(lag, coin_df, quantile_iv, coin, fee, margin):
    """
    Evaluate the delta-neutral strategy on every call/put pair of a coin.

    Calls and puts sharing the same instrument name (once the '-C' / '-P'
    suffix is removed) and timestamp are paired. A trading signal is derived
    from the distance of each call's 'mark_iv' from the median 'mark_iv', and
    ``delta_neutral_dollar_strategy`` is run on every pair that has at least
    one non-zero signal. Results are aligned on a common 5-minute time grid,
    stored as parquet files and returned.

    :param lag: int. lag to enter a position after a trading signal
    :param coin_df: pd.DataFrame from load_data()
    :param quantile_iv: float. 0 < quantile_iv < 0.5
    :param coin: str. 'BTC' or 'ETH'
    :param fee: float. i.e. 0.06
    :param margin: float. i.e. 0.5
    :return: 2 pd.DataFrame() with strategies results and position
    :raises ValueError: if no instrument has a non-zero trading signal
    """
    # import modules
    import os
    import numpy as np
    import pandas as pd
    # kept from the original implementation: this switches the pandas
    # chained-assignment warning off globally; code called from here may
    # depend on it -- TODO confirm before removing
    pd.options.mode.chained_assignment = None  # default='warn'
    from tqdm import tqdm

    # source path
    source_path = os.path.abspath(os.getcwd())
    file_path = source_path.replace('deliverables', 'reports/data/' + coin + '/')

    # subset calls and puts; explicit copies so the assignments below never
    # write into a view of coin_df
    names = coin_df['instrument_name']
    df_calls = coin_df[names.str.endswith('-C')].copy()
    df_puts = coin_df[names.str.endswith('-P')].copy()

    # remove '-C' and '-P' from instrument_name (slice off the two-character
    # suffix; str.rstrip would strip a *set* of characters instead)
    df_calls['instrument_name'] = df_calls['instrument_name'].str[:-2]
    df_puts['instrument_name'] = df_puts['instrument_name'].str[:-2]

    # target iVol
    target_iv = df_calls['mark_iv'].median()
    df_calls['difference'] = df_calls['mark_iv'] - target_iv

    # target buy and sell based on target_iv
    buy_signal = df_calls['difference'].quantile(q=0.5 - quantile_iv)
    sell_signal = df_calls['difference'].quantile(q=0.5 + quantile_iv)

    # trading signal: -1 at or above the sell threshold, 1 at or below the
    # buy threshold, 0 otherwise
    df_calls['initial_trading_signal'] = np.where(
        df_calls['difference'] >= sell_signal, -1,
        np.where(df_calls['difference'] <= buy_signal, 1, 0))

    # create df_option with variables of interest
    call_columns = ['timestamp', 'index_price', 'underlying_price', 'instrument_name', 'mark_price',
                    'mark_iv', 'greeks.delta', 'strike', 't', 'difference', 'initial_trading_signal',
                    'underlying_index']
    df_calls = df_calls[call_columns].rename(
        columns={'mark_price': 'call_price', 'greeks.delta': 'market_delta_call'})
    put_columns = ['timestamp', 'instrument_name', 'mark_price', 'greeks.delta']
    df_puts = df_puts[put_columns].rename(
        columns={'mark_price': 'put_price', 'greeks.delta': 'market_delta_put'})
    df_option = df_puts.merge(df_calls, how='inner', on=['timestamp', 'instrument_name'])

    # change 0 prices to 1e-08
    df_option.loc[df_option['call_price'] == 0, 'call_price'] = 1e-08
    df_option.loc[df_option['put_price'] == 0, 'put_price'] = 1e-08

    # Datetime index for the back-test pd.DataFrame: 5-minute steps from the
    # first day (rounded) up to, but excluding, the last day (rounded)
    min_gap = 5
    start_date = df_option['timestamp'].min().round('1d')
    end_date = df_option['timestamp'].max().round('1d')
    n_steps = int((end_date - start_date).total_seconds() / 60.0 / min_gap)
    grid_index = pd.DatetimeIndex(
        pd.date_range(start=start_date, periods=n_steps, freq=str(min_gap) + 'min').values,
        name='timestamp')
    grid = pd.DataFrame(index=grid_index)

    # one aligned column per traded instrument, concatenated once at the end
    returns_columns = []
    position_columns = []

    # loop over each pair of instruments
    for option in tqdm(df_option['instrument_name'].unique()):
        # subset by instrument and sort values
        df = df_option[df_option['instrument_name'] == option].sort_values('timestamp')

        # if there is no trading signal: skip
        if not df['initial_trading_signal'].any():
            continue

        # strategy return (result[0]) and position (result[1])
        result = delta_neutral_dollar_strategy(df=df, fee=fee, margin=margin, lag=lag)

        for values, bucket in ((result[0], returns_columns), (result[1], position_columns)):
            frame = pd.DataFrame(values)
            frame.columns = [option]
            # left join on the grid; duplicated index labels in the strategy
            # output would duplicate grid rows, so keep the first of each
            aligned = grid.join(frame)[option]
            bucket.append(aligned[~aligned.index.duplicated(keep='first')])

    if not returns_columns:
        raise ValueError(coin + ': no instrument has a non-zero trading signal')

    df_final = pd.concat(returns_columns, axis=1)
    df_final_position = pd.concat(position_columns, axis=1)

    # store portfolio results
    df_final.to_parquet(file_path + 'df_final.parquet')
    df_final_position.to_parquet(file_path + 'df_final_position.parquet')

    # print
    print(coin + ' all strategies result: done')
    return df_final, df_final_position
# strategy performance: trading-opportunity plots, summary statistics and final strategy returns
def strategy_performance(coin, transaction_cost):
    """
    Summarise the stored back-test results of a coin and build the final strategy.

    Reads 'df_final.parquet' and 'df_final_position.parquet' from
    ``reports/data/<coin>/``, plots the trading opportunities, writes summary
    statistics of the per-instrument performance, and computes the daily
    returns of the equally weighted strategy.

    :param coin: str. 'BTC' or 'ETH'
    :param transaction_cost: float. subtracted from every non-zero daily
        log-return of an instrument
    :return: (ret, cum_ret) daily strategy returns and their cumulative
        product ``(1 + ret).cumprod()``
    """
    # import modules
    import os
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt

    # directories
    source_path = os.path.abspath(os.getcwd())
    data_dir = 'reports/data/' + coin + '/'
    portfolios_path = source_path.replace('deliverables', data_dir + 'df_final.parquet')
    positions_path = source_path.replace('deliverables', data_dir + 'df_final_position.parquet')

    # load data
    df_portfolios = pd.read_parquet(portfolios_path)
    df_position = pd.read_parquet(positions_path)

    # remove columns of non interest
    if coin == 'BTC':
        # keyword form: the positional axis argument is not accepted by
        # pandas >= 2.0; errors='ignore' tolerates data without this column
        df_portfolios = df_portfolios.drop(columns='BTC-31DEC21-68000', errors='ignore')
        df_position = df_position.drop(columns='BTC-31DEC21-68000', errors='ignore')
        color = 'C0'
    else:
        color = 'C1'

    # trading opportunities: row-wise sum of the positions, and the same sum
    # divided by the number of non-missing positions in the row
    trading_opportunities = df_position.sum(axis=1)
    trading_opportunities_perc = trading_opportunities / df_position.count(axis=1)
    trading_opportunities_perc = trading_opportunities_perc.replace(np.nan, 0)

    def plot_opportunities(series, title, file_name):
        # line plot of `series` with a dashed horizontal line at its mean
        plt.rcParams['font.family'] = 'serif'  # set font family: serif
        fig, ax = plt.subplots(1, 1, figsize=(15, 10))
        fig.text(s=title, x=0.5, y=0.95, fontsize=20, ha='center', va='center')
        fig.text(0.06, 0.5, 'Trading Opportunities', ha='center', va='center', rotation='vertical')
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.plot(series, color=color)
        ax.hlines(y=series.mean(), xmin=series.index[0], xmax=series.index[-1],
                  linestyle='--', color='black', lw=.4)
        plt.margins(x=0)
        plt.margins(y=0)
        fig.legend(['Trading opportunities', 'Mean:' + str(round(series.mean(), 2))],
                   bbox_to_anchor=(.5, 0.03), loc="lower center",
                   bbox_transform=fig.transFigure, ncol=2, frameon=False)
        # save plot
        plt.savefig(source_path.replace('deliverables', data_dir + file_name), dpi=160)
        plt.close()

    # plot trading opportunities
    plot_opportunities(trading_opportunities,
                       coin + ' trading opportunities',
                       'trading_opportunities.pdf')

    # plot trading opportunities % (first and last observation left out)
    plot_opportunities(trading_opportunities_perc.iloc[1:-1],
                       coin + ' trading opportunities as ratio of trading available options',
                       'trading_opportunities_perc.pdf')

    # performance: last available value of each instrument, minus 1
    performance = df_portfolios.ffill(axis=0).iloc[-1, :] - 1
    summary_performance = performance.describe()
    summary_performance['skew'] = performance.skew()
    summary_performance['kurt'] = performance.kurt()
    summary_performance['prob'] = (performance > 0).mean()
    summary_performance['upside'] = performance[performance > 0].mean()
    summary_performance['downside'] = performance[performance < 0].mean()
    print(coin + ' summary performance all strategies: done')
    file_path = source_path.replace('deliverables', data_dir + 'summary_performance_all.csv')
    summary_performance.to_csv(file_path)

    # quantitative strategy
    def quant_strategy(transaction_cost):
        # ffill and bfill NA to calculate returns
        df = df_portfolios.ffill().bfill()
        # evaluate log-returns, multiplied by the position
        log_ret = np.log(df / df.shift(1))
        log_ret = log_ret * df_position
        # daily rebalance
        log_ret = log_ret.resample('D').sum()
        # account for transaction costs
        log_ret[log_ret != 0] = log_ret - transaction_cost
        # equally weighted portfolio: divide each day by the number of
        # instruments with a non-zero return on that day
        log_ret = log_ret.div(log_ret[log_ret != 0].count(axis=1), axis='index')
        ret = log_ret.sum(axis=1)
        # summary
        summary = ret.describe()
        summary['skew'] = ret.skew()
        summary['kurt'] = ret.kurt()
        file_path = source_path.replace('deliverables', data_dir + 'summary_final_ret.csv')
        summary.to_csv(file_path)
        print(coin + ' summary statistics final strategy returns: done')
        return ret

    # strategy performance
    ret = quant_strategy(transaction_cost=transaction_cost)
    cum_ret = (1 + ret).cumprod()
    return ret, cum_ret
# plot of the cumulative performance and returns
def plot_strategy(coin, ret, cum_ret, start_date='2021-03-08 00:00:00'):
    """
    Plot the cumulative performance and the returns of the strategy.

    The figure is saved as ``reports/data/<coin>/<coin>_stategy_performance.pdf``
    (path obtained by replacing 'deliverables' in the current working
    directory).

    :param coin: str. 'BTC' draws in colour 'C0', anything else in 'C1'
    :param ret: pd.Series of returns (lower panel)
    :param cum_ret: pd.Series of cumulative returns (upper panel); it is not
        modified, a copy is used for the plot
    :param start_date: date at which a value of 1 is inserted into the
        cumulative series before plotting; None skips the insertion
    :return: None
    """
    # import modules
    import pandas as pd
    import matplotlib.pyplot as plt
    import os

    source_path = os.path.abspath(os.getcwd())
    plot_path = source_path.replace('deliverables', 'reports/data/' + coin + '/' + coin + '_stategy_performance.pdf')

    # work on a copy so the caller's series does not gain the anchor point
    cum_ret = cum_ret.copy()
    if start_date is not None:
        cum_ret.loc[pd.to_datetime(start_date)] = 1
    cum_ret = cum_ret.sort_index()

    color = 'C0' if coin == 'BTC' else 'C1'

    plt.rcParams['font.family'] = 'serif'  # set font family: serif
    fig, ax = plt.subplots(2, 1, figsize=(15, 10))
    fig.text(s=coin + ' Equally Weighted Portfolio of Delta-Neutral Straddles', x=0.5, y=0.95, fontsize=20,
             ha='center', va='center')

    first, last = cum_ret.index[0], cum_ret.index[-1]
    # upper panel: cumulative series with a reference line at 1
    ax[0].plot(cum_ret, color=color)
    ax[0].hlines(y=1, xmin=first, xmax=last, linestyle='--', color='black', lw=0.8)
    # lower panel: returns with a reference line at 0
    ax[1].plot(ret, color=color)
    ax[1].hlines(y=0, xmin=first, xmax=last, linestyle='--', color='black', lw=0.8)

    for axis in ax:
        # margins
        axis.margins(x=0)
        axis.margins(y=0.1)
        # remove spines
        axis.spines['top'].set_visible(False)
        axis.spines['right'].set_visible(False)
    ax[0].spines['bottom'].set_visible(False)

    # remove labels
    ax[0].tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

    # name labels
    fig.text(0.04, 0.7, 'Cumulative log-returns', ha='center', va='center', rotation='vertical')
    fig.text(0.04, 0.3, 'Daily log-returns', ha='center', va='center', rotation='vertical')

    # savefig, then close the figure so it is released (clf() only clears it)
    plt.savefig(plot_path, dpi=160)
    plt.close(fig)
    print(coin + ' performance plot: done')