-
Notifications
You must be signed in to change notification settings - Fork 8
/
max_daily_return.py
88 lines (77 loc) · 3.32 KB
/
max_daily_return.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# ------------------------------------------------------------------
# Maximum daily return
#
# Maximum daily return is defined as the average of largest n daily
# return in a month (at least 15 daily returns are required)
#
# Bali, Cakici and Whitelaw (2011)
# "While conditioning on the single day with the maximum return is
# both simple and intuitive as a proxy for extreme positive returns,
# it is also slightly arbitrary. As an alternative, we also rank
# stocks by the average of the N (N=1, 2, …, 5) highest daily
# returns within the month"
#
# For example, mdr1 is the maximum return in a month. And mdr2 is
# the average of top 2 daily returns in a month.
# ------------------------------------------------------------------
import wrds
import configparser as cp
import pandas as pd
import numpy as np
import os
import time
class ap_maxret:
def __init__(self):
start_time = time.time()
pass_dir = '~/.pass'
cfg = cp.ConfigParser()
cfg.read(os.path.join(os.path.expanduser(pass_dir), 'credentials.cfg'))
conn = wrds.Connection(wrds_username=cfg['wrds']['username'])
# Extract CRSP daily data
dsf = conn.raw_sql("""
select a.permno, a.date, a.ret
from crsp.dsf a left join crsp.msenames b
on a.permno=b.permno and a.date>=b.namedt and a.date<=b.nameendt
where b.exchcd between -2 and 3 and b.shrcd between 10 and 11
""", date_cols=['date'])
dsf = dsf.drop_duplicates(['permno', 'date'], keep='last')
dsf.loc[dsf['ret']<=-1, 'ret'] = np.nan
dsf['permno'] = dsf['permno'].astype(int)
end_time = time.time()
print('\n--------- Extract data from WRDS ---------')
print(f'Time used: {(end_time-start_time)/60: 3.1f} mins\n')
# Rank returns to find top 5 returns in a month
start_time = time.time()
dsf['yyyymm'] = dsf['date'].dt.year*100 + dsf['date'].dt.month
# Require at least 15 days in a month
dsf = dsf.dropna()
dsf['n_day'] = (dsf.groupby(['permno', 'yyyymm'])
['ret'].transform('count'))
dsf = dsf.query('n_day>=15').copy()
del dsf['n_day']
dsf['rank'] = (dsf.groupby(['permno', 'yyyymm'])['ret']
.rank(method='min', ascending=False))
self.dsf = dsf.copy()
end_time = time.time()
print(f'--------- Rank returns ---------')
print(f'Time used: {end_time-start_time: 3.1f} seconds\n')
def maxret(self, n):
start_time = time.time()
df = self.dsf.query('rank<=@n').copy()
df = (df.groupby(['permno', 'yyyymm'])['ret']
.mean().to_frame('mdr'+str(n)).reset_index())
df = df.sort_values(['permno', 'yyyymm'], ignore_index=True)
end_time = time.time()
print(f'--------- MDR{n} ---------')
print(f'Time used: {end_time-start_time: 3.1f} seconds\n')
return df
if __name__ == '__main__':
db = ap_maxret()
mdr = db.maxret(1)
for i in range(2, 6):
_tmp = db.maxret(i)
mdr = mdr.merge(_tmp, how='left', on=['permno', 'yyyymm'])
mdr = mdr.sort_values(['permno', 'yyyymm'], ignore_index=True)
data_dir = '/Volumes/Seagate/asset_pricing_data'
mdr.to_csv(os.path.join(data_dir, 'mdr.txt'), sep='\t', index=False)
print('Done: data is generated')