-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathreadCBOEPickles.py
90 lines (84 loc) · 3.9 KB
/
readCBOEPickles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pickle
import os
from bs4 import BeautifulSoup
import pandas as pd
import datetime
def GenerateTable1(date):
    """Parse one pickled CBOE snapshot into a volume / open-interest table.

    Loads ``./cboe/<date>_cboe_futures.p``, takes element ``1`` of the
    unpickled object as the page text, and scans it for lines containing a
    parenthesised token. Each such line starts a record: the next 37 lines
    (minus empty lines and lines containing a non-breaking space) are read
    positionally.

    Args:
        date: Snapshot date as ``"D_M_YYYY"`` (day, month, year separated by
            underscores), e.g. ``"26_11_2018"``.

    Returns:
        pandas.DataFrame with one row per parsed record and the columns
        ``date`` (ISO ``YYYY-MM-DD`` string), ``title``, ``vol_call``,
        ``vol_put``, ``vol_total``, ``oi_call``, ``oi_put`` and ``oi_total``.
        Numeric fields are kept as strings with ``,`` removed. The frame is
        empty when no record could be parsed.

    Raises:
        FileNotFoundError: If the snapshot file does not exist.
        ValueError: If ``date`` is not a valid ``D_M_YYYY`` date.
    """
    filename = "./cboe/" + date + "_cboe_futures.p"
    # SECURITY: pickle.loads can execute arbitrary code. Only read snapshots
    # that were written by a trusted scraper.
    with open(filename, "rb") as f:
        d = pickle.loads(f.read())

    # The date is the same for every record, so validate and format it once.
    day, month, year = [int(part) for part in date.split("_")[:3]]
    iso_date = datetime.date(year, month, day).isoformat()

    # Only element 1 of the snapshot is ever read, so parse it a single time
    # instead of once per element of ``d``.
    lines = d[1].replace("\r", "").replace(" ", "").split("\n") if d else []

    table1_data = []
    for k, v in enumerate(lines):
        if "(" not in v or ")" not in v:
            continue
        # Fixed-size look-ahead window, without blank and nbsp filler lines.
        window = [j for j in lines[k:k + 37] if j and "\xa0" not in j]
        try:
            symbol = window[0].split("(")[1].split(")")[0]
            # NOTE(review): values containing a thousands separator fail
            # isdigit() and are therefore skipped, exactly as before.
            if not window[5].isdigit():
                continue
            table1_data.append({
                "date": iso_date,
                # The original referenced an undefined name ``WEEKLY`` here,
                # which raised NameError and silently dropped the record.
                # NOTE(review): confirm "WEEKLY" is the intended label.
                "title": "WEEKLY" if ":" in symbol else symbol,
                "vol_call": window[5].replace(",", ""),
                "vol_put": window[6].replace(",", ""),
                "vol_total": window[7].replace(",", ""),
                "oi_call": window[9].replace(",", ""),
                "oi_put": window[10].replace(",", ""),
                "oi_total": window[11].replace(",", ""),
            })
        except IndexError:
            # Truncated or malformed record: skip it, keep parsing the rest.
            continue
    return pd.DataFrame(table1_data)
def GenerateTable2(date):
    """Parse one pickled CBOE snapshot into a high / low / close table.

    Loads ``./cboe/<date>_cboe_futures.p``, takes element ``1`` of the
    unpickled object as the page text, and scans it for lines containing a
    parenthesised token. Each such line starts a record: the next 37 lines
    (minus empty lines and lines containing a non-breaking space) are read
    positionally. When position 12 of a record is the literal ``"High"``,
    the title and the four level fields are filled in from it.

    Args:
        date: Snapshot date as ``"D_M_YYYY"`` (day, month, year separated by
            underscores), e.g. ``"4_1_2016"``.

    Returns:
        pandas.DataFrame with one row per parsed record and the columns
        ``date`` (ISO ``YYYY-MM-DD`` string), ``title``, ``level_high``,
        ``level_low``, ``level_close`` and ``level_change``. Level fields are
        strings with ``,`` removed. Records without a ``"High"`` block are
        kept with an empty title and missing levels. The frame is empty when
        no record could be parsed.

    Raises:
        FileNotFoundError: If the snapshot file does not exist.
        ValueError: If ``date`` is not a valid ``D_M_YYYY`` date.
    """
    filename = "./cboe/" + date + "_cboe_futures.p"
    # SECURITY: pickle.loads can execute arbitrary code. Only read snapshots
    # that were written by a trusted scraper.
    with open(filename, "rb") as f:
        d = pickle.loads(f.read())

    # The date is the same for every record, so validate and format it once.
    day, month, year = [int(part) for part in date.split("_")[:3]]
    iso_date = datetime.date(year, month, day).isoformat()

    # Only element 1 of the snapshot is ever read, so parse it a single time
    # instead of once per element of ``d``.
    lines = d[1].replace("\r", "").replace(" ", "").split("\n") if d else []

    table2_data = []
    for k, v in enumerate(lines):
        if "(" not in v or ")" not in v:
            continue
        # Fixed-size look-ahead window, without blank and nbsp filler lines.
        window = [j for j in lines[k:k + 37] if j and "\xa0" not in j]
        try:
            # NOTE(review): values containing a thousands separator fail
            # isdigit() and are therefore skipped, exactly as before.
            if not window[5].isdigit():
                continue
            record = {
                "date": iso_date,
                "title": "",
                "level_high": None,
                "level_low": None,
                "level_close": None,
                "level_change": None,
            }
            if window[12] == "High" and window[17:]:
                symbol = window[0].split("(")[1].split(")")[0]
                # The original referenced an undefined name ``WEEKLY`` here,
                # which raised NameError and silently dropped the record.
                # NOTE(review): confirm "WEEKLY" is the intended label.
                record["title"] = "WEEKLY" if ":" in symbol else symbol
                record["level_high"] = window[17].replace(",", "")
                record["level_low"] = window[18].replace(",", "")
                record["level_close"] = window[19].replace(",", "")
                record["level_change"] = window[20].replace(",", "")
            # NOTE(review): the source's indentation was lost; appending
            # records that have no "High" block mirrors GenerateTable1's
            # structure - confirm this is the intended placement.
            table2_data.append(record)
        except IndexError:
            # Truncated or malformed record: skip it, keep parsing the rest.
            continue
    return pd.DataFrame(table2_data)
if __name__ == "__main__":
    # Build both tables for every snapshot found in ./cboe/ and persist the
    # concatenated results as pickled DataFrames under ./data/files/.
    snapshot_suffix = "_cboe_futures.p"
    dfs1 = []
    dfs2 = []
    for f in os.listdir("./cboe/"):
        # Ignore anything that is not a snapshot file; previously every
        # directory entry was treated as one, so a stray file either crashed
        # the run or caused a date to be processed twice.
        if not f.endswith(snapshot_suffix):
            continue
        snapshot_date = f[:-len(snapshot_suffix)]
        print(f)
        print(snapshot_date)
        dfs1.append(GenerateTable1(snapshot_date))
        dfs2.append(GenerateTable2(snapshot_date))

    if not dfs1:
        # pd.concat([]) fails with an opaque error; say what is actually wrong.
        raise SystemExit("No *_cboe_futures.p snapshots found in ./cboe/")

    combined_df1 = pd.concat(dfs1)
    combined_df1 = combined_df1.set_index("date")
    print(combined_df1.shape)
    combined_df1.to_pickle("./data/files/cboedf1.p")
    print("Wrote df1 to file")

    combined_df2 = pd.concat(dfs2)
    combined_df2 = combined_df2.set_index("date")
    print(combined_df2.shape)
    combined_df2.to_pickle("./data/files/cboedf2.p")
    # Fixed copy-paste slip: this message used to say "df1".
    print("Wrote df2 to file")