-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSelect_Trading_Assets.py
60 lines (49 loc) · 2.38 KB
/
Select_Trading_Assets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#%%
from __future__ import print_function, absolute_import
from gm.api import *
# Authenticate against the gm API with the token stored in token.txt.
with open("token.txt") as token_file:
    # Strip surrounding whitespace: text files normally end with a newline,
    # which would otherwise be passed to set_token as part of the token.
    token = token_file.read().strip()
set_token(token)
#%%
import pandas as pd
import numpy as np
from tqdm import tqdm
def string_to_timestamp(date_str, tz='Asia/Shanghai'):
    """Convert a date/time string to a timezone-aware ``pd.Timestamp``.

    Args:
        date_str: Date/time text accepted by ``pd.Timestamp``,
            e.g. ``'2024-09-01 09:00:00'``.
        tz: Timezone to attach to the result. Defaults to
            ``'Asia/Shanghai'``, the value previously hard-coded here.

    Returns:
        The ``pd.Timestamp`` built from ``date_str`` in timezone ``tz``.
    """
    return pd.Timestamp(date_str, tz=tz)
# Rank Shanghai-listed equity ETFs by number of daily bars and traded amount.
# Read the file containing all ETFs on the Shanghai Exchange.
ETFs = pd.read_excel("ETFs in Shanghai.xlsx")
# Keep the rows whose column 1 equals one of the two equity-ETF labels.
# .copy() makes the selection an independent frame, so the column assignments
# further down do not write into a slice of ETFs (SettingWithCopyWarning).
ETFs_selected = ETFs[
    ETFs.iloc[:, 1].isin(["单市场股票(沪)ETF", "跨市场股票(沪深京)ETF"])
].copy()
# Column 2 supplies the code that is appended to the 'SHSE.' symbol prefix.
ETF_codes = ETFs_selected.iloc[:, 2]
# Cutoff for the traded-amount sum; hoisted because it is the same for every ETF.
insample_end = string_to_timestamp('2024-09-01 09:00:00')
data_entries = []
# NOTE: the spelling "taded_amount" is kept on purpose; it is also the column
# name written to sorted_ETFs.csv.
taded_amount = []
for code in tqdm(ETF_codes):
    # post adjust
    data = history(
        symbol='SHSE.' + str(code),
        frequency='1d',
        start_time='2022-09-01 09:00:00',
        end_time='2024-12-01 16:00:00',
        fields='amount,bob',
        adjust=ADJUST_POST,
        df=True,
    )
    if data.empty:
        # No bars came back: record zeros so this ETF sorts last instead of
        # failing on the missing "bob"/"amount" columns.
        taded_amount.append(0.0)
        data_entries.append(0)
        continue
    # Only bars before the cutoff count towards the traded amount ...
    insample_data = data.loc[data["bob"] < insample_end]
    taded_amount.append(insample_data["amount"].sum())
    # ... while the entry count covers the whole downloaded period.
    data_entries.append(data.shape[0])
ETFs_selected["data_entries"] = data_entries
ETFs_selected["taded_amount"] = taded_amount
# Most bars first; ties broken by the larger traded amount.
ETFs_selected = ETFs_selected.sort_values(
    by=["data_entries", "taded_amount"], ascending=[False, False]
)
ETFs_selected.to_csv("sorted_ETFs.csv")
# %%
# Hand-picked ETF codes (twelve in total) whose return series are downloaded.
final_selected = [
    512480, 512880, 512010, 515790,
    512690, 512660, 512800, 512200,
    516160, 512980, 515170, 516510,
]
#%%
# Download daily bars for the selected ETFs, convert the opens to log returns,
# and write everything to ETF_return_data.csv.
all_data = []
for code in tqdm(final_selected):
    # post adjust
    # Slippage in trading is assumed to be negligible, so the return is
    # calculated directly from the open price.
    data = history(
        symbol='SHSE.' + str(code),
        frequency='1d',
        start_time='2022-09-01 09:00:00',
        end_time='2024-12-01 16:00:00',
        fields='symbol,open,bob',
        adjust=ADJUST_POST,
        df=True,
    )
    if data.empty:
        # Nothing to compute; report it rather than failing on missing columns.
        tqdm.write("No data returned for SHSE." + str(code) + "; skipped.")
        continue
    # Open-to-open log return; the first row is NaN (no previous open).
    data["return"] = np.log(data["open"] / data["open"].shift(1))
    data.drop("open", axis=1, inplace=True)
    # Plain column assignment replaces the column (and its dtype) outright.
    # `.loc[:, 'bob'] = ...` may instead try to write the date objects into the
    # existing datetime column in place on pandas >= 2.0.
    data["bob"] = pd.to_datetime(data["bob"]).dt.date
    all_data.append(data)
if not all_data:
    raise ValueError("history() returned no data for any of the selected ETFs")
all_data = pd.concat(all_data, axis=0)
all_data.to_csv("ETF_return_data.csv")