-
-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
build factor source data (price and volumes) from qlib if no source d…
…ata is provided by the user (#168)
- Loading branch information
1 parent
6a5a750
commit 48c81ea
Showing
11 changed files
with
166 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -139,6 +139,9 @@ dmypy.json | |
# all pkl files | ||
*.pkl | ||
|
||
# all h5 files | ||
*.h5 | ||
|
||
# all vs-code files | ||
.vscode/ | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
24 changes: 24 additions & 0 deletions
24
rdagent/scenarios/qlib/experiment/factor_data_template/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# How to read files. | ||
For example, if you want to read `filename.h5` | ||
```Python | ||
import pandas as pd | ||
df = pd.read_hdf("filename.h5", key="data") | ||
``` | ||
NOTE: **key is always "data" for all hdf5 files**. | ||
|
||
# Here is a short description of the data | ||
|
||
| Filename | Description | | ||
| -------------- | -----------------------------------------------------------------| | ||
| "daily_pv.h5" | Adjusted daily price and volume data. | | ||
|
||
|
||
# Basic knowledge about each kind of data | ||
|
||
## Daily price and volume data | ||
$open: open price of the stock on that day. | ||
$close: close price of the stock on that day. | ||
$high: high price of the stock on that day. | ||
$low: low price of the stock on that day. | ||
$volume: volume of the stock on that day. | ||
$factor: factor value of the stock on that day. |
27 changes: 27 additions & 0 deletions
27
rdagent/scenarios/qlib/experiment/factor_data_template/generate.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Export daily price/volume features from a local qlib dataset into two HDF5
# files: the full history and a small debug sample. Both are written with
# key="data".
import qlib

qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")

from qlib.data import D

# Feature expressions requested from qlib for both exports.
PRICE_VOLUME_FIELDS = ("$open", "$close", "$high", "$low", "$volume", "$factor")

universe = D.instruments()

# ---- full export -----------------------------------------------------------
# Swap the two index levels, sort, and keep rows from 2008-12-29 onwards.
full_frame = D.features(universe, list(PRICE_VOLUME_FIELDS), freq="day")
full_frame = full_frame.swaplevel().sort_index()
full_frame = full_frame.loc["2008-12-29":].sort_index()

full_frame.to_hdf("./daily_pv_all.h5", key="data")

# ---- debug export ----------------------------------------------------------
# The debug sample keeps the first 100 distinct instruments found in the full
# export, restricted to the 2018-2019 window.
debug_instruments = full_frame.reset_index()["instrument"].unique()[:100]

window_frame = D.features(
    universe,
    list(PRICE_VOLUME_FIELDS),
    start_time="2018-01-01",
    end_time="2019-12-31",
    freq="day",
)
window_frame = window_frame.swaplevel().sort_index()

# Swap back so the instrument level is first for the .loc selection, then
# restore the swapped layout and sort again before writing.
by_instrument = window_frame.swaplevel()
debug_frame = by_instrument.loc[debug_instruments].swaplevel().sort_index()

debug_frame.to_hdf("./daily_pv_debug.h5", key="data")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import shutil | ||
from pathlib import Path | ||
|
||
import pandas as pd | ||
|
||
# render it with jinja | ||
from jinja2 import Environment, StrictUndefined | ||
|
||
from rdagent.components.coder.factor_coder.config import FACTOR_IMPLEMENT_SETTINGS | ||
from rdagent.utils.env import QTDockerEnv | ||
|
||
|
||
def generate_data_folder_from_qlib():
    """Create the factor data folders from data exported out of qlib.

    Runs ``generate.py`` from the ``factor_data_template`` directory (next to
    this module) through a ``QTDockerEnv``, asserts that both HDF5 outputs were
    produced, and then copies each output, renamed to ``daily_pv.h5``, along
    with the template ``README.md`` into
    ``FACTOR_IMPLEMENT_SETTINGS.data_folder`` (full data) and
    ``FACTOR_IMPLEMENT_SETTINGS.data_folder_debug`` (debug data). Target
    folders are created when missing.

    Raises:
        AssertionError: if either expected ``.h5`` file is absent after the run.
    """
    template_dir = Path(__file__).parent / "factor_data_template"

    docker_env = QTDockerEnv()
    docker_env.prepare()
    # The run's return value is not inspected; success is judged solely by the
    # presence of the generated files checked below.
    docker_env.run(local_path=str(template_dir), entry="python generate.py")

    full_h5 = template_dir / "daily_pv_all.h5"
    debug_h5 = template_dir / "daily_pv_debug.h5"
    assert full_h5.exists(), "daily_pv_all.h5 is not generated."
    assert debug_h5.exists(), "daily_pv_debug.h5 is not generated."

    readme = template_dir / "README.md"
    destinations = (
        (FACTOR_IMPLEMENT_SETTINGS.data_folder, full_h5),
        (FACTOR_IMPLEMENT_SETTINGS.data_folder_debug, debug_h5),
    )
    for folder, h5_file in destinations:
        target_dir = Path(folder)
        target_dir.mkdir(parents=True, exist_ok=True)
        # Both folders expose their data under the same file name.
        shutil.copy(h5_file, target_dir / "daily_pv.h5")
        shutil.copy(readme, target_dir / "README.md")
|
||
|
||
def get_data_folder_intro():
    """Describe the debug data folder as text for use in a prompt message.

    If either ``FACTOR_IMPLEMENT_SETTINGS.data_folder`` or
    ``FACTOR_IMPLEMENT_SETTINGS.data_folder_debug`` does not exist, the folders
    are first generated via ``generate_data_folder_from_qlib()``.

    Every entry of the debug folder is then rendered into a fenced section:
    ``.h5`` files are shown as the string form of ``DataFrame.head()``, ``.md``
    files are included verbatim. Entries are processed in sorted path order so
    that the resulting text is reproducible.

    Returns:
        str: the rendered sections joined by a "file splitter" line.

    Raises:
        NotImplementedError: if the debug folder contains an entry whose name
            ends with neither ``.h5`` nor ``.md``.
    """
    data_dir = Path(FACTOR_IMPLEMENT_SETTINGS.data_folder)
    debug_dir = Path(FACTOR_IMPLEMENT_SETTINGS.data_folder_debug)
    if not data_dir.exists() or not debug_dir.exists():
        generate_data_folder_from_qlib()

    # StrictUndefined makes a missing template variable an error instead of
    # silently rendering as an empty string.
    JJ_TPL = Environment(undefined=StrictUndefined).from_string(
        """
{{file_name}}
```{{type_desc}}
{{content}}
```
"""
    )

    content_l = []
    # Path.iterdir() yields entries in arbitrary order; sort them so the
    # generated text is identical across runs and filesystems.
    for p in sorted(debug_dir.iterdir()):
        if p.name.endswith(".h5"):
            df = pd.read_hdf(p)
            # get df.head() as string with full width
            # NOTE: set_option changes pandas display settings process-wide and
            # they stay in effect after this function returns.
            pd.set_option("display.max_columns", None)  # or 1000
            pd.set_option("display.max_rows", None)  # or 1000
            pd.set_option("display.max_colwidth", None)  # or 199
            rendered = JJ_TPL.render(
                file_name=p.name,
                type_desc="generated by `pd.read_hdf(filename).head()`",
                content=df.head().to_string(),
            )
        elif p.name.endswith(".md"):
            rendered = JJ_TPL.render(
                file_name=p.name,
                type_desc="markdown",
                content=p.read_text(),
            )
        else:
            raise NotImplementedError(
                f"file type {p.name} is not supported. Please implement its description function.",
            )
        content_l.append(rendered)
    return "\n ----------------- file splitter -------------\n".join(content_l)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters