Skip to content

Commit 2c33332

Browse files
SunsetWolf and you-n-g
authored
More dataloader example (#1823)
* More dataloader example * optimize code * optimize code * optimize code * optimize code * optimize code * optimize code * fix pylint error * fix CI error * fix CI error * Comments * fix error type --------- Co-authored-by: Young <[email protected]>
1 parent a7d5a9b commit 2c33332

File tree

5 files changed

+48
-5
lines changed

5 files changed

+48
-5
lines changed

.github/workflows/test_qlib_from_pip.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
# Since macos-latest changed from 12.7.4 to 14.4.1,
1717
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
1818
# so we limit the macos version to macos-12.
19-
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
19+
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
2020
# 3.6 is not supported because annotations are not supported there: https://stackoverflow.com/a/52890129
2121
python-version: [3.7, 3.8]
2222

.github/workflows/test_qlib_from_source.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
# Since macos-latest changed from 12.7.4 to 14.4.1,
1818
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
1919
# so we limit the macos version to macos-12.
20-
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
20+
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
2121
# 3.6 is not supported because annotations are not supported there: https://stackoverflow.com/a/52890129
2222
python-version: [3.7, 3.8]
2323

.github/workflows/test_qlib_from_source_slow.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
# Since macos-latest changed from 12.7.4 to 14.4.1,
1818
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
1919
# so we limit the macos version to macos-12.
20-
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
20+
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
2121
# 3.6 is not supported because annotations are not supported there: https://stackoverflow.com/a/52890129
2222
python-version: [3.7, 3.8]
2323

qlib/data/dataset/loader.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
4141
----------
4242
instruments : str or dict
4343
it can either be the market name or the config file of instruments generated by InstrumentProvider.
44+
If the value of instruments is None, it means that no filtering is done.
4445
start_time : str
4546
start of the time range.
4647
end_time : str
@@ -50,6 +51,11 @@ def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
5051
-------
5152
pd.DataFrame:
5253
data loaded from the underlying source
54+
55+
Raises
56+
------
57+
KeyError:
58+
if the instruments filter is not supported, raise KeyError
5359
"""
5460

5561

@@ -320,7 +326,13 @@ def __init__(self, dataloader_l: List[Dict], join="left") -> None:
320326
def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
321327
df_full = None
322328
for dl in self.data_loader_l:
323-
df_current = dl.load(instruments, start_time, end_time)
329+
try:
330+
df_current = dl.load(instruments, start_time, end_time)
331+
except KeyError:
332+
warnings.warn(
333+
"If the value of `instruments` cannot be processed, it will set instruments to None to get all the data."
334+
)
335+
df_current = dl.load(instruments=None, start_time=start_time, end_time=end_time)
324336
if df_full is None:
325337
df_full = df_current
326338
else:

tests/data_mid_layer_tests/test_dataloader.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
from pathlib import Path
88

99
sys.path.append(str(Path(__file__).resolve().parent))
10-
from qlib.data.dataset.loader import NestedDataLoader
10+
from qlib.data.dataset.loader import NestedDataLoader, QlibDataLoader
11+
from qlib.data.dataset.handler import DataHandlerLP
1112
from qlib.contrib.data.loader import Alpha158DL, Alpha360DL
13+
from qlib.data import D
1214

1315

1416
class TestDataLoader(unittest.TestCase):
@@ -44,6 +46,35 @@ def test_nested_data_loader(self):
4446
assert "LABEL0" in columns_list
4547

4648
# Then you can use it with DataHandler;
49+
# NOTE: the data processors are missing! You should add them based on your requirements.
50+
51+
"""
52+
dataset.to_pickle("test_df.pkl")
53+
nested_data_loader = NestedDataLoader(
54+
dataloader_l=[
55+
{
56+
"class": "qlib.contrib.data.loader.Alpha158DL",
57+
"kwargs": {"config": {"label": (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])}},
58+
},
59+
{
60+
"class": "qlib.contrib.data.loader.Alpha360DL",
61+
},
62+
{
63+
"class": "qlib.data.dataset.loader.StaticDataLoader",
64+
"kwargs": {"config": "test_df.pkl"},
65+
},
66+
]
67+
)
68+
data_handler_config = {
69+
"start_time": "2008-01-01",
70+
"end_time": "2020-08-01",
71+
"instruments": "csi300",
72+
"data_loader": nested_data_loader,
73+
}
74+
data_handler = DataHandlerLP(**data_handler_config)
75+
data = data_handler.fetch()
76+
print(data)
77+
"""
4778

4879

4980
if __name__ == "__main__":

0 commit comments

Comments
 (0)