Skip to content

Commit

Permalink
More dataloader example (#1823)
Browse files Browse the repository at this point in the history
* More dataloader example

* optimize code

* optimize code

* optimize code

* optimize code

* optimize code

* optimize code

* fix pylint error

* fix CI error

* fix CI error

* Comments

* fix error type

---------

Co-authored-by: Young <[email protected]>
  • Loading branch information
SunsetWolf and you-n-g authored Jul 10, 2024
1 parent a7d5a9b commit 2c33332
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_qlib_from_pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
# Python 3.6 is not supported because annotations are not supported there; see https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_qlib_from_source.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
# Python 3.6 is not supported because annotations are not supported there; see https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_qlib_from_source_slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
# Since macos-latest changed from 12.7.4 to 14.4.1,
# the minimum python version that matches a 14.4.1 version of macos is 3.10,
# so we limit the macos version to macos-12.
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-12]
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
# Python 3.6 is not supported because annotations are not supported there; see https://stackoverflow.com/a/52890129
python-version: [3.7, 3.8]

Expand Down
14 changes: 13 additions & 1 deletion qlib/data/dataset/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
----------
instruments : str or dict
it can either be the market name or the config file of instruments generated by InstrumentProvider.
If the value of instruments is None, it means that no filtering is done.
start_time : str
start of the time range.
end_time : str
Expand All @@ -50,6 +51,11 @@ def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
-------
pd.DataFrame:
data loaded from the underlying source
Raises
------
KeyError:
if the instruments filter is not supported, raise KeyError
"""


Expand Down Expand Up @@ -320,7 +326,13 @@ def __init__(self, dataloader_l: List[Dict], join="left") -> None:
def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
df_full = None
for dl in self.data_loader_l:
df_current = dl.load(instruments, start_time, end_time)
try:
df_current = dl.load(instruments, start_time, end_time)
except KeyError:
warnings.warn(
"If the value of `instruments` cannot be processed, it will set instruments to None to get all the data."
)
df_current = dl.load(instruments=None, start_time=start_time, end_time=end_time)
if df_full is None:
df_full = df_current
else:
Expand Down
33 changes: 32 additions & 1 deletion tests/data_mid_layer_tests/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from pathlib import Path

sys.path.append(str(Path(__file__).resolve().parent))
from qlib.data.dataset.loader import NestedDataLoader
from qlib.data.dataset.loader import NestedDataLoader, QlibDataLoader
from qlib.data.dataset.handler import DataHandlerLP
from qlib.contrib.data.loader import Alpha158DL, Alpha360DL
from qlib.data import D


class TestDataLoader(unittest.TestCase):
Expand Down Expand Up @@ -44,6 +46,35 @@ def test_nested_data_loader(self):
assert "LABEL0" in columns_list

# Then you can use it with DataHandler;
# NOTE: the data processors are missing! You should add them based on your requirements

"""
dataset.to_pickle("test_df.pkl")
nested_data_loader = NestedDataLoader(
dataloader_l=[
{
"class": "qlib.contrib.data.loader.Alpha158DL",
"kwargs": {"config": {"label": (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])}},
},
{
"class": "qlib.contrib.data.loader.Alpha360DL",
},
{
"class": "qlib.data.dataset.loader.StaticDataLoader",
"kwargs": {"config": "test_df.pkl"},
},
]
)
data_handler_config = {
"start_time": "2008-01-01",
"end_time": "2020-08-01",
"instruments": "csi300",
"data_loader": nested_data_loader,
}
data_handler = DataHandlerLP(**data_handler_config)
data = data_handler.fetch()
print(data)
"""


if __name__ == "__main__":
Expand Down

0 comments on commit 2c33332

Please sign in to comment.