diff --git a/examples/benchmarks/LightGBM/requirements.txt b/examples/benchmarks/LightGBM/requirements.txt index 3f455556b8..4ffcc6f8a3 100644 --- a/examples/benchmarks/LightGBM/requirements.txt +++ b/examples/benchmarks/LightGBM/requirements.txt @@ -1,3 +1,3 @@ pandas==1.1.2 numpy==1.21.0 -lightgbm==3.1.0 +lightgbm diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml new file mode 100644 index 0000000000..df0f7c7947 --- /dev/null +++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml @@ -0,0 +1,72 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi500 +benchmark: &benchmark SH000905 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + model: <MODEL> + dataset: <DATASET> + topk: 50 + n_drop: 5 + backtest: + start_time: 2017-01-01 + end_time: 2020-08-01 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.2 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha158 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: <MODEL> + dataset: <DATASET> + - class: 
SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360_csi500.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360_csi500.yaml new file mode 100644 index 0000000000..767050919f --- /dev/null +++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360_csi500.yaml @@ -0,0 +1,80 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi500 +benchmark: &benchmark SH000905 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + infer_processors: [] + learn_processors: + - class: DropnaLabel + - class: CSRankNorm + kwargs: + fields_group: label + label: ["Ref($close, -2) / Ref($close, -1) - 1"] +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + signal: + - <MODEL> + - <DATASET> + topk: 50 + n_drop: 5 + backtest: + start_time: 2017-01-01 + end_time: 2020-08-01 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.0421 + subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: Alpha360 + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + 
module_path: qlib.workflow.record_temp + kwargs: + model: <MODEL> + dataset: <DATASET> + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md index e1616f4fd2..10ae91f354 100644 --- a/examples/benchmarks/README.md +++ b/examples/benchmarks/README.md @@ -20,7 +20,9 @@ The numbers shown below demonstrate the performance of the entire `workflow` of > NOTE: > We have very limited resources to implement and finetune the models. We tried our best effort to fairly compare these models. But some models may have greater potential than what it looks like in the table below. Your contribution is highly welcomed to explore their potential. -## Alpha158 dataset +## Results on CSI300 + +### Alpha158 dataset | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | |------------------------------------------|-------------------------------------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------| @@ -44,7 +46,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of | DoubleEnsemble(Chuheng Zhang, et al.) 
| Alpha158 | 0.0544±0.00 | 0.4340±0.00 | 0.0523±0.00 | 0.4284±0.01 | 0.1168±0.01 | 1.3384±0.12 | -0.1036±0.01 | -## Alpha360 dataset +### Alpha360 dataset | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | |-------------------------------------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------| @@ -79,6 +81,38 @@ The numbers shown below demonstrate the performance of the entire `workflow` of - Signal-based evaluation: IC, ICIR, Rank IC, Rank ICIR - Portfolio-based metrics: Annualized Return, Information Ratio, Max Drawdown +## Results on CSI500 +The results on CSI500 are not complete. PRs for models on CSI500 are welcome! + +Transferring previous models from CSI300 to CSI500 is quite easy. You can try models with just a few commands below. +``` +cd examples/benchmarks/LightGBM +pip install -r requirements.txt + +# create new config and set the benchmark to csi500 +cp workflow_config_lightgbm_Alpha158.yaml workflow_config_lightgbm_Alpha158_csi500.yaml +sed -i "s/csi300/csi500/g" workflow_config_lightgbm_Alpha158_csi500.yaml +sed -i "s/SH000300/SH000905/g" workflow_config_lightgbm_Alpha158_csi500.yaml + +# you can either run the model once +qrun workflow_config_lightgbm_Alpha158_csi500.yaml + +# or run it multiple times automatically and get the summarized results. +cd ../../ +python run_all_model.py run 3 lightgbm Alpha158 csi500 # for models with randomness, please run it 20 times. 
+``` + +### Alpha158 dataset + +| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | +|------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------| +| LightGBM | Alpha158 | 0.0377±0.00 | 0.3860±0.00 | 0.0448±0.00 | 0.4675±0.00 | 0.1151±0.00 | 1.3884±0.00 | -0.0898±0.00 | + +### Alpha360 dataset +| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown | +|------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------| +| LightGBM | Alpha360 | 0.0400±0.00 | 0.3605±0.00 | 0.0536±0.00 | 0.5431±0.00 | 0.0505±0.00 | 0.7658±0.02 | -0.1880±0.00 | + # Contributing diff --git a/examples/run_all_model.py b/examples/run_all_model.py index 71ce10a411..71589049a2 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -117,8 +117,10 @@ def get_all_folders(models, exclude) -> dict: # function to get all the files under the model folder -def get_all_files(folder_path, dataset) -> (str, str): - yaml_path = str(Path(f"{folder_path}") / f"*{dataset}*.yaml") +def get_all_files(folder_path, dataset, universe="") -> (str, str): + if universe != "": + universe = f"_{universe}" + yaml_path = str(Path(f"{folder_path}") / f"*{dataset}{universe}.yaml") req_path = str(Path(f"{folder_path}") / f"*.txt") yaml_file = glob.glob(yaml_path) req_file = glob.glob(req_path) @@ -224,6 +226,7 @@ def run( times=1, models=None, dataset="Alpha360", + universe="", exclude=False, qlib_uri: str = "git+https://github.com/microsoft/qlib#egg=pyqlib", exp_folder_name: str = "run_all_model_records", @@ -245,6 +248,9 @@ def run( determines whether the model being used is excluded or included. dataset : str determines the dataset to be used for each model. + universe : str + the stock universe of the dataset. 
+ default "" indicates that no universe suffix is appended when matching the config file name qlib_uri : str the uri to install qlib with pip it could be url on the we or local path (NOTE: the local path must be a absolute path) @@ -259,6 +265,15 @@ def run( ------- Here are some use cases of the function in the bash: + The run_all_models will decide which config to run based on `models`, `dataset` and `universe` + Example 1): + + models="lightgbm", dataset="Alpha158", universe="" will result in running the following config + examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml + + models="lightgbm", dataset="Alpha158", universe="csi500" will result in running the following config + examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml + .. code-block:: bash # Case 1 - run all models multiple times @@ -279,6 +294,9 @@ def run( # Case 6 - run other models except those are given as arguments for one time python run_all_model.py run --models=[mlp,tft,sfm] --exclude=True + # Case 7 - run lightgbm model on csi500. + python run_all_model.py run 3 lightgbm Alpha158 csi500 + """ self._init_qlib(exp_folder_name) @@ -290,7 +308,7 @@ def run( for fn in folders: # get all files sys.stderr.write("Retrieving files...\n") - yaml_path, req_path = get_all_files(folders[fn], dataset) + yaml_path, req_path = get_all_files(folders[fn], dataset, universe=universe) if yaml_path is None: sys.stderr.write(f"There is no {dataset}.yaml file in {folders[fn]}") continue