From c3e26035e66f519f13bd12f24a5dd4598304e4a1 Mon Sep 17 00:00:00 2001 From: Young Date: Wed, 10 Jul 2024 09:18:23 +0000 Subject: [PATCH] Add Framework --- rdagent/core/experiment.py | 7 ++ rdagent/scenarios/qlib/task_generator/data.py | 38 +++++++- .../ws_tpl/factor_exp/conf.yaml | 89 +++++++++++++++++++ 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py index b7abb8606..e45e426cf 100644 --- a/rdagent/core/experiment.py +++ b/rdagent/core/experiment.py @@ -18,6 +18,10 @@ class Task: class Implementation(ABC, Generic[ASpecificTask]): + # TODO: workspace; + # - code or data(optional) + # - Execute logic + # - `env is not included`. It is an underlying infra def __init__(self, target_task: ASpecificTask) -> None: self.target_task = target_task @@ -92,6 +96,7 @@ def prepare(self, *args, **kwargs): typical usage of `*args, **kwargs`: Different methods shares the same data. The data are passed by the arguments. """ + # TODO: model and factor prepare; def inject_code(self, **files: str): """ @@ -118,12 +123,14 @@ class Experiment(ABC, Generic[ASpecificTask, ASpecificImp]): """ The experiment is a sequence of tasks and the implementations of the tasks after generated by the TaskGenerator. """ + result_ws: Optional[FBImplementation] def __init__(self, sub_tasks: Sequence[ASpecificTask]) -> None: self.sub_tasks = sub_tasks self.sub_implementations: Sequence[ASpecificImp] = [None for _ in self.sub_tasks] self.based_experiments: Sequence[Experiment] = [] self.result: object = None # The result of the experiment, can be different types in different scenarios. 
+ self.result_ws = None TaskOrExperiment = TypeVar("TaskOrExperiment", Task, Experiment) diff --git a/rdagent/scenarios/qlib/task_generator/data.py b/rdagent/scenarios/qlib/task_generator/data.py index a996c417b..debc2bd32 100644 --- a/rdagent/scenarios/qlib/task_generator/data.py +++ b/rdagent/scenarios/qlib/task_generator/data.py @@ -36,6 +36,42 @@ def generate(self, exp: QlibFactorExperiment) -> QlibFactorExperiment: combined_factors = pd.concat([SOTA_factor, new_factors], axis=1).dropna() else: combined_factors = new_factors + """ + Previous TaskGenerator: + - Converting the tasks in experiment to implementation/workspace: + - Task: combine factor and run results (it does not require intelligence) + - TaskImplementation: + + - current things: + + New Inherit relationship + - DevAgent(Exp) => Exp + - TaskDev(Exp) => Exp: Converting the tasks in experiment to implementation/workspace: + + # we use following name + # - workspace + + # outside logic + class QlibFactorExpWorkspace: + + def prepare(): + # create a folder; + # copy template + # place data inside the folder `combined_factors` + # + def execute(): + de = DockerEnv() + + de.run(local_path=self.ws_path, entry="qrun conf.yaml") + + + # outside logic + ws = exp.ws + ws.prepare() + ws.inject_code("read_exp.py") # dump the results to a general format(csv, hdf5) + # otherwise you can put it into the template without inject. 
+ ws.execute() + """ print("Success in processing and combining factor data.") @@ -81,4 +117,4 @@ def process_factor_data(self, exp: QlibFactorExperiment) -> pd.DataFrame: return combined_factors else: print("No valid factor data found to merge.") - return pd.DataFrame() # Return an empty DataFrame if no valid data \ No newline at end of file + return pd.DataFrame() # Return an empty DataFrame if no valid data diff --git a/rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml b/rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml new file mode 100644 index 000000000..fa4d36529 --- /dev/null +++ b/rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml @@ -0,0 +1,89 @@ +qlib_init: + provider_uri: "~/.qlib/qlib_data/cn_data" + region: cn +market: &market csi300 +benchmark: &benchmark SH000300 +data_handler_config: &data_handler_config + start_time: 2008-01-01 + end_time: 2020-08-01 + fit_start_time: 2008-01-01 + fit_end_time: 2014-12-31 + instruments: *market + data_loader: + class: NestedDataLoader + kwargs: + dataloader_l=[ + { + "class": "qlib.contrib.data.loader.Alpha158DL", + "kwargs": {"config": {"label": (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])}}, + }, + { + "class": "qlib.data.dataset.loader.StaticDataLoader", + "kwargs": {"config": "combined_df.pkl"}, + }, + ] + LEARN_PROCESSORS = [ + {"class": "DropnaLabel"}, + {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}}, + ] + +port_analysis_config: &port_analysis_config + strategy: + class: TopkDropoutStrategy + module_path: qlib.contrib.strategy + kwargs: + signal: <PRED> + topk: 50 + n_drop: 5 + backtest: + start_time: 2017-01-01 + end_time: 2020-08-01 + account: 100000000 + benchmark: *benchmark + exchange_kwargs: + limit_threshold: 0.095 + deal_price: close + open_cost: 0.0005 + close_cost: 0.0015 + min_cost: 5 +task: + model: + class: LGBModel + module_path: qlib.contrib.model.gbdt + kwargs: + loss: mse + colsample_bytree: 0.8879 + learning_rate: 0.2 + 
subsample: 0.8789 + lambda_l1: 205.6999 + lambda_l2: 580.9768 + max_depth: 8 + num_leaves: 210 + num_threads: 20 + dataset: + class: DatasetH + module_path: qlib.data.dataset + kwargs: + handler: + class: DataHandlerLP + module_path: qlib.contrib.data.handler + kwargs: *data_handler_config + segments: + train: [2008-01-01, 2014-12-31] + valid: [2015-01-01, 2016-12-31] + test: [2017-01-01, 2020-08-01] + record: + - class: SignalRecord + module_path: qlib.workflow.record_temp + kwargs: + model: <MODEL> + dataset: <DATASET> + - class: SigAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + ana_long_short: False + ann_scaler: 252 + - class: PortAnaRecord + module_path: qlib.workflow.record_temp + kwargs: + config: *port_analysis_config