From c3e26035e66f519f13bd12f24a5dd4598304e4a1 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Wed, 10 Jul 2024 09:18:23 +0000
Subject: [PATCH] Add Framework

---
 rdagent/core/experiment.py                    |  7 ++
 rdagent/scenarios/qlib/task_generator/data.py | 38 +++++++-
 .../ws_tpl/factor_exp/conf.yaml               | 89 +++++++++++++++++++
 3 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml

diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py
index b7abb8606..e45e426cf 100644
--- a/rdagent/core/experiment.py
+++ b/rdagent/core/experiment.py
@@ -18,6 +18,10 @@ class Task:
 
 
 class Implementation(ABC, Generic[ASpecificTask]):
+    # TODO: workspace;
+    # - code or data(optional)
+    # - Execute logic
+    # - `env is not included`. It is an underlying infra
     def __init__(self, target_task: ASpecificTask) -> None:
         self.target_task = target_task
 
@@ -92,6 +96,7 @@ def prepare(self, *args, **kwargs):
             typical usage of `*args, **kwargs`:
                 Different methods shares the same data. The data are passed by the arguments.
         """
+        # TODO: model and factor prepare;
 
     def inject_code(self, **files: str):
         """
@@ -118,12 +123,14 @@ class Experiment(ABC, Generic[ASpecificTask, ASpecificImp]):
     """
     The experiment is a sequence of tasks and the implementations of the tasks after generated by the TaskGenerator.
     """
+    result_ws: Optional[FBImplementation]
 
     def __init__(self, sub_tasks: Sequence[ASpecificTask]) -> None:
         self.sub_tasks = sub_tasks
         self.sub_implementations: Sequence[ASpecificImp] = [None for _ in self.sub_tasks]
         self.based_experiments: Sequence[Experiment] = []
         self.result: object = None  # The result of the experiment, can be different types in different scenarios.
+        self.result_ws = None
 
 
 TaskOrExperiment = TypeVar("TaskOrExperiment", Task, Experiment)
diff --git a/rdagent/scenarios/qlib/task_generator/data.py b/rdagent/scenarios/qlib/task_generator/data.py
index a996c417b..debc2bd32 100644
--- a/rdagent/scenarios/qlib/task_generator/data.py
+++ b/rdagent/scenarios/qlib/task_generator/data.py
@@ -36,6 +36,42 @@ def generate(self, exp: QlibFactorExperiment) -> QlibFactorExperiment:
             combined_factors = pd.concat([SOTA_factor, new_factors], axis=1).dropna()
         else:
             combined_factors = new_factors
+        """
+        Previous TaskGenerator:
+        - Converting the tasks in experiment to implementation/workspace:
+            - Task: combine factor and run results (it does not require intelligence)
+            - TaskImplementation: 
+
+        - current things: 
+        
+        New Inherit relationship
+        - DevAgent(Exp) => Exp
+            - TaskDev(Exp) => Exp: Converting the tasks in experiment to implementation/workspace:
+        
+        # we use following name
+        # - workspace
+
+        # outside logic
+        class QlibFactorExpWorkspace:
+           
+            def prepare():
+                # create a folder;
+                # copy template
+                # place data inside the folder `combined_factors`
+                #
+            def execute():
+                de = DockerEnv()
+
+                de.run(local_path=self.ws_path, entry="qrun conf.yaml")
+
+
+        # outside logic
+        ws = exp.ws
+        ws.prepare()
+        ws.inject_code("read_exp.py") # dump the results to a general format(csv, hdf5)
+                                      # otherwise you can put it into the template without inject.
+        ws.execute()
+        """
 
         print("Success in processing and combining factor data.")
 
@@ -81,4 +117,4 @@ def process_factor_data(self, exp: QlibFactorExperiment) -> pd.DataFrame:
             return combined_factors
         else:
             print("No valid factor data found to merge.")
-            return pd.DataFrame()  # Return an empty DataFrame if no valid data
\ No newline at end of file
+            return pd.DataFrame()  # Return an empty DataFrame if no valid data
diff --git a/rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml b/rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml
new file mode 100644
index 000000000..fa4d36529
--- /dev/null
+++ b/rdagent/scenarios/qlib/task_generator/ws_tpl/factor_exp/conf.yaml
@@ -0,0 +1,89 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config  # handler kwargs; reused via *data_handler_config under task.dataset
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    data_loader:
+        class: NestedDataLoader
+        kwargs:
+          dataloader_l:  # sequence of loader configs passed to NestedDataLoader
+            - class: qlib.contrib.data.loader.Alpha158DL
+              kwargs:
+                config:
+                  label:  # pair of [expressions], [column names]
+                    - ["Ref($close, -2)/Ref($close, -1) - 1"]
+                    - ["LABEL0"]
+            - class: qlib.data.dataset.loader.StaticDataLoader
+              kwargs:
+                config: "combined_df.pkl"
+    learn_processors:
+      - class: DropnaLabel
+      - class: CSZScoreNorm
+        kwargs: {fields_group: label}
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            signal: <PRED>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.2
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: DataHandlerLP
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config