fix: Fixed some bugs introduced during refactoring. (#167)

* Fixed some bugs introduced during refactoring. * fix a minor bug * build factor source data (price and volumes) from qlib if no source data is provided by the user (#168) * Fixed some bugs introduced during refactoring. * fix a small bug * fix a small bug --------- Co-authored-by: Xu Yang <[email protected]>
microsoft · Aug 5, 2024 · f8f1445 · f8f1445
1 parent 48c81ea
commit f8f1445
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 14 deletions.
diff --git a/rdagent/app/qlib_rd_loop/conf.py b/rdagent/app/qlib_rd_loop/conf.py
@@ -34,7 +34,7 @@ class Config:
 
     # 2) sub task specific:
     report_result_json_file_path: str = "git_ignore_folder/report_list.json"
-    max_factor_per_report: int = 10000
+    max_factors_per_exp: int = 10000
 
 
 class FactorFromReportPropSetting(FactorBasePropSetting):

diff --git a/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py b/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py
@@ -1,15 +1,11 @@
-# TODO: we should have more advanced mechanism to handle such requirements for saving sessions.
 import json
 from pathlib import Path
 from typing import Any, Tuple
 
 import fire
 from jinja2 import Environment, StrictUndefined
 
-from rdagent.app.qlib_rd_loop.conf import (
-    FACTOR_FROM_REPORT_PROP_SETTING,
-    FactorBasePropSetting,
-)
+from rdagent.app.qlib_rd_loop.conf import FACTOR_FROM_REPORT_PROP_SETTING
 from rdagent.app.qlib_rd_loop.factor_w_sc import FactorRDLoop
 from rdagent.components.document_reader.document_reader import (
     extract_first_page_screenshot_from_pdf,
@@ -32,6 +28,16 @@
 
 
 def generate_hypothesis(factor_result: dict, report_content: str) -> str:
+    """
+    Generate a hypothesis based on factor results and report content.
+
+    Args:
+        factor_result (dict): The results of the factor analysis.
+        report_content (str): The content of the report.
+
+    Returns:
+        str: The generated hypothesis.
+    """
     system_prompt = (
         Environment(undefined=StrictUndefined).from_string(prompts["hypothesis_generation"]["system"]).render()
     )
@@ -60,6 +66,15 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
 
 
 def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[QlibFactorExperiment, Hypothesis]:
+    """
+    Extract hypothesis and experiment details from report files.
+
+    Args:
+        report_file_path (str): Path to the report file.
+
+    Returns:
+        Tuple[QlibFactorExperiment, Hypothesis]: The extracted experiment and generated hypothesis.
+    """
     with logger.tag("extract_factors_and_implement"):
         with logger.tag("load_factor_tasks"):
             exp = FactorExperimentLoaderFromPDFfiles().load(report_file_path)
@@ -88,13 +103,14 @@ def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[Qlib
 
 
 class FactorReportLoop(FactorRDLoop, metaclass=LoopMeta):
-    skip_loop_error = (FactorEmptyError,)
-
-    def __init__(self, PROP_SETTING: FactorBasePropSetting):
+    def __init__(self, PROP_SETTING: FACTOR_FROM_REPORT_PROP_SETTING):
         super().__init__(PROP_SETTING=PROP_SETTING)
         self.judge_pdf_data_items = json.load(open(PROP_SETTING.report_result_json_file_path, "r"))
         self.pdf_file_index = 0
         self.valid_pdf_file_count = 0
+        self.current_loop_hypothesis = None
+        self.current_loop_exp = None
+        self.steps = ["propose_hypo_exp", "propose", "exp_gen", "coding", "running", "feedback"]
 
     def propose_hypo_exp(self, prev_out: dict[str, Any]):
         with logger.tag("r"):
@@ -109,8 +125,8 @@ def propose_hypo_exp(self, prev_out: dict[str, Any]):
                     continue
                 self.valid_pdf_file_count += 1
                 exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
-                exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report]
-                exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report]
+                exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
+                exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
                 logger.log_object(hypothesis, tag="hypothesis generation")
                 logger.log_object(exp.sub_tasks, tag="experiment generation")
                 self.current_loop_hypothesis = hypothesis
@@ -130,7 +146,7 @@ def main(path=None, step_n=None):
 
     .. code-block:: python
 
-        dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_sh.py $LOG_PATH/__session__/1/0_propose  --step_n 1   # `step_n` is a optional parameter
+        dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py $LOG_PATH/__session__/1/0_propose  --step_n 1   # `step_n` is an optional parameter
 
     """
     if path is None:

diff --git a/rdagent/utils/workflow.py b/rdagent/utils/workflow.py
@@ -24,7 +24,13 @@ class LoopMeta(type):
     @staticmethod
     def _get_steps(bases):
         """
-        get all the `steps` of base classes and combine them to a single one.
+        Recursively get all the `steps` from the base classes and combine them into a single list.
+
+        Args:
+            bases (tuple): A tuple of base classes.
+
+        Returns:
+            List[Callable]: A list of steps combined from all base classes.
         """
         steps = []
         for base in bases:
@@ -34,7 +40,17 @@ def _get_steps(bases):
         return steps
 
     def __new__(cls, clsname, bases, attrs):
-        # move custommized steps into steps
+        """
+        Create a new class with combined steps from base classes and current class.
+
+        Args:
+            clsname (str): Name of the new class.
+            bases (tuple): Base classes.
+            attrs (dict): Attributes of the new class.
+
+        Returns:
+            LoopMeta: A new instance of LoopMeta.
+        """
         steps = LoopMeta._get_steps(bases)  # all the base classes of parents
         for name, attr in attrs.items():
             if not name.startswith("__") and isinstance(attr, Callable):