From f8f1445283fb89aefeb2918243c35a219a51a56c Mon Sep 17 00:00:00 2001
From: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com>
Date: Mon, 5 Aug 2024 21:48:24 +0800
Subject: [PATCH] fix: Fixed some bugs introduced during refactoring. (#167)

* Fixed some bugs introduced during refactoring.

* fix a minor bug

* build factor source data (price and volumes) from qlib if no source data is provided by the user (#168)

* Fixed some bugs introduced during refactoring.

* fix a small bug

* fix a small bug

---------

Co-authored-by: Xu Yang <peteryang@vip.qq.com>
---
 rdagent/app/qlib_rd_loop/conf.py              |  2 +-
 .../qlib_rd_loop/factor_from_report_w_sc.py   | 38 +++++++++++++------
 rdagent/utils/workflow.py                     | 20 +++++++++-
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/rdagent/app/qlib_rd_loop/conf.py b/rdagent/app/qlib_rd_loop/conf.py
index bb8d9bdbe..d6319a4a5 100644
--- a/rdagent/app/qlib_rd_loop/conf.py
+++ b/rdagent/app/qlib_rd_loop/conf.py
@@ -34,7 +34,7 @@ class Config:
 
     # 2) sub task specific:
     report_result_json_file_path: str = "git_ignore_folder/report_list.json"
-    max_factor_per_report: int = 10000
+    max_factors_per_exp: int = 10000
 
 
 class FactorFromReportPropSetting(FactorBasePropSetting):
diff --git a/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py b/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py
index c9c9c79b0..fa9e61c47 100644
--- a/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py
+++ b/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py
@@ -1,4 +1,3 @@
-# TODO: we should have more advanced mechanism to handle such requirements for saving sessions.
 import json
 from pathlib import Path
 from typing import Any, Tuple
@@ -6,10 +5,7 @@
 import fire
 from jinja2 import Environment, StrictUndefined
 
-from rdagent.app.qlib_rd_loop.conf import (
-    FACTOR_FROM_REPORT_PROP_SETTING,
-    FactorBasePropSetting,
-)
+from rdagent.app.qlib_rd_loop.conf import FACTOR_FROM_REPORT_PROP_SETTING
 from rdagent.app.qlib_rd_loop.factor_w_sc import FactorRDLoop
 from rdagent.components.document_reader.document_reader import (
     extract_first_page_screenshot_from_pdf,
@@ -32,6 +28,16 @@
 
 
 def generate_hypothesis(factor_result: dict, report_content: str) -> str:
+    """
+    Generate a hypothesis based on factor results and report content.
+
+    Args:
+        factor_result (dict): The results of the factor analysis.
+        report_content (str): The content of the report.
+
+    Returns:
+        str: The generated hypothesis.
+    """
     system_prompt = (
         Environment(undefined=StrictUndefined).from_string(prompts["hypothesis_generation"]["system"]).render()
     )
@@ -60,6 +66,15 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
 
 
 def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[QlibFactorExperiment, Hypothesis]:
+    """
+    Extract hypothesis and experiment details from report files.
+
+    Args:
+        report_file_path (str): Path to the report file.
+
+    Returns:
+        Tuple[QlibFactorExperiment, Hypothesis]: The extracted experiment and generated hypothesis.
+    """
     with logger.tag("extract_factors_and_implement"):
         with logger.tag("load_factor_tasks"):
             exp = FactorExperimentLoaderFromPDFfiles().load(report_file_path)
@@ -88,13 +103,14 @@ def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[Qlib
 
 
 class FactorReportLoop(FactorRDLoop, metaclass=LoopMeta):
-    skip_loop_error = (FactorEmptyError,)
-
-    def __init__(self, PROP_SETTING: FactorBasePropSetting):
+    def __init__(self, PROP_SETTING: FACTOR_FROM_REPORT_PROP_SETTING):
         super().__init__(PROP_SETTING=PROP_SETTING)
         self.judge_pdf_data_items = json.load(open(PROP_SETTING.report_result_json_file_path, "r"))
         self.pdf_file_index = 0
         self.valid_pdf_file_count = 0
+        self.current_loop_hypothesis = None
+        self.current_loop_exp = None
+        self.steps = ["propose_hypo_exp", "propose", "exp_gen", "coding", "running", "feedback"]
 
     def propose_hypo_exp(self, prev_out: dict[str, Any]):
         with logger.tag("r"):
@@ -109,8 +125,8 @@ def propose_hypo_exp(self, prev_out: dict[str, Any]):
                     continue
                 self.valid_pdf_file_count += 1
                 exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
-                exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report]
-                exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report]
+                exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
+                exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
                 logger.log_object(hypothesis, tag="hypothesis generation")
                 logger.log_object(exp.sub_tasks, tag="experiment generation")
                 self.current_loop_hypothesis = hypothesis
@@ -130,7 +146,7 @@ def main(path=None, step_n=None):
 
     .. code-block:: python
 
-        dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_sh.py $LOG_PATH/__session__/1/0_propose  --step_n 1   # `step_n` is a optional parameter
+        dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py $LOG_PATH/__session__/1/0_propose  --step_n 1   # `step_n` is an optional parameter
 
     """
     if path is None:
diff --git a/rdagent/utils/workflow.py b/rdagent/utils/workflow.py
index 9399ff009..c9c6f1c05 100644
--- a/rdagent/utils/workflow.py
+++ b/rdagent/utils/workflow.py
@@ -24,7 +24,13 @@ class LoopMeta(type):
     @staticmethod
     def _get_steps(bases):
         """
-        get all the `steps` of base classes and combine them to a single one.
+        Recursively get all the `steps` from the base classes and combine them into a single list.
+
+        Args:
+            bases (tuple): A tuple of base classes.
+
+        Returns:
+            List[Callable]: A list of steps combined from all base classes.
         """
         steps = []
         for base in bases:
@@ -34,7 +40,17 @@ def _get_steps(bases):
         return steps
 
     def __new__(cls, clsname, bases, attrs):
-        # move custommized steps into steps
+        """
+        Create a new class with combined steps from base classes and current class.
+
+        Args:
+            clsname (str): Name of the new class.
+            bases (tuple): Base classes.
+            attrs (dict): Attributes of the new class.
+
+        Returns:
+            LoopMeta: A new instance of LoopMeta.
+        """
         steps = LoopMeta._get_steps(bases)  # all the base classes of parents
         for name, attr in attrs.items():
             if not name.startswith("__") and isinstance(attr, Callable):