From f8f1445283fb89aefeb2918243c35a219a51a56c Mon Sep 17 00:00:00 2001 From: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com> Date: Mon, 5 Aug 2024 21:48:24 +0800 Subject: [PATCH] fix: Fixed some bugs introduced during refactoring. (#167) * Fixed some bugs introduced during refactoring. * fix a minor bug * build factor source data (price and volumes) from qlib if no source data is provided by the user (#168) * Fixed some bugs introduced during refactoring. * fix a small bug * fix a small bug --------- Co-authored-by: Xu Yang --- rdagent/app/qlib_rd_loop/conf.py | 2 +- .../qlib_rd_loop/factor_from_report_w_sc.py | 38 +++++++++++++------ rdagent/utils/workflow.py | 20 +++++++++- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/rdagent/app/qlib_rd_loop/conf.py b/rdagent/app/qlib_rd_loop/conf.py index bb8d9bdbe..d6319a4a5 100644 --- a/rdagent/app/qlib_rd_loop/conf.py +++ b/rdagent/app/qlib_rd_loop/conf.py @@ -34,7 +34,7 @@ class Config: # 2) sub task specific: report_result_json_file_path: str = "git_ignore_folder/report_list.json" - max_factor_per_report: int = 10000 + max_factors_per_exp: int = 10000 class FactorFromReportPropSetting(FactorBasePropSetting): diff --git a/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py b/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py index c9c9c79b0..fa9e61c47 100644 --- a/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py +++ b/rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py @@ -1,4 +1,3 @@ -# TODO: we should have more advanced mechanism to handle such requirements for saving sessions.
import json from pathlib import Path from typing import Any, Tuple @@ -6,10 +5,7 @@ import fire from jinja2 import Environment, StrictUndefined -from rdagent.app.qlib_rd_loop.conf import ( - FACTOR_FROM_REPORT_PROP_SETTING, - FactorBasePropSetting, -) +from rdagent.app.qlib_rd_loop.conf import FACTOR_FROM_REPORT_PROP_SETTING from rdagent.app.qlib_rd_loop.factor_w_sc import FactorRDLoop from rdagent.components.document_reader.document_reader import ( extract_first_page_screenshot_from_pdf, @@ -32,6 +28,16 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str: + """ + Generate a hypothesis based on factor results and report content. + + Args: + factor_result (dict): The results of the factor analysis. + report_content (str): The content of the report. + + Returns: + str: The generated hypothesis. + """ system_prompt = ( Environment(undefined=StrictUndefined).from_string(prompts["hypothesis_generation"]["system"]).render() ) @@ -60,6 +66,15 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str: def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[QlibFactorExperiment, Hypothesis]: + """ + Extract hypothesis and experiment details from report files. + + Args: + report_file_path (str): Path to the report file. + + Returns: + Tuple[QlibFactorExperiment, Hypothesis]: The extracted experiment and generated hypothesis. 
+ """ with logger.tag("extract_factors_and_implement"): with logger.tag("load_factor_tasks"): exp = FactorExperimentLoaderFromPDFfiles().load(report_file_path) @@ -88,13 +103,14 @@ def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[Qlib class FactorReportLoop(FactorRDLoop, metaclass=LoopMeta): - skip_loop_error = (FactorEmptyError,) - - def __init__(self, PROP_SETTING: FactorBasePropSetting): + def __init__(self, PROP_SETTING: FACTOR_FROM_REPORT_PROP_SETTING): super().__init__(PROP_SETTING=PROP_SETTING) self.judge_pdf_data_items = json.load(open(PROP_SETTING.report_result_json_file_path, "r")) self.pdf_file_index = 0 self.valid_pdf_file_count = 0 + self.current_loop_hypothesis = None + self.current_loop_exp = None + self.steps = ["propose_hypo_exp", "propose", "exp_gen", "coding", "running", "feedback"] def propose_hypo_exp(self, prev_out: dict[str, Any]): with logger.tag("r"): @@ -109,8 +125,8 @@ def propose_hypo_exp(self, prev_out: dict[str, Any]): continue self.valid_pdf_file_count += 1 exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]] - exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report] - exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report] + exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp] + exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp] logger.log_object(hypothesis, tag="hypothesis generation") logger.log_object(exp.sub_tasks, tag="experiment generation") self.current_loop_hypothesis = hypothesis @@ -130,7 +146,7 @@ def main(path=None, step_n=None): .. 
code-block:: python - dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_sh.py $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is a optional parameter + dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is a optional parameter """ if path is None: diff --git a/rdagent/utils/workflow.py b/rdagent/utils/workflow.py index 9399ff009..c9c6f1c05 100644 --- a/rdagent/utils/workflow.py +++ b/rdagent/utils/workflow.py @@ -24,7 +24,13 @@ class LoopMeta(type): @staticmethod def _get_steps(bases): """ - get all the `steps` of base classes and combine them to a single one. + Recursively get all the `steps` from the base classes and combine them into a single list. + + Args: + bases (tuple): A tuple of base classes. + + Returns: + List[Callable]: A list of steps combined from all base classes. """ steps = [] for base in bases: @@ -34,7 +40,17 @@ def _get_steps(bases): return steps def __new__(cls, clsname, bases, attrs): - # move custommized steps into steps + """ + Create a new class with combined steps from base classes and current class. + + Args: + clsname (str): Name of the new class. + bases (tuple): Base classes. + attrs (dict): Attributes of the new class. + + Returns: + LoopMeta: A new instance of LoopMeta. + """ steps = LoopMeta._get_steps(bases) # all the base classes of parents for name, attr in attrs.items(): if not name.startswith("__") and isinstance(attr, Callable):