Skip to content

Commit

Permalink
fix: Fixed some bugs introduced during refactoring. (#167)
Browse files Browse the repository at this point in the history
* Fixed some bugs introduced during refactoring.

* fix a minor bug

* build factor source data (price and volumes) from qlib if no source data is provided by the user (#168)

* Fixed some bugs introduced during refactoring.

* fix a small bug

* fix a small bug

---------

Co-authored-by: Xu Yang <[email protected]>
  • Loading branch information
WinstonLiyt and peteryang1 authored Aug 5, 2024
1 parent 48c81ea commit f8f1445
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 14 deletions.
2 changes: 1 addition & 1 deletion rdagent/app/qlib_rd_loop/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class Config:

# 2) sub task specific:
report_result_json_file_path: str = "git_ignore_folder/report_list.json"
max_factor_per_report: int = 10000
max_factors_per_exp: int = 10000


class FactorFromReportPropSetting(FactorBasePropSetting):
Expand Down
38 changes: 27 additions & 11 deletions rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
# TODO: we should have more advanced mechanism to handle such requirements for saving sessions.
import json
from pathlib import Path
from typing import Any, Tuple

import fire
from jinja2 import Environment, StrictUndefined

from rdagent.app.qlib_rd_loop.conf import (
FACTOR_FROM_REPORT_PROP_SETTING,
FactorBasePropSetting,
)
from rdagent.app.qlib_rd_loop.conf import FACTOR_FROM_REPORT_PROP_SETTING
from rdagent.app.qlib_rd_loop.factor_w_sc import FactorRDLoop
from rdagent.components.document_reader.document_reader import (
extract_first_page_screenshot_from_pdf,
Expand All @@ -32,6 +28,16 @@


def generate_hypothesis(factor_result: dict, report_content: str) -> str:
"""
Generate a hypothesis based on factor results and report content.
Args:
factor_result (dict): The results of the factor analysis.
report_content (str): The content of the report.
Returns:
str: The generated hypothesis.
"""
system_prompt = (
Environment(undefined=StrictUndefined).from_string(prompts["hypothesis_generation"]["system"]).render()
)
Expand Down Expand Up @@ -60,6 +66,15 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:


def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[QlibFactorExperiment, Hypothesis]:
"""
Extract hypothesis and experiment details from report files.
Args:
report_file_path (str): Path to the report file.
Returns:
Tuple[QlibFactorExperiment, Hypothesis]: The extracted experiment and generated hypothesis.
"""
with logger.tag("extract_factors_and_implement"):
with logger.tag("load_factor_tasks"):
exp = FactorExperimentLoaderFromPDFfiles().load(report_file_path)
Expand Down Expand Up @@ -88,13 +103,14 @@ def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[Qlib


class FactorReportLoop(FactorRDLoop, metaclass=LoopMeta):
skip_loop_error = (FactorEmptyError,)

def __init__(self, PROP_SETTING: FactorBasePropSetting):
def __init__(self, PROP_SETTING: FACTOR_FROM_REPORT_PROP_SETTING):
super().__init__(PROP_SETTING=PROP_SETTING)
self.judge_pdf_data_items = json.load(open(PROP_SETTING.report_result_json_file_path, "r"))
self.pdf_file_index = 0
self.valid_pdf_file_count = 0
self.current_loop_hypothesis = None
self.current_loop_exp = None
self.steps = ["propose_hypo_exp", "propose", "exp_gen", "coding", "running", "feedback"]

def propose_hypo_exp(self, prev_out: dict[str, Any]):
with logger.tag("r"):
Expand All @@ -109,8 +125,8 @@ def propose_hypo_exp(self, prev_out: dict[str, Any]):
continue
self.valid_pdf_file_count += 1
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report]
exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factor_per_report]
exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
logger.log_object(hypothesis, tag="hypothesis generation")
logger.log_object(exp.sub_tasks, tag="experiment generation")
self.current_loop_hypothesis = hypothesis
Expand All @@ -130,7 +146,7 @@ def main(path=None, step_n=None):
.. code-block:: python
dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_sh.py $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is an optional parameter
dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_w_sc.py $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is an optional parameter
"""
if path is None:
Expand Down
20 changes: 18 additions & 2 deletions rdagent/utils/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@ class LoopMeta(type):
@staticmethod
def _get_steps(bases):
"""
get all the `steps` of base classes and combine them to a single one.
Recursively get all the `steps` from the base classes and combine them into a single list.
Args:
bases (tuple): A tuple of base classes.
Returns:
List[Callable]: A list of steps combined from all base classes.
"""
steps = []
for base in bases:
Expand All @@ -34,7 +40,17 @@ def _get_steps(bases):
return steps

def __new__(cls, clsname, bases, attrs):
# move customized steps into steps
"""
Create a new class with combined steps from base classes and current class.
Args:
clsname (str): Name of the new class.
bases (tuple): Base classes.
attrs (dict): Attributes of the new class.
Returns:
LoopMeta: A new instance of LoopMeta.
"""
steps = LoopMeta._get_steps(bases) # all the base classes of parents
for name, attr in attrs.items():
if not name.startswith("__") and isinstance(attr, Callable):
Expand Down

0 comments on commit f8f1445

Please sign in to comment.