Skip to content

Commit

Permalink
some small refinement (#156)
Browse files Browse the repository at this point in the history
  • Loading branch information
peteryang1 authored Aug 2, 2024
1 parent d8da7db commit 94cf2bb
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 45 deletions.
2 changes: 1 addition & 1 deletion rdagent/app/qlib_rd_loop/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Config:
# 2) sub task specific:
origin_report_path: str = "data/report_origin"
local_report_path: str = "data/report"
report_result_json_file_path: str = "git_ignore_folder/res_dict.csv"
report_result_json_file_path: str = "git_ignore_folder/report_list.json"
progress_file_path: str = "git_ignore_folder/progress.pkl"
report_extract_result: str = "git_ignore_folder/hypo_exp_cache.pkl"
max_factor_per_report: int = 10000
Expand Down
34 changes: 7 additions & 27 deletions rdagent/app/qlib_rd_loop/factor_from_report_sh.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
# TODO: we should have a more advanced mechanism to handle such requirements for saving sessions.
import csv
import json
import pickle
from pathlib import Path
from typing import Any
from typing import Any, Tuple

import fire
import pandas as pd
from dotenv import load_dotenv
from jinja2 import Environment, StrictUndefined

from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING
Expand All @@ -16,36 +12,23 @@
load_and_process_pdfs_by_langchain,
)
from rdagent.components.workflow.conf import BasePropSetting
from rdagent.components.workflow.rd_loop import RDLoop
from rdagent.core.developer import Developer
from rdagent.core.exception import FactorEmptyError
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import (
Hypothesis,
Hypothesis2Experiment,
HypothesisExperiment2Feedback,
HypothesisGen,
Trace,
)
from rdagent.core.proposal import Hypothesis, HypothesisExperiment2Feedback, Trace
from rdagent.core.scenario import Scenario
from rdagent.core.utils import import_class
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import APIBackend
from rdagent.scenarios.qlib.developer.factor_coder import QlibFactorCoSTEER
from rdagent.scenarios.qlib.experiment.factor_experiment import (
QlibFactorExperiment,
QlibFactorScenario,
)
from rdagent.scenarios.qlib.factor_experiment_loader.pdf_loader import (
FactorExperimentLoaderFromPDFfiles,
classify_report_from_dict,
)
from rdagent.utils.workflow import LoopBase, LoopMeta

with open(FACTOR_PROP_SETTING.report_result_json_file_path, "r") as input_file:
csv_reader = csv.reader(input_file)
judge_pdf_data = [row[0] for row in csv_reader]

prompts_path = Path(__file__).parent / "prompts.yaml"
prompts = Prompts(file_path=prompts_path)

Expand Down Expand Up @@ -78,7 +61,7 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
)


def extract_factors_and_implement(report_file_path: str) -> tuple:
def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[QlibFactorExperiment, Hypothesis]:
scenario = QlibFactorScenario()

with logger.tag("extract_factors_and_implement"):
Expand Down Expand Up @@ -121,21 +104,18 @@ def __init__(self, PROP_SETTING: BasePropSetting):
self.summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.summarizer)(scen)
self.trace = Trace(scen=scen)

self.judge_pdf_data_items = judge_pdf_data
self.judge_pdf_data_items = json.load(open(FACTOR_PROP_SETTING.report_result_json_file_path, "r"))
self.pdf_file_index = 0
self.hypo_exp_cache = (
pickle.load(open(FACTOR_PROP_SETTING.report_extract_result, "rb"))
if Path(FACTOR_PROP_SETTING.report_extract_result).exists()
else {}
)
super().__init__()

def propose_hypo_exp(self, prev_out: dict[str, Any]):
with logger.tag("r"):
while True:
if self.pdf_file_index > 100:
break
report_file_path = self.judge_pdf_data_items[self.pdf_file_index]
self.pdf_file_index += 1
exp, hypothesis = extract_factors_and_implement(str(report_file_path))
exp, hypothesis = extract_hypothesis_and_exp_from_reports(str(report_file_path))
if exp is None:
continue
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
Expand Down
30 changes: 13 additions & 17 deletions rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,20 @@ def evolve(

if FACTOR_IMPLEMENT_SETTINGS.select_threshold < len(to_be_finished_task_index):
# Select a fixed number of factors if the total exceeds the threshold
implementation_factors_per_round = FACTOR_IMPLEMENT_SETTINGS.select_threshold
else:
implementation_factors_per_round = len(to_be_finished_task_index)

if FACTOR_IMPLEMENT_SETTINGS.select_method == "random":
to_be_finished_task_index = RandomSelect(
to_be_finished_task_index,
implementation_factors_per_round,
)
if FACTOR_IMPLEMENT_SETTINGS.select_method == "random":
to_be_finished_task_index = RandomSelect(
to_be_finished_task_index,
FACTOR_IMPLEMENT_SETTINGS.select_threshold,
)

if FACTOR_IMPLEMENT_SETTINGS.select_method == "scheduler":
to_be_finished_task_index = LLMSelect(
to_be_finished_task_index,
implementation_factors_per_round,
evo,
queried_knowledge.former_traces,
self.scen,
)
if FACTOR_IMPLEMENT_SETTINGS.select_method == "scheduler":
to_be_finished_task_index = LLMSelect(
to_be_finished_task_index,
FACTOR_IMPLEMENT_SETTINGS.select_threshold,
evo,
queried_knowledge.former_traces,
self.scen,
)

result = multiprocessing_wrapper(
[
Expand Down

0 comments on commit 94cf2bb

Please sign in to comment.