Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: some small refinement #156

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rdagent/app/qlib_rd_loop/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Config:
# 2) sub task specific:
origin_report_path: str = "data/report_origin"
local_report_path: str = "data/report"
-report_result_json_file_path: str = "git_ignore_folder/res_dict.csv"
+report_result_json_file_path: str = "git_ignore_folder/report_list.json"
progress_file_path: str = "git_ignore_folder/progress.pkl"
report_extract_result: str = "git_ignore_folder/hypo_exp_cache.pkl"
max_factor_per_report: int = 10000
Expand Down
34 changes: 7 additions & 27 deletions rdagent/app/qlib_rd_loop/factor_from_report_sh.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
# TODO: we should have more advanced mechanism to handle such requirements for saving sessions.
import csv
import json
import pickle
from pathlib import Path
-from typing import Any
+from typing import Any, Tuple

import fire
import pandas as pd
from dotenv import load_dotenv
from jinja2 import Environment, StrictUndefined

from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING
Expand All @@ -16,36 +12,23 @@
load_and_process_pdfs_by_langchain,
)
from rdagent.components.workflow.conf import BasePropSetting
from rdagent.components.workflow.rd_loop import RDLoop
from rdagent.core.developer import Developer
from rdagent.core.exception import FactorEmptyError
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import (
Hypothesis,
Hypothesis2Experiment,
HypothesisExperiment2Feedback,
HypothesisGen,
Trace,
)
from rdagent.core.proposal import Hypothesis, HypothesisExperiment2Feedback, Trace
from rdagent.core.scenario import Scenario
from rdagent.core.utils import import_class
from rdagent.log import rdagent_logger as logger
from rdagent.oai.llm_utils import APIBackend
from rdagent.scenarios.qlib.developer.factor_coder import QlibFactorCoSTEER
from rdagent.scenarios.qlib.experiment.factor_experiment import (
QlibFactorExperiment,
QlibFactorScenario,
)
from rdagent.scenarios.qlib.factor_experiment_loader.pdf_loader import (
FactorExperimentLoaderFromPDFfiles,
classify_report_from_dict,
)
from rdagent.utils.workflow import LoopBase, LoopMeta

with open(FACTOR_PROP_SETTING.report_result_json_file_path, "r") as input_file:
csv_reader = csv.reader(input_file)
judge_pdf_data = [row[0] for row in csv_reader]

prompts_path = Path(__file__).parent / "prompts.yaml"
prompts = Prompts(file_path=prompts_path)

Expand Down Expand Up @@ -78,7 +61,7 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
)


-def extract_factors_and_implement(report_file_path: str) -> tuple:
+def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[QlibFactorExperiment, Hypothesis]:
scenario = QlibFactorScenario()

with logger.tag("extract_factors_and_implement"):
Expand Down Expand Up @@ -121,21 +104,18 @@ def __init__(self, PROP_SETTING: BasePropSetting):
self.summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.summarizer)(scen)
self.trace = Trace(scen=scen)

-self.judge_pdf_data_items = judge_pdf_data
+self.judge_pdf_data_items = json.load(open(FACTOR_PROP_SETTING.report_result_json_file_path, "r"))
self.pdf_file_index = 0
self.hypo_exp_cache = (
pickle.load(open(FACTOR_PROP_SETTING.report_extract_result, "rb"))
if Path(FACTOR_PROP_SETTING.report_extract_result).exists()
else {}
)
super().__init__()

def propose_hypo_exp(self, prev_out: dict[str, Any]):
with logger.tag("r"):
while True:
if self.pdf_file_index > 100:
break
report_file_path = self.judge_pdf_data_items[self.pdf_file_index]
self.pdf_file_index += 1
-exp, hypothesis = extract_factors_and_implement(str(report_file_path))
+exp, hypothesis = extract_hypothesis_and_exp_from_reports(str(report_file_path))
if exp is None:
continue
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
Expand Down
30 changes: 13 additions & 17 deletions rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,20 @@ def evolve(

if FACTOR_IMPLEMENT_SETTINGS.select_threshold < len(to_be_finished_task_index):
# Select a fixed number of factors if the total exceeds the threshold
-implementation_factors_per_round = FACTOR_IMPLEMENT_SETTINGS.select_threshold
-else:
-implementation_factors_per_round = len(to_be_finished_task_index)
-
-if FACTOR_IMPLEMENT_SETTINGS.select_method == "random":
-to_be_finished_task_index = RandomSelect(
-to_be_finished_task_index,
-implementation_factors_per_round,
-)
+if FACTOR_IMPLEMENT_SETTINGS.select_method == "random":
+to_be_finished_task_index = RandomSelect(
+to_be_finished_task_index,
+FACTOR_IMPLEMENT_SETTINGS.select_threshold,
+)

-if FACTOR_IMPLEMENT_SETTINGS.select_method == "scheduler":
-to_be_finished_task_index = LLMSelect(
-to_be_finished_task_index,
-implementation_factors_per_round,
-evo,
-queried_knowledge.former_traces,
-self.scen,
-)
+if FACTOR_IMPLEMENT_SETTINGS.select_method == "scheduler":
+to_be_finished_task_index = LLMSelect(
+to_be_finished_task_index,
+FACTOR_IMPLEMENT_SETTINGS.select_threshold,
+evo,
+queried_knowledge.former_traces,
+self.scen,
+)

result = multiprocessing_wrapper(
[
Expand Down
Loading