diff --git a/rdagent/app/qlib_rd_loop/factor_from_report_sh.py b/rdagent/app/qlib_rd_loop/factor_from_report_sh.py index a2d57dfc..1b57ae7b 100644 --- a/rdagent/app/qlib_rd_loop/factor_from_report_sh.py +++ b/rdagent/app/qlib_rd_loop/factor_from_report_sh.py @@ -67,11 +67,15 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str: ) response_json = json.loads(response) - hypothesis_text = response_json.get("hypothesis", "No hypothesis generated.") - reason_text = response_json.get("reason", "No reason provided.") - concise_reason_text = response_json.get("concise_reason", "No concise reason provided.") - return Hypothesis(hypothesis=hypothesis_text, reason=reason_text, concise_reason=concise_reason_text) + return Hypothesis( + hypothesis=response_json.get("hypothesis", "No hypothesis provided"), + reason=response_json.get("reason", "No reason provided"), + concise_reason=response_json.get("concise_reason", "No concise reason provided"), + concise_observation=response_json.get("concise_observation", "No concise observation provided"), + concise_justification=response_json.get("concise_justification", "No concise justification provided"), + concise_knowledge=response_json.get("concise_knowledge", "No concise knowledge provided"), + ) def extract_factors_and_implement(report_file_path: str) -> tuple: @@ -118,7 +122,7 @@ def __init__(self, PROP_SETTING: BasePropSetting): self.trace = Trace(scen=scen) self.judge_pdf_data_items = judge_pdf_data - self.index = 0 + self.pdf_file_index = 0 self.hypo_exp_cache = ( pickle.load(open(FACTOR_PROP_SETTING.report_extract_result, "rb")) if Path(FACTOR_PROP_SETTING.report_extract_result).exists() @@ -129,28 +133,12 @@ def __init__(self, PROP_SETTING: BasePropSetting): def propose_hypo_exp(self, prev_out: dict[str, Any]): with logger.tag("r"): while True: - if self.index > 100: - break - report_file_path = self.judge_pdf_data_items[self.index] - self.index += 1 - if report_file_path in self.hypo_exp_cache: - hypothesis, exp = self.hypo_exp_cache[report_file_path] - exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [ - t[1] for t in self.trace.hist if t[2] - ] - else: + report_file_path = self.judge_pdf_data_items[self.pdf_file_index] + self.pdf_file_index += 1 + exp, hypothesis = extract_factors_and_implement(str(report_file_path)) + if exp is None: continue - # else: - # exp, hypothesis = extract_factors_and_implement(str(report_file_path)) - # if exp is None: - # continue - # exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]] - # self.hypo_exp_cache[report_file_path] = (hypothesis, exp) - # pickle.dump(self.hypo_exp_cache, open(FACTOR_PROP_SETTING.report_extract_result, "wb")) - with logger.tag("extract_factors_and_implement"): - with logger.tag("load_pdf_screenshot"): - pdf_screenshot = extract_first_page_screenshot_from_pdf(report_file_path) - logger.log_object(pdf_screenshot) + exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]] exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_PROP_SETTING.max_factor_per_report] exp.sub_tasks = exp.sub_tasks[: FACTOR_PROP_SETTING.max_factor_per_report] logger.log_object(hypothesis, tag="hypothesis generation") diff --git a/rdagent/app/qlib_rd_loop/prompts.yaml b/rdagent/app/qlib_rd_loop/prompts.yaml index 2861ce1b..a39c3174 100644 --- a/rdagent/app/qlib_rd_loop/prompts.yaml +++ b/rdagent/app/qlib_rd_loop/prompts.yaml @@ -5,7 +5,10 @@ hypothesis_generation: { "hypothesis": "A clear and concise hypothesis based on the provided information.", "reason": "A detailed explanation supporting the generated hypothesis.", - "concise_reason": One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building) + "concise_reason": "One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building)", + "concise_observation": "One line summary. It focuses on the observation of the given scenario, data characteristics, or previous experiences (failures & succeses).", + "concise_justification": "One line summary. It focuses on the justification for the change in new hypothesis and the route of exploration supporting the growth of the hypothesis, based on the observation. ", + "concise_knowledge": "One line summary. It focuses on a transferable knowledege that comes with the new hypothesis. Use conditional grammar. eg. "If...., ..; When..., .; and etc" } user: |- diff --git a/rdagent/scenarios/qlib/developer/factor_runner.py b/rdagent/scenarios/qlib/developer/factor_runner.py index 49223695..c060e51f 100644 --- a/rdagent/scenarios/qlib/developer/factor_runner.py +++ b/rdagent/scenarios/qlib/developer/factor_runner.py @@ -152,5 +152,4 @@ def process_factor_data(self, exp_or_list: List[QlibFactorExperiment] | QlibFact if factor_dfs: return pd.concat(factor_dfs, axis=1) else: - logger.error("No valid factor data found to merge.") - return pd.DataFrame() # Return an empty DataFrame if no valid data + raise FactorEmptyError("No valid factor data found to merge.") diff --git a/rdagent/scenarios/qlib/developer/feedback.py b/rdagent/scenarios/qlib/developer/feedback.py index c0edfa24..48421032 100644 --- a/rdagent/scenarios/qlib/developer/feedback.py +++ b/rdagent/scenarios/qlib/developer/feedback.py @@ -97,8 +97,6 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac .render( hypothesis_text=hypothesis_text, task_details=tasks_factors, - # current_result=current_result, - # sota_result=sota_result, combined_result=combined_result, ) )