fix: fix some small bugs in report-factor loop (#152)

* Init todo * update all code * update * Extract factors from financial reports loop finished * Fix two small bugs. * Delete rdagent/app/qlib_rd_loop/run_script.sh * Minor mod * Delete rdagent/app/qlib_rd_loop/nohup.out * Fix a small bug in file reading. * some updates * Update the detailed process and prompt of factor loop. * Evaluation & dataset * Optimize the prompt for generating hypotheses and feedback in the factor loop. * Generate new data * dataset generation * Performed further optimizations on the factor loop and report extraction loop, added log handling for both processes, and implemented a screenshot feature for report extraction. * Update rdagent/components/coder/factor_coder/CoSTEER/evaluators.py * Update package.txt for fitz. * add the result * Performed further optimizations on the factor loop and report extraction loop, added log handling for both processes, and implemented a screenshot feature for report extraction. (#100) (#102) - Performed further optimizations on the factor loop and report extraction loop. - Added log handling for both processes. - Implemented a screenshot feature for report extraction. * Analysis * Optimized log output. * Factor update * A draft of the "Quick Start" section for README * Add scenario descriptions. * Updates * Adjust content * Enable logging of backtesting in Qlib and store rich-text descriptions in Trace. Support one-step debugging for factor extraction. * Reformat analysis.py * CI fix * Refactor * remove useless code * fix bugs (#111) * Fix two small bugs. * Fix a merge bug. * Fix two small bugs. * fix some bugs. * Fix some format bugs. * Restore a file. * Fix a format bug. * draft renew of evaluators * fix a small bug. * fix a small bug * Support Factor Report Loop * Update framework for extracting factors from research reports. * Refactor report-based factor extraction and fix minor bugs. * fix a small bug of log. 
* change some prompts * improve factor_runner * fix a small bug * change some prompts * cancel some comments * cancel some comments and fix some bugs * fix some bugs in factor from reports loop --------- Co-authored-by: Young <[email protected]> Co-authored-by: you-n-g <[email protected]> Co-authored-by: Taozhi Wang <[email protected]> Co-authored-by: Suhan Cui <[email protected]>
microsoft · Aug 2, 2024 · a79f9f9 · a79f9f9
1 parent 529f935
commit a79f9f9
Show file tree

Hide file tree

Showing 4 changed files with 19 additions and 31 deletions.
diff --git a/rdagent/app/qlib_rd_loop/factor_from_report_sh.py b/rdagent/app/qlib_rd_loop/factor_from_report_sh.py
@@ -67,11 +67,15 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
     )
 
     response_json = json.loads(response)
-    hypothesis_text = response_json.get("hypothesis", "No hypothesis generated.")
-    reason_text = response_json.get("reason", "No reason provided.")
-    concise_reason_text = response_json.get("concise_reason", "No concise reason provided.")
 
-    return Hypothesis(hypothesis=hypothesis_text, reason=reason_text, concise_reason=concise_reason_text)
+    return Hypothesis(
+        hypothesis=response_json.get("hypothesis", "No hypothesis provided"),
+        reason=response_json.get("reason", "No reason provided"),
+        concise_reason=response_json.get("concise_reason", "No concise reason provided"),
+        concise_observation=response_json.get("concise_observation", "No concise observation provided"),
+        concise_justification=response_json.get("concise_justification", "No concise justification provided"),
+        concise_knowledge=response_json.get("concise_knowledge", "No concise knowledge provided"),
+    )
 
 
 def extract_factors_and_implement(report_file_path: str) -> tuple:
@@ -118,7 +122,7 @@ def __init__(self, PROP_SETTING: BasePropSetting):
         self.trace = Trace(scen=scen)
 
         self.judge_pdf_data_items = judge_pdf_data
-        self.index = 0
+        self.pdf_file_index = 0
         self.hypo_exp_cache = (
             pickle.load(open(FACTOR_PROP_SETTING.report_extract_result, "rb"))
             if Path(FACTOR_PROP_SETTING.report_extract_result).exists()
@@ -129,28 +133,12 @@ def __init__(self, PROP_SETTING: BasePropSetting):
     def propose_hypo_exp(self, prev_out: dict[str, Any]):
         with logger.tag("r"):
             while True:
-                if self.index > 100:
-                    break
-                report_file_path = self.judge_pdf_data_items[self.index]
-                self.index += 1
-                if report_file_path in self.hypo_exp_cache:
-                    hypothesis, exp = self.hypo_exp_cache[report_file_path]
-                    exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [
-                        t[1] for t in self.trace.hist if t[2]
-                    ]
-                else:
+                report_file_path = self.judge_pdf_data_items[self.pdf_file_index]
+                self.pdf_file_index += 1
+                exp, hypothesis = extract_factors_and_implement(str(report_file_path))
+                if exp is None:
                     continue
-                # else:
-                #     exp, hypothesis = extract_factors_and_implement(str(report_file_path))
-                #     if exp is None:
-                #         continue
-                #     exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
-                #     self.hypo_exp_cache[report_file_path] = (hypothesis, exp)
-                #     pickle.dump(self.hypo_exp_cache, open(FACTOR_PROP_SETTING.report_extract_result, "wb"))
-                with logger.tag("extract_factors_and_implement"):
-                    with logger.tag("load_pdf_screenshot"):
-                        pdf_screenshot = extract_first_page_screenshot_from_pdf(report_file_path)
-                        logger.log_object(pdf_screenshot)
+                exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
                 exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_PROP_SETTING.max_factor_per_report]
                 exp.sub_tasks = exp.sub_tasks[: FACTOR_PROP_SETTING.max_factor_per_report]
                 logger.log_object(hypothesis, tag="hypothesis generation")

diff --git a/rdagent/app/qlib_rd_loop/prompts.yaml b/rdagent/app/qlib_rd_loop/prompts.yaml
@@ -5,7 +5,10 @@ hypothesis_generation:
     {
       "hypothesis": "A clear and concise hypothesis based on the provided information.",
       "reason": "A detailed explanation supporting the generated hypothesis.",
-      "concise_reason": One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building)
+      "concise_reason": "One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building)",
+      "concise_observation": "One line summary. It focuses on the observation of the given scenario, data characteristics, or previous experiences (failures & successes).",
+      "concise_justification": "One line summary. It focuses on the justification for the change in new hypothesis and the route of exploration supporting the growth of the hypothesis, based on the observation. ",
+      "concise_knowledge": "One line summary. It focuses on transferable knowledge that comes with the new hypothesis. Use conditional grammar, e.g. 'If ..., ...; When ..., ...', etc."
     }
 
   user: |-

diff --git a/rdagent/scenarios/qlib/developer/factor_runner.py b/rdagent/scenarios/qlib/developer/factor_runner.py
@@ -152,5 +152,4 @@ def process_factor_data(self, exp_or_list: List[QlibFactorExperiment] | QlibFact
         if factor_dfs:
             return pd.concat(factor_dfs, axis=1)
         else:
-            logger.error("No valid factor data found to merge.")
-            return pd.DataFrame()  # Return an empty DataFrame if no valid data
+            raise FactorEmptyError("No valid factor data found to merge.")
diff --git a/rdagent/scenarios/qlib/developer/feedback.py b/rdagent/scenarios/qlib/developer/feedback.py
@@ -97,8 +97,6 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
             .render(
                 hypothesis_text=hypothesis_text,
                 task_details=tasks_factors,
-                # current_result=current_result,
-                # sota_result=sota_result,
                 combined_result=combined_result,
             )
         )