microsoft · WinstonLiyt · Jul 29, 2024 · Jul 17, 2024 · Jul 18, 2024 · Jul 18, 2024
diff --git a/rdagent/app/qlib_rd_loop/factor_from_report_sh.py b/rdagent/app/qlib_rd_loop/factor_from_report_sh.py
@@ -7,7 +7,7 @@
 from dotenv import load_dotenv
 from jinja2 import Environment, StrictUndefined
 
-from rdagent.app.qlib_rd_loop.conf import PROP_SETTING
+from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING
 from rdagent.components.document_reader.document_reader import (
     extract_first_page_screenshot_from_pdf,
     load_and_process_pdfs_by_langchain,
@@ -37,33 +37,33 @@
 
 assert load_dotenv()
 
-scen: Scenario = import_class(PROP_SETTING.factor_scen)()
+scen: Scenario = import_class(FACTOR_PROP_SETTING.scen)()
 
-hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.factor_hypothesis_gen)(scen)
+hypothesis_gen: HypothesisGen = import_class(FACTOR_PROP_SETTING.hypothesis_gen)(scen)
 
-hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.factor_hypothesis2experiment)()
+hypothesis2experiment: Hypothesis2Experiment = import_class(FACTOR_PROP_SETTING.hypothesis2experiment)()
 
-qlib_factor_coder: Developer = import_class(PROP_SETTING.factor_coder)(scen)
+qlib_factor_coder: Developer = import_class(FACTOR_PROP_SETTING.coder)(scen)
 
-qlib_factor_runner: Developer = import_class(PROP_SETTING.factor_runner)(scen)
+qlib_factor_runner: Developer = import_class(FACTOR_PROP_SETTING.runner)(scen)
 
-qlib_factor_summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.factor_summarizer)(scen)
+qlib_factor_summarizer: HypothesisExperiment2Feedback = import_class(FACTOR_PROP_SETTING.summarizer)(scen)
 
-with open(PROP_SETTING.report_result_json_file_path, "r") as f:
+with open(FACTOR_PROP_SETTING.report_result_json_file_path, "r") as f:
     judge_pdf_data = json.load(f)
 
 prompts_path = Path(__file__).parent / "prompts.yaml"
 prompts = Prompts(file_path=prompts_path)
 
 
 def save_progress(trace, current_index):
-    with open(PROP_SETTING.progress_file_path, "wb") as f:
+    with open(FACTOR_PROP_SETTING.progress_file_path, "wb") as f:
         pickle.dump((trace, current_index), f)
 
 
 def load_progress():
-    if Path(PROP_SETTING.progress_file_path).exists():
-        with open(PROP_SETTING.progress_file_path, "rb") as f:
+    if Path(FACTOR_PROP_SETTING.progress_file_path).exists():
+        with open(FACTOR_PROP_SETTING.progress_file_path, "rb") as f:
             return pickle.load(f)
     return Trace(scen=scen), 0
 
@@ -87,8 +87,9 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
     response_json = json.loads(response)
     hypothesis_text = response_json.get("hypothesis", "No hypothesis generated.")
     reason_text = response_json.get("reason", "No reason provided.")
+    concise_reason_text = response_json.get("concise_reason", "No concise reason provided.")
 
-    return Hypothesis(hypothesis=hypothesis_text, reason=reason_text)
+    return Hypothesis(hypothesis=hypothesis_text, reason=reason_text, concise_reason=concise_reason_text)
 
 
 def extract_factors_and_implement(report_file_path: str) -> tuple:
@@ -131,7 +132,9 @@ def extract_factors_and_implement(report_file_path: str) -> tuple:
             break
         file_path, attributes = judge_pdf_data_items[index]
         if attributes["class"] == 1:
-            report_file_path = Path(file_path.replace(PROP_SETTING.origin_report_path, PROP_SETTING.local_report_path))
+            report_file_path = Path(
+                file_path.replace(FACTOR_PROP_SETTING.origin_report_path, FACTOR_PROP_SETTING.local_report_path)
+            )
             if report_file_path.exists():
                 logger.info(f"Processing {report_file_path}")
 

diff --git a/rdagent/app/qlib_rd_loop/factor_w_sc.py b/rdagent/app/qlib_rd_loop/factor_w_sc.py
@@ -15,13 +15,13 @@
 class FactorRDLoop(RDLoop):
     skip_loop_error = (FactorEmptyError,)
 
-    def exp_gen(self, prev_out: dict[str, Any]):
-        with logger.tag("r"):  # research
-            exp = self.hypothesis2experiment.convert(prev_out["propose"], self.trace)
+    def running(self, prev_out: dict[str, Any]):
+        with logger.tag("ef"):  # evaluate and feedback
+            exp = self.runner.develop(prev_out["coding"])
             if exp is None:
                 logger.error(f"Factor extraction failed.")
                 raise FactorEmptyError("Factor extraction failed.")
-            logger.log_object(exp.sub_tasks, tag="experiment generation")
+            logger.log_object(exp, tag="runner result")
         return exp
 
 

diff --git a/rdagent/app/qlib_rd_loop/prompts.yaml b/rdagent/app/qlib_rd_loop/prompts.yaml
@@ -4,7 +4,8 @@ hypothesis_generation:
     Please ensure your response is in JSON format as shown below:
     {
       "hypothesis": "A clear and concise hypothesis based on the provided information.",
-      "reason": "A detailed explanation supporting the generated hypothesis."
+      "reason": "A detailed explanation supporting the generated hypothesis.",
+      "concise_reason": "One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of the knowledge that we are building)."
     }
 
   user: |-

diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py
@@ -19,14 +19,14 @@
 from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace
 from rdagent.core.conf import RD_AGENT_SETTINGS
 from rdagent.core.developer import Developer
-from rdagent.core.exception import CoderException, RunnerException
+from rdagent.core.exception import CoderError
 from rdagent.core.experiment import Task, Workspace
 from rdagent.core.scenario import Scenario
 from rdagent.core.utils import multiprocessing_wrapper
 
 EVAL_RES = Dict[
     str,
-    List[Tuple[FactorEvaluator, Union[object, RunnerException]]],
+    List[Tuple[FactorEvaluator, Union[object, CoderError]]],
 ]
 
 

diff --git a/rdagent/scenarios/data_mining/proposal/model_proposal.py b/rdagent/scenarios/data_mining/proposal/model_proposal.py
@@ -50,7 +50,11 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
 
     def convert_response(self, response: str) -> ModelHypothesis:
         response_dict = json.loads(response)
-        hypothesis = DMModelHypothesis(hypothesis=response_dict["hypothesis"], reason=response_dict["reason"], concise_reason=response_dict["concise_reason"])
+        hypothesis = DMModelHypothesis(
+            hypothesis=response_dict["hypothesis"],
+            reason=response_dict["reason"],
+            concise_reason=response_dict["concise_reason"],
+        )
         return hypothesis
 
 

diff --git a/rdagent/scenarios/qlib/prompts.yaml b/rdagent/scenarios/qlib/prompts.yaml
@@ -43,21 +43,6 @@ model_hypothesis_specification: |-
 
     6th Round Hypothesis (If fourth round didn't work):  The model should be a CNN. The CNN should have 5 convolutional layers. Use Leaky ReLU activation for all layers. Use dropout regularization with a rate of 0.3. (Reasoning: As regularisation rate of 0.5 didn't work, we only change a new regularisation and keep the other elements that worked. This means making changes in the current level.)    
 
-factor_hypothesis_specification: |-
-  Additional Specifications:
-    Hypotheses should grow and evolve based on the previous hypothesis. If there is no previous hypothesis, start with something simple. Gradually build up upon previous hypotheses and feedback.
-    Ensure that the hypothesis focuses on the creation and selection of factors in quantitative finance. Each hypothesis should address specific factor characteristics such as type (momentum, value, quality), calculation methods, or inclusion criteria. Avoid hypotheses related to model architecture or optimization processes.
-
-  Sample Hypotheses (Only learn from the format as these are not the knowledge):
-    - "Include a momentum factor based on the last 12 months' returns."
-    - "Add a value factor calculated as the book-to-market ratio."
-    - "Incorporate a quality factor derived from return on equity (ROE)."
-    - "Use a volatility factor based on the standard deviation of returns over the past 6 months."
-    - "Include a sentiment factor derived from news sentiment scores."
-    - "The momentum factor should be calculated using a 6-month look-back period."
-    - "Combine value and momentum factors using a weighted average approach."
-    - "Filter stocks by market capitalization before calculating the factors."
-
 factor_hypothesis_specification: |-
   Specifications:
     - Hypotheses should grow and evolve based on the previous hypothesis. If there is no previous hypothesis, start with something simple.