From deeb83d1d3c84b58defc80fe7008c15d14c6e7cc Mon Sep 17 00:00:00 2001
From: Xisen Wang <118058822+Xisen-Wang@users.noreply.github.com>
Date: Tue, 9 Jul 2024 17:31:35 +0800
Subject: [PATCH] refine core to store experiment results and hypothesis
 feedback (#55)

* Update proposal.py

Completed the HypothesisFeedback class.

* Refine the core code

---------

Co-authored-by: xuyang1 <xuyang1@microsoft.com>
---
 rdagent/app/qlib_rd_loop/conf.py              | 18 +++++++--
 rdagent/app/qlib_rd_loop/factor.py            | 17 ++++----
 rdagent/app/qlib_rd_loop/model.py             | 40 ++++++++-----------
 .../components/proposal/factor_proposal.py    | 22 +++++-----
 rdagent/components/proposal/prompts.yaml      |  9 +++--
 rdagent/core/experiment.py                    |  2 +
 rdagent/core/proposal.py                      | 35 ++++++++--------
 rdagent/scenarios/qlib/factor_proposal.py     | 17 ++++----
 .../scenarios/qlib/task_generator/feedback.py |  5 +--
 9 files changed, 85 insertions(+), 80 deletions(-)

diff --git a/rdagent/app/qlib_rd_loop/conf.py b/rdagent/app/qlib_rd_loop/conf.py
index 6db7c1c3..7ae8fc8d 100644
--- a/rdagent/app/qlib_rd_loop/conf.py
+++ b/rdagent/app/qlib_rd_loop/conf.py
@@ -4,12 +4,22 @@
 class PropSetting(BaseSettings):
     """"""
 
-    scen: str = "rdagent.scenarios.qlib.experiment.factor_experiment.QlibFactorScenario"
-    hypothesis_gen: str = "rdagent.scenarios.qlib.factor_proposal.QlibFactorHypothesisGen"
-    hypothesis2experiment: str = "rdagent.scenarios.qlib.factor_proposal.QlibFactorHypothesis2Experiment"
+    qlib_factor_scen: str = "rdagent.scenarios.qlib.experiment.factor_experiment.QlibFactorScenario"
+    qlib_factor_hypothesis_gen: str = "rdagent.scenarios.qlib.factor_proposal.QlibFactorHypothesisGen"
+    qlib_factor_hypothesis2experiment: str = "rdagent.scenarios.qlib.factor_proposal.QlibFactorHypothesis2Experiment"
     qlib_factor_coder: str = "rdagent.scenarios.qlib.factor_task_implementation.QlibFactorCoSTEER"
     qlib_factor_runner: str = "rdagent.scenarios.qlib.task_generator.data.QlibFactorRunner"
-    qlib_factor_summarizer: str = "rdagent.scenarios.qlib.task_generator.feedback.QlibFactorExperiment2Feedback"
+    qlib_factor_summarizer: str = (
+        "rdagent.scenarios.qlib.task_generator.feedback.QlibFactorHypothesisExperiment2Feedback"
+    )
+
+    # TODO: model part is not finished yet
+    qlib_model_scen: str = ""
+    qlib_model_hypothesis_gen: str = ""
+    qlib_model_hypothesis2experiment: str = ""
+    qlib_model_coder: str = ""
+    qlib_model_runner: str = ""
+    qlib_model_summarizer: str = ""
 
     evolving_n: int = 10
 
diff --git a/rdagent/app/qlib_rd_loop/factor.py b/rdagent/app/qlib_rd_loop/factor.py
index b6bb0599..2a790597 100644
--- a/rdagent/app/qlib_rd_loop/factor.py
+++ b/rdagent/app/qlib_rd_loop/factor.py
@@ -6,37 +6,34 @@
 
 load_dotenv(override=True)
 
-# import_from
 from rdagent.app.qlib_rd_loop.conf import PROP_SETTING
 from rdagent.core.proposal import (
-    Experiment2Feedback,
     Hypothesis2Experiment,
+    HypothesisExperiment2Feedback,
     HypothesisGen,
-    HypothesisSet,
     Trace,
 )
 from rdagent.core.task_generator import TaskGenerator
 from rdagent.core.utils import import_class
 
-scen = import_class(PROP_SETTING.scen)()
+scen = import_class(PROP_SETTING.qlib_factor_scen)()
 
-hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.hypothesis_gen)(scen)
+hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.qlib_factor_hypothesis_gen)(scen)
 
-hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.hypothesis2experiment)()
+hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.qlib_factor_hypothesis2experiment)()
 
 qlib_factor_coder: TaskGenerator = import_class(PROP_SETTING.qlib_factor_coder)(scen)
 qlib_factor_runner: TaskGenerator = import_class(PROP_SETTING.qlib_factor_runner)(scen)
 
-qlib_factor_summarizer: Experiment2Feedback = import_class(PROP_SETTING.qlib_factor_summarizer)()
+qlib_factor_summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.qlib_factor_summarizer)()
 
 
 trace = Trace(scen=scen)
-hs = HypothesisSet(trace=trace)
 for _ in range(PROP_SETTING.evolving_n):
     hypothesis = hypothesis_gen.gen(trace)
-    exp = hypothesis2experiment.convert(hs)
+    exp = hypothesis2experiment.convert(hypothesis, trace)
     exp = qlib_factor_coder.generate(exp)
     exp = qlib_factor_runner.generate(exp)
-    feedback = qlib_factor_summarizer.summarize(exp)
+    feedback = qlib_factor_summarizer.generateFeedback(exp, hypothesis, trace)
 
     trace.hist.append((hypothesis, exp, feedback))
diff --git a/rdagent/app/qlib_rd_loop/model.py b/rdagent/app/qlib_rd_loop/model.py
index c92c1d48..c30aa08f 100644
--- a/rdagent/app/qlib_rd_loop/model.py
+++ b/rdagent/app/qlib_rd_loop/model.py
@@ -4,39 +4,33 @@
 """
 
 # import_from
-from rdagent.app.model_proposal.conf import MODEL_PROP_SETTING
 
+from rdagent.app.qlib_rd_loop.conf import PROP_SETTING
 from rdagent.core.proposal import (
-    Experiment2Feedback,
     Hypothesis2Experiment,
-    HypothesisSet,
+    HypothesisExperiment2Feedback,
     Trace,
 )
 from rdagent.core.task_generator import TaskGenerator
+from rdagent.core.utils import import_class
 
-# load_from_cls_uri
+scen = import_class(PROP_SETTING.qlib_model_scen)()
 
+hypothesis_gen = import_class(PROP_SETTING.qlib_model_hypothesis_gen)(scen)
 
-scen = load_from_cls_uri(MODEL_PROP_SETTING.scen)()
+hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.qlib_model_hypothesis2experiment)()
 
-hypothesis_gen = load_from_cls_uri(MODEL_PROP_SETTING.hypothesis_gen)(scen)
+qlib_model_coder: TaskGenerator = import_class(PROP_SETTING.qlib_model_coder)(scen)
+qlib_model_runner: TaskGenerator = import_class(PROP_SETTING.qlib_model_runner)(scen)
 
-hypothesis2task: Hypothesis2Experiment = load_from_cls_uri(MODEL_PROP_SETTING.hypothesis2task)()
+qlib_model_summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.qlib_model_summarizer)(scen)
 
-task_gen: TaskGenerator = load_from_cls_uri(MODEL_PROP_SETTING.task_gen)(scen)  # for implementation
-
-imp2feedback: Experiment2Feedback = load_from_cls_uri(MODEL_PROP_SETTING.imp2feedback)(scen)  # for implementation
-
-
-iter_n = MODEL_PROP_SETTING.iter_n
-
-trace = Trace()
-
-hypothesis_set = HypothesisSet()
-for _ in range(iter_n):
+trace = Trace(scen=scen)
+for _ in range(PROP_SETTING.evolving_n):
     hypothesis = hypothesis_gen.gen(trace)
-    task = hypothesis2task.convert(hypothesis)
-    imp = task_gen.gen(task)
-    imp.execute()
-    feedback = imp2feedback.summarize(imp)
-    trace.hist.append((hypothesis, feedback))
+    exp = hypothesis2experiment.convert(hypothesis, trace)
+    exp = qlib_model_coder.generate(exp)
+    exp = qlib_model_runner.generate(exp)
+    feedback = qlib_model_summarizer.generateFeedback(exp, hypothesis, trace)
+
+    trace.hist.append((hypothesis, exp, feedback))
diff --git a/rdagent/components/proposal/factor_proposal.py b/rdagent/components/proposal/factor_proposal.py
index b01c511f..cba6728b 100644
--- a/rdagent/components/proposal/factor_proposal.py
+++ b/rdagent/components/proposal/factor_proposal.py
@@ -10,7 +10,6 @@
     Hypothesis,
     Hypothesis2Experiment,
     HypothesisGen,
-    HypothesisSet,
     Scenario,
     Trace,
 )
@@ -28,12 +27,10 @@ def __init__(self, scen: Scenario):
 
     # The following methods are scenario related so they should be implemented in the subclass
     @abstractmethod
-    def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:
-        ...
+    def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: ...
 
     @abstractmethod
-    def convert_response(self, response: str) -> FactorHypothesis:
-        ...
+    def convert_response(self, response: str) -> FactorHypothesis: ...
 
     def gen(self, trace: Trace) -> FactorHypothesis:
         context_dict, json_flag = self.prepare_context(trace)
@@ -67,20 +64,18 @@ def __init__(self) -> None:
         super().__init__()
 
     @abstractmethod
-    def prepare_context(self, hs: HypothesisSet) -> Tuple[dict, bool]:
-        ...
+    def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, bool]: ...
 
     @abstractmethod
-    def convert_response(self, response: str) -> FactorExperiment:
-        ...
+    def convert_response(self, response: str, trace: Trace) -> FactorExperiment: ...
 
-    def convert(self, hs: HypothesisSet) -> FactorExperiment:
-        context, json_flag = self.prepare_context(hs)
+    def convert(self, hypothesis: Hypothesis, trace: Trace) -> FactorExperiment:
+        context, json_flag = self.prepare_context(hypothesis, trace)
         system_prompt = (
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["factor_hypothesis2experiment"]["system_prompt"])
             .render(
-                scenario=hs.trace.scen.get_scenario_all_desc(),
+                scenario=trace.scen.get_scenario_all_desc(),
                 experiment_output_format=context["experiment_output_format"],
             )
         )
@@ -88,6 +83,7 @@ def convert(self, hs: HypothesisSet) -> FactorExperiment:
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["factor_hypothesis2experiment"]["user_prompt"])
             .render(
+                target_hypothesis=context["target_hypothesis"],
                 hypothesis_and_feedback=context["hypothesis_and_feedback"],
                 factor_list=context["factor_list"],
                 RAG=context["RAG"],
@@ -96,4 +92,4 @@ def convert(self, hs: HypothesisSet) -> FactorExperiment:
 
         resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag)
 
-        return self.convert_response(resp)
+        return self.convert_response(resp, trace)
diff --git a/rdagent/components/proposal/prompts.yaml b/rdagent/components/proposal/prompts.yaml
index e79fa70c..e979c670 100644
--- a/rdagent/components/proposal/prompts.yaml
+++ b/rdagent/components/proposal/prompts.yaml
@@ -21,14 +21,17 @@ factor_hypothesis2experiment:
     The factors are used in certain scenario, the scenario is as follows:
     {{ scenario }}
     The user will use the factors generated to do some experiments. The user will provide this information to you:
-    1. The hypothesis generated in the previous steps and their corresponding feedbacks.
-    2. Former proposed factors on similar hypothesis.
-    3. Some additional information to help you generate new factors.
+    1. The target hypothesis you are targeting to generate factors for.
+    2. The hypothesis generated in the previous steps and their corresponding feedbacks.
+    3. Former proposed factors on similar hypothesis.
+    4. Some additional information to help you generate new factors.
     Please generate the output following the format below:
     {{ experiment_output_format }}
     
   user_prompt: |-
     The user has made several hypothesis on this scenario and did several evaluation on them.
+    The target hypothesis you are targeting to generate factors for is as follows:
+    {{ target_hypothesis }}
     The former hypothesis and the corresponding feedbacks are as follows:
     {{ hypothesis_and_feedback }}
     The former proposed factors on similar hypothesis are as follows:
diff --git a/rdagent/core/experiment.py b/rdagent/core/experiment.py
index b209e3c9..b7abb860 100644
--- a/rdagent/core/experiment.py
+++ b/rdagent/core/experiment.py
@@ -122,6 +122,8 @@ class Experiment(ABC, Generic[ASpecificTask, ASpecificImp]):
     def __init__(self, sub_tasks: Sequence[ASpecificTask]) -> None:
         self.sub_tasks = sub_tasks
         self.sub_implementations: Sequence[ASpecificImp] = [None for _ in self.sub_tasks]
+        self.based_experiments: Sequence[Experiment] = []
+        self.result: object = None  # The result of the experiment, can be different types in different scenarios.
 
 
 TaskOrExperiment = TypeVar("TaskOrExperiment", Task, Experiment)
diff --git a/rdagent/core/proposal.py b/rdagent/core/proposal.py
index 1b4fb923..93e20453 100644
--- a/rdagent/core/proposal.py
+++ b/rdagent/core/proposal.py
@@ -23,6 +23,10 @@ class Hypothesis:
     def __init__(self, hypothesis: str, reason: str) -> None:
         self.hypothesis: str = hypothesis
         self.reason: str = reason
+    
+    def __str__(self) -> str:
+        return f"""Hypothesis: {self.hypothesis}
+Reason: {self.reason}"""
 
     # source: data_ana | model_nan = None
 
@@ -30,7 +34,16 @@ def __init__(self, hypothesis: str, reason: str) -> None:
 # Origin(path of repo/data/feedback) => view/summarization => generated Hypothesis
 
 
-class HypothesisFeedback(Feedback): ...
+class HypothesisFeedback(Feedback):
+    def __init__(self, observations: str, hypothesis_evaluation: str, new_hypothesis: str, reason: str, decision: bool):
+        self.observations = observations
+        self.hypothesis_evaluation = hypothesis_evaluation
+        self.new_hypothesis = new_hypothesis
+        self.reason = reason
+        self.decision = decision
+
+    def __bool__(self):
+        return self.decision
 
 
 ASpecificScen = TypeVar("ASpecificScen", bound=Scenario)
@@ -59,19 +72,6 @@ def gen(self, trace: Trace) -> Hypothesis:
         """
 
 
-class HypothesisSet:
-    """
-    # drop, append
-
-    hypothesis_imp: list[float] | None  # importance of each hypothesis
-    true_hypothesis or false_hypothesis
-    """
-
-    def __init__(self, trace: Trace, hypothesis_list: list[Hypothesis] = []) -> None:
-        self.hypothesis_list: list[Hypothesis] = hypothesis_list
-        self.trace: Trace = trace
-
-
 ASpecificExp = TypeVar("ASpecificExp", bound=Experiment)
 
 
@@ -81,21 +81,22 @@ class Hypothesis2Experiment(ABC, Generic[ASpecificExp]):
     """
 
     @abstractmethod
-    def convert(self, hs: HypothesisSet) -> ASpecificExp:
+    def convert(self, hypothesis: Hypothesis, trace: Trace) -> ASpecificExp:
         """Connect the idea proposal to implementation"""
         ...
 
 
 # Boolean, Reason, Confidence, etc.
 
+
 class HypothesisExperiment2Feedback:
     """ "Generated feedbacks on the hypothesis from **Executed** Implementations of different tasks & their comparisons with previous performances"""
 
     def generateFeedback(self, ti: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
         """
-        The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM). 
+        The `ti` should be executed and the results should be included, as well as the comparison between previous results (done by LLM).
         For example: `mlflow` of Qlib will be included.
         """
-        return HypothesisFeedback()
+        raise NotImplementedError("generateFeedback method is not implemented.")
 
     # def generateResultComparison()
diff --git a/rdagent/scenarios/qlib/factor_proposal.py b/rdagent/scenarios/qlib/factor_proposal.py
index 52f65de4..216d4dc8 100644
--- a/rdagent/scenarios/qlib/factor_proposal.py
+++ b/rdagent/scenarios/qlib/factor_proposal.py
@@ -12,7 +12,7 @@
     FactorHypothesisGen,
 )
 from rdagent.core.prompts import Prompts
-from rdagent.core.proposal import HypothesisSet, Scenario, Trace
+from rdagent.core.proposal import Hypothesis, Scenario, Trace
 
 prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
 
@@ -43,23 +43,24 @@ def convert_response(self, response: str) -> FactorHypothesis:
 
 
 class QlibFactorHypothesis2Experiment(FactorHypothesis2Experiment):
-    def prepare_context(self, hs: HypothesisSet) -> Tuple[dict | bool]:
-        scenario = hs.trace.scen.get_scenario_all_desc()
+    def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict | bool]:
+        scenario = trace.scen.get_scenario_all_desc()
         experiment_output_format = prompt_dict["experiment_output_format"]
 
         hypothesis_and_feedback = (
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["hypothesis_and_feedback"])
-            .render(trace=hs.trace)
+            .render(trace=trace)
         )
 
-        experiment_list: List[FactorExperiment] = [t[1] for t in hs.trace.hist]
+        experiment_list: List[FactorExperiment] = [t[1] for t in trace.hist]
 
         factor_list = []
         for experiment in experiment_list:
             factor_list.extend(experiment.sub_tasks)
 
         return {
+            "target_hypothesis": str(hypothesis),
             "scenario": scenario,
             "hypothesis_and_feedback": hypothesis_and_feedback,
             "experiment_output_format": experiment_output_format,
@@ -67,7 +68,7 @@ def prepare_context(self, hs: HypothesisSet) -> Tuple[dict | bool]:
             "RAG": ...,
         }, True
 
-    def convert_response(self, response: str) -> FactorExperiment:
+    def convert_response(self, response: str, trace: Trace) -> FactorExperiment:
         response_dict = json.loads(response)
         tasks = []
         for factor_name in response_dict:
@@ -75,4 +76,6 @@ def convert_response(self, response: str) -> FactorExperiment:
             formulation = response_dict[factor_name]["formulation"]
             variables = response_dict[factor_name]["variables"]
             tasks.append(FactorTask(factor_name, description, formulation, variables))
-        return FactorExperiment(tasks)
+        exp = FactorExperiment(tasks)
+        exp.based_experiments = [t[1] for t in trace.hist if t[2]]
+        return exp
diff --git a/rdagent/scenarios/qlib/task_generator/feedback.py b/rdagent/scenarios/qlib/task_generator/feedback.py
index 8fc42768..9fed7e89 100644
--- a/rdagent/scenarios/qlib/task_generator/feedback.py
+++ b/rdagent/scenarios/qlib/task_generator/feedback.py
@@ -1,8 +1,7 @@
 # TODO:
 # Implement to feedback.
 
-from rdagent.core.proposal import Experiment2Feedback
+from rdagent.core.proposal import HypothesisExperiment2Feedback
 
 
-class QlibFactorExperiment2Feedback(Experiment2Feedback):
-    ...
+class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): ...