From 53a2dd9e64cb3b9ee155aa3ffa87312fbec00187 Mon Sep 17 00:00:00 2001
From: Xisen Wang <118058822+Xisen-Wang@users.noreply.github.com>
Date: Mon, 15 Jul 2024 11:40:09 +0800
Subject: [PATCH 1/5] Update QlibModelHypothesisExperiment2Feedback Class

- Implemented generateFeedback()
- Tested to be working
- Added conditional prompts to deal with the "1st generation" (first round)
- Requires the Trace class to provide get_last_experiment_info()
- Future TODO: revise the prompts & turn them into YAML
---
 .../scenarios/qlib/task_generator/feedback.py | 88 ++++++++++++++++++-
 1 file changed, 85 insertions(+), 3 deletions(-)

diff --git a/rdagent/scenarios/qlib/task_generator/feedback.py b/rdagent/scenarios/qlib/task_generator/feedback.py
index 0c6a09dd..c6dd5745 100644
--- a/rdagent/scenarios/qlib/task_generator/feedback.py
+++ b/rdagent/scenarios/qlib/task_generator/feedback.py
@@ -1,10 +1,92 @@
 # TODO:
 # Implement to feedback.
-
-from rdagent.core.proposal import HypothesisExperiment2Feedback
+import json
+from rdagent.oai.llm_utils import APIBackend
+from rdagent.core.proposal import HypothesisExperiment2Feedback, Trace, Hypothesis, HypothesisFeedback, Scenario
+from rdagent.core.experiment import Experiment
 
 
 class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): ...
 
 
-class QlibModelHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): ...
+
+class QlibModelHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
+    """Generate feedback on the hypothesis from **executed** implementations of different tasks and their comparison with previous performance."""
+
+    def generateFeedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
+        """
+        The experiment `exp` should already be executed and its results included, together with the comparison against previous results (done by the LLM).
+        For example, the `mlflow` output of Qlib will be included.
+        """
+
+        # Define the system prompt for hypothesis feedback
+        sys_prompt_hypothesis = (
+            "You are a professional result analysis assistant. You will receive a result and a hypothesis. "
+            "Your task is to provide feedback on how well the result supports or refutes the hypothesis by judging from the observation of performance increase or decrease. "
+            "Please provide detailed and constructive feedback. "
+            "Example JSON Structure for Result Analysis: "
+            '{"Observations": "Your overall observations here", "Feedback for Hypothesis": "Observations related to the hypothesis", '
+            '"New Hypothesis": "Put your new hypothesis here.", "Reasoning": "Provide reasoning for the hypothesis here.", '
+            '"Decision": "True or False"}'
+        )
+
+        # Define the user prompt for hypothesis feedback
+        context = trace.scen
+        last_experiment_info = trace.get_last_experiment_info()
+
+        if last_experiment_info:
+            last_hypothesis, last_task, last_result = last_experiment_info
+            last_info_str = f"Last Round Information:\nHypothesis: {last_hypothesis.hypothesis}\nTask: {last_task}\nResult: {last_result}\n"
+        else:
+            last_info_str = "This is the first round. No previous information available."
+
+        usr_prompt_hypothesis = f'''
+            We are in an experiment of finding hypotheses and validating or rejecting them so that in the end we have a powerful model generated.
+            Here is the context: {context}.
+            {last_info_str}
+
+            Now let's come to this round. You will receive the result and you will evaluate whether the performance increases or decreases.
+            Hypothesis: {hypothesis.hypothesis}\n
+            Relevant Reasoning: {hypothesis.reason}\n
+            Result: {exp.result}\n
+
+            Compare and observe.
+            Which result has a better return and lower risk? If the performance increases, the hypothesis should be considered positive (working).
+            Hence, with the hypotheses, relevant reasoning, and results in mind (comparison), provide detailed and constructive feedback and suggest a new hypothesis.
+        '''
+
+        try:
+            # Call the APIBackend to generate the response for hypothesis feedback
+            response_hypothesis = APIBackend().build_messages_and_create_chat_completion(
+                user_prompt=usr_prompt_hypothesis,
+                system_prompt=sys_prompt_hypothesis,
+                json_mode=True,
+            )
+
+            # Log the raw response for debugging
+            print("Raw Response for Hypothesis Feedback:\n", response_hypothesis)
+
+            # Parse the JSON response to extract the feedback
+            response_json_hypothesis = json.loads(response_hypothesis)
+            hypothesis_feedback = HypothesisFeedback(
+                observations=response_json_hypothesis.get("Observations", "No observations provided"),
+                hypothesis_evaluation=response_json_hypothesis.get("Feedback for Hypothesis", "No feedback provided"),
+                new_hypothesis=response_json_hypothesis.get("New Hypothesis", "No new hypothesis provided"),
+                reason=response_json_hypothesis.get("Reasoning", "No reasoning provided"),
+                decision=response_json_hypothesis.get("Decision", "false").lower() == "true"
+            )
+
+            return hypothesis_feedback
+
+        except json.JSONDecodeError as e:
+            print("Error parsing JSON response from LLM for hypothesis feedback:", e)
+        except Exception as e:
+            print("An unexpected error occurred while generating hypothesis feedback:", e)
+
+        return HypothesisFeedback(
+            observations="No observations",
+            hypothesis_evaluation="No feedback",
+            new_hypothesis="No new hypothesis",
+            reason="No reasoning",
+            decision=False
+        )
+

From d68a666004253401622c55f627885f6af1bd85da Mon Sep 17 00:00:00 2001
From: Xisen Wang <118058822+Xisen-Wang@users.noreply.github.com>
Date: Mon, 15 Jul 2024 11:41:22 +0800
Subject: [PATCH 2/5] Updated Trace class

- Updated get_last_experiment_info(), a helper that returns the information of
  the last experiment (to be used in feedback generation).
---
 rdagent/core/proposal.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/rdagent/core/proposal.py b/rdagent/core/proposal.py
index 93e20453..cf7e7cbf 100644
--- a/rdagent/core/proposal.py
+++ b/rdagent/core/proposal.py
@@ -54,6 +54,14 @@ def __init__(self, scen: ASpecificScen) -> None:
         self.scen: ASpecificScen = scen
         self.hist: list[Tuple[Hypothesis, Experiment, HypothesisFeedback]] = []
 
+    def get_last_experiment_info(self) -> Tuple[Hypothesis, ASpecificTask, Any]:
+        """Access the last experiment result, sub-task, and the corresponding hypothesis."""
+        if not self.hist:
+            return None
+        last_hypothesis, last_experiment, _ = self.hist[-1]
+        last_task = last_experiment.sub_tasks[-1]
+        last_result = last_experiment.result
+        return last_hypothesis, last_task, last_result
 
 class HypothesisGen:
     def __init__(self, scen: Scenario):

From b32d9ddf1b431bcf08446e37fb693ca5599c1b66 Mon Sep 17 00:00:00 2001
From: you-n-g
Date: Mon, 15 Jul 2024 11:58:36 +0800
Subject: [PATCH 3/5] Update rdagent/scenarios/qlib/task_generator/feedback.py

---
 rdagent/scenarios/qlib/task_generator/feedback.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdagent/scenarios/qlib/task_generator/feedback.py b/rdagent/scenarios/qlib/task_generator/feedback.py
index c6dd5745..a18145ed 100644
--- a/rdagent/scenarios/qlib/task_generator/feedback.py
+++ b/rdagent/scenarios/qlib/task_generator/feedback.py
@@ -78,6 +78,7 @@ def generateFeedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace
             return hypothesis_feedback
 
         except json.JSONDecodeError as e:
+            # TODO: (Xiao) I think raising a specific type of error to let the caller know something bad has happened would be more reasonable
             print("Error parsing JSON response from LLM for hypothesis feedback:", e)
         except Exception as e:
             print("An unexpected error occurred while generating hypothesis feedback:", e)

From e55997888e2f45cfe304c9088266fb695cea5177 Mon Sep 17 00:00:00 2001
From: you-n-g
Date: Mon, 15 Jul 2024 11:58:41 +0800
Subject: [PATCH 4/5] Update rdagent/core/proposal.py

---
 rdagent/core/proposal.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdagent/core/proposal.py b/rdagent/core/proposal.py
index cf7e7cbf..58192be9 100644
--- a/rdagent/core/proposal.py
+++ b/rdagent/core/proposal.py
@@ -56,6 +56,7 @@ def __init__(self, scen: ASpecificScen) -> None:
     def get_last_experiment_info(self) -> Tuple[Hypothesis, ASpecificTask, Any]:
         """Access the last experiment result, sub-task, and the corresponding hypothesis."""
+        # TODO: The return value does not align with the signature.
         if not self.hist:
             return None
         last_hypothesis, last_experiment, _ = self.hist[-1]
         last_task = last_experiment.sub_tasks[-1]

From f3316d60d57a12cc15d422236c8064c1fdd6edfb Mon Sep 17 00:00:00 2001
From: Xisen Wang <118058822+Xisen-Wang@users.noreply.github.com>
Date: Mon, 15 Jul 2024 17:20:04 +0800
Subject: [PATCH 5/5] Update feedback.py

- Deleted one debug print
---
 rdagent/scenarios/qlib/task_generator/feedback.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/rdagent/scenarios/qlib/task_generator/feedback.py b/rdagent/scenarios/qlib/task_generator/feedback.py
index a18145ed..4054ef9f 100644
--- a/rdagent/scenarios/qlib/task_generator/feedback.py
+++ b/rdagent/scenarios/qlib/task_generator/feedback.py
@@ -61,10 +61,7 @@ def generateFeedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace
                 system_prompt=sys_prompt_hypothesis,
                 json_mode=True,
             )
-
-            # Log the raw response for debugging
-            print("Raw Response for Hypothesis Feedback:\n", response_hypothesis)
-
+
             # Parse the JSON response to extract the feedback
             response_json_hypothesis = json.loads(response_hypothesis)
             hypothesis_feedback = HypothesisFeedback(
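
Note on the TODO left in PATCH 4/5: a minimal sketch of how the annotation on get_last_experiment_info() could be made to agree with the early `return None`. This is illustrative only, not part of the series; it assumes the typing names already used in rdagent/core/proposal.py (Hypothesis, ASpecificTask, Any) are in scope.

    from typing import Any, Optional, Tuple

    # Sketch: same body as in PATCH 2/5, but the return type says "or None".
    def get_last_experiment_info(self) -> Optional[Tuple[Hypothesis, ASpecificTask, Any]]:
        """Return (hypothesis, sub-task, result) of the last round, or None if no round has run yet."""
        if not self.hist:
            return None
        last_hypothesis, last_experiment, _ = self.hist[-1]
        return last_hypothesis, last_experiment.sub_tasks[-1], last_experiment.result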
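
For reviewers, a rough sketch of how the pieces added in this series are intended to fit together across rounds. The objects passed in (the feedback generator, the executed experiment, the current hypothesis, the trace) are hypothetical placeholders constructed elsewhere; only the methods and attributes introduced in the diffs above are assumed.

    def run_feedback_round(feedback_generator, executed_exp, current_hypothesis, trace):
        # generateFeedback() calls trace.get_last_experiment_info() internally; on the
        # first round that returns None, which triggers the "1st generation" prompt branch.
        feedback = feedback_generator.generateFeedback(executed_exp, current_hypothesis, trace)

        # Appending the round to trace.hist is what enables the comparison next time:
        # get_last_experiment_info() reads the last (hypothesis, experiment, feedback) tuple.
        trace.hist.append((current_hypothesis, executed_exp, feedback))
        return feedback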