
Commit

Merge pull request #68 from microsoft/update-feedback
Model Feedback Implementation Done | Future Todo: Redesign Prompts & Reformat, Rethink get_last_experiment_info, Raise Errors
xisen-w authored Jul 15, 2024
2 parents 3190d43 + f3316d6 commit c578bf0
Showing 2 changed files with 92 additions and 3 deletions.
9 changes: 9 additions & 0 deletions rdagent/core/proposal.py
@@ -54,6 +54,15 @@ def __init__(self, scen: ASpecificScen) -> None:
        self.scen: ASpecificScen = scen
        self.hist: list[Tuple[Hypothesis, Experiment, HypothesisFeedback]] = []

    def get_last_experiment_info(self) -> Optional[Tuple[Hypothesis, ASpecificTask, Any]]:
        """Return the last hypothesis, its sub-task, and the experiment result, or None when there is no history."""
        # NOTE: assumes Optional is imported from typing alongside Tuple.
        if not self.hist:
            return None
        last_hypothesis, last_experiment, _ = self.hist[-1]
        last_task = last_experiment.sub_tasks[-1]
        last_result = last_experiment.result
        return last_hypothesis, last_task, last_result

class HypothesisGen:
    def __init__(self, scen: Scenario):
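As a quick illustration of the new Trace.get_last_experiment_info helper, here is a minimal, self-contained sketch (not part of this commit) of how a caller might consume it. It uses SimpleNamespace stand-ins for Hypothesis and Experiment, since their real constructors are not shown in this diff, and mirrors the method as a free function:

from types import SimpleNamespace

# Stand-in trace with an empty history: the helper returns None on the first round.
trace = SimpleNamespace(hist=[])

def get_last_experiment_info(trace):
    """Mirror of the new Trace method: (hypothesis, last sub-task, result) or None."""
    if not trace.hist:
        return None
    last_hypothesis, last_experiment, _ = trace.hist[-1]
    return last_hypothesis, last_experiment.sub_tasks[-1], last_experiment.result

assert get_last_experiment_info(trace) is None  # first round: no previous information

# After one round, the history holds (hypothesis, experiment, feedback) tuples.
hypothesis = SimpleNamespace(hypothesis="A deeper GRU improves annualized return")
experiment = SimpleNamespace(sub_tasks=["model_task"], result={"annualized_return": 0.12})
trace.hist.append((hypothesis, experiment, None))

last_hypothesis, last_task, last_result = get_last_experiment_info(trace)
print(last_hypothesis.hypothesis, last_task, last_result)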
86 changes: 83 additions & 3 deletions rdagent/scenarios/qlib/task_generator/feedback.py
@@ -1,10 +1,90 @@
# TODO:
# Implement to feedback.

from rdagent.core.proposal import HypothesisExperiment2Feedback
import json
from rdagent.oai.llm_utils import APIBackend
from rdagent.core.proposal import HypothesisExperiment2Feedback, Trace, Hypothesis, HypothesisFeedback, Scenario
from rdagent.core.experiment import Experiment


class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): ...


class QlibModelHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): ...

class QlibModelHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
    """Generate feedback on the hypothesis from **executed** implementations of different tasks and their comparison with previous performance."""

    def generateFeedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trace) -> HypothesisFeedback:
        """
        The task implementation should already have been executed and its results included, together with a
        comparison against previous results (the comparison itself is done by the LLM). For example, Qlib's `mlflow` output will be included.
        """

        # Define the system prompt for hypothesis feedback
        sys_prompt_hypothesis = (
            "You are a professional result analysis assistant. You will receive a result and a hypothesis. "
            "Your task is to provide feedback on how well the result supports or refutes the hypothesis, judging by whether the observed performance increases or decreases. "
            "Please provide detailed and constructive feedback. "
            "Example JSON Structure for Result Analysis: "
            '{"Observations": "Your overall observations here", "Feedback for Hypothesis": "Observations related to the hypothesis", '
            '"New Hypothesis": "Put your new hypothesis here.", "Reasoning": "Provide reasoning for the hypothesis here.", '
            '"Decision": "True or False"}'
        )

        # Define the user prompt for hypothesis feedback
        context = trace.scen
        last_experiment_info = trace.get_last_experiment_info()

        if last_experiment_info:
            last_hypothesis, last_task, last_result = last_experiment_info
            last_info_str = f"Last Round Information:\nHypothesis: {last_hypothesis.hypothesis}\nTask: {last_task}\nResult: {last_result}\n"
        else:
            last_info_str = "This is the first round. No previous information available."

        usr_prompt_hypothesis = f'''
        We are running an experiment loop of proposing hypotheses and validating or rejecting them, so that we end up with a powerful model.
        Here is the context: {context}.
        {last_info_str}
        Now let's come to this round. You will receive the result and evaluate whether the performance increases or decreases.
        Hypothesis: {hypothesis.hypothesis}\n
        Relevant Reasoning: {hypothesis.reason}\n
        Result: {exp.result}\n
        Compare and observe. Which result has a better return and lower risk? If the performance increases, the hypothesis should be considered positive (working).
        Hence, with the hypotheses, relevant reasoning, and results in mind (comparison), provide detailed and constructive feedback and suggest a new hypothesis.
        '''

        try:
            # Call the APIBackend to generate the response for hypothesis feedback
            response_hypothesis = APIBackend().build_messages_and_create_chat_completion(
                user_prompt=usr_prompt_hypothesis,
                system_prompt=sys_prompt_hypothesis,
                json_mode=True,
            )

            # Parse the JSON response to extract the feedback
            response_json_hypothesis = json.loads(response_hypothesis)
            hypothesis_feedback = HypothesisFeedback(
                observations=response_json_hypothesis.get("Observations", "No observations provided"),
                hypothesis_evaluation=response_json_hypothesis.get("Feedback for Hypothesis", "No feedback provided"),
                new_hypothesis=response_json_hypothesis.get("New Hypothesis", "No new hypothesis provided"),
                reason=response_json_hypothesis.get("Reasoning", "No reasoning provided"),
                # str() keeps this working whether the LLM returns the string "True" or a JSON boolean.
                decision=str(response_json_hypothesis.get("Decision", "False")).lower() == "true"
            )

            return hypothesis_feedback

        except json.JSONDecodeError as e:
            # TODO: (Xiao) I think raising a specific type of ERROR to make the caller know something bad has happened would be more reasonable
            print("Error parsing JSON response from LLM for hypothesis feedback:", e)
        except Exception as e:
            print("An unexpected error occurred while generating hypothesis feedback:", e)

        return HypothesisFeedback(
            observations="No observations",
            hypothesis_evaluation="No feedback",
            new_hypothesis="No new hypothesis",
            reason="No reasoning",
            decision=False
        )
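For reference, a small standalone sketch (not repository code) of the "Decision" parsing used above; coercing through str() keeps the check robust whether the LLM returns the string "True" or a JSON boolean true:

import json

def parse_decision(raw_response: str) -> bool:
    """Return True only when the response's "Decision" field reads as true."""
    data = json.loads(raw_response)
    return str(data.get("Decision", "False")).lower() == "true"

print(parse_decision('{"Decision": "True"}'))  # string form -> True
print(parse_decision('{"Decision": true}'))    # JSON boolean -> True
print(parse_decision('{}'))                    # missing field -> False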
