Skip to content

Commit

Permalink
add query knowledge for model and workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
WinstonLiyt committed Dec 23, 2024
1 parent 251688b commit 438a569
Show file tree
Hide file tree
Showing 11 changed files with 212 additions and 224 deletions.
23 changes: 15 additions & 8 deletions rdagent/app/data_science/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
Experiment2Feedback,
ExpGen,
Hypothesis2Experiment,
HypothesisFeedback,
HypothesisGen,
Trace,
)
Expand All @@ -32,7 +33,7 @@
from rdagent.scenarios.data_science.experiment.experiment import DSExperiment
from rdagent.scenarios.data_science.proposal.exp_gen import DSExpGen, DSTrace
from rdagent.scenarios.kaggle.kaggle_crawler import download_data
from rdagent.core.proposal import HypothesisFeedback


class DataScienceRDLoop(RDLoop):
skip_loop_error = (NextLoopException,)
Expand Down Expand Up @@ -98,13 +99,19 @@ def running(self, prev_out: dict[str, Any]):

def feedback(self, prev_out: dict[str, Any]):
if not self.trace.all_components_completed():
self.trace.hist.append((prev_out["direct_exp_gen"].hypothesis, prev_out["coding"], HypothesisFeedback(
observations="Not all 5 components are completed, skip feedback of DataScienceRDLoop.",
hypothesis_evaluation="",
new_hypothesis="",
reason="",
decision=True
)))
self.trace.hist.append(
(
prev_out["direct_exp_gen"].hypothesis,
prev_out["coding"],
HypothesisFeedback(
observations="Not all 5 components are completed, skip feedback of DataScienceRDLoop.",
hypothesis_evaluation="",
new_hypothesis="",
reason="",
decision=True,
),
)
)
raise NextLoopException("Not all 5 components are completed, skip feedback of DataScienceRDLoop.")

feedback = self.summarizer.generate_feedback(
Expand Down
3 changes: 1 addition & 2 deletions rdagent/components/coder/data_science/feature/prompts.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
feature:
system: |-
You are a world-class data scientist and machine learning engineer with deep expertise in statistics, mathematics, and computer science.
You are a world-class data scientist and machine learning engineer with deep expertise in statistics, mathematics, and computer science.
Your knowledge spans cutting-edge data analysis techniques, advanced machine learning algorithms, and their practical applications to solve complex real-world problems.
This project involves implementing feature engineering techniques to prepare data for machine learning models, and this project code will be written by GPT.
Expand Down Expand Up @@ -33,7 +33,6 @@ feature:
{% endfor %}
{% endif %}
```
user: |-
---------Feature Processing Specification---------
{{ feature_spec }}
Expand Down
72 changes: 69 additions & 3 deletions rdagent/components/coder/data_science/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,88 @@
import json
from pathlib import Path

from jinja2 import Environment, StrictUndefined

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS
from rdagent.components.coder.CoSTEER.evaluators import CoSTEERMultiEvaluator
from rdagent.components.coder.CoSTEER.evolving_strategy import (
MultiProcessEvolvingStrategy,
)
from rdagent.components.coder.CoSTEER.knowledge_management import (
CoSTEERQueriedKnowledge,
)
from rdagent.components.coder.data_science.model.es import (
ModelMultiProcessEvolvingStrategy,
)
from rdagent.components.coder.data_science.model.eval import (
ModelGeneralCaseSpecEvaluator,
)
from rdagent.components.coder.data_science.model.exp import ModelTask
from rdagent.core.experiment import FBWorkspace
from rdagent.core.scenario import Scenario
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.tpl import T

# from rdagent.utils.agent.tpl import T
# T(".prompts:model_generator.user").r()


class ModelMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
    """Evolving strategy that asks the LLM to write the code for one model task."""

    def implement_one_task(
        self,
        target_task: ModelTask,
        queried_knowledge: CoSTEERQueriedKnowledge | None = None,
        workspace: FBWorkspace | None = None,
    ) -> dict[str, str]:
        """Generate the implementation of ``target_task`` through one chat completion.

        Args:
            target_task: Task to implement; its ``get_task_information()`` string
                is the key used to look up previously queried knowledge.
            queried_knowledge: Knowledge queried for this task. When ``None``,
                no successful or failed examples are put into the prompt.
            workspace: Workspace providing ``code_dict["spec/model.md"]`` and,
                if present, the previous ``"model01.py"``.
                NOTE(review): it is dereferenced unconditionally below, so
                callers must pass one despite the ``None`` default.

        Returns:
            A mapping with the single key ``"model01.py"`` holding the code
            string taken from the ``"code"`` field of the JSON response.
        """
        model_information_str = target_task.get_task_information()

        # 1. query
        if queried_knowledge is not None:
            queried_similar_successful_knowledge = (
                queried_knowledge.task_to_similar_task_successful_knowledge[model_information_str]
            )
            former_failed_entry = queried_knowledge.task_to_former_failed_traces[model_information_str]
        else:
            queried_similar_successful_knowledge = []
            former_failed_entry = None

        # Only element 0 of the stored entry is handed to the template
        # (presumably the list of failed attempts -- confirm against
        # CoSTEERQueriedKnowledge). Guard the index so that a missing or empty
        # entry becomes an empty list instead of raising IndexError; the
        # template skips the section when the list is empty.
        queried_former_failed_knowledge = former_failed_entry[0] if former_failed_entry else []

        # 2. code
        system_prompt = T(".prompts:model_coder.system").r(
            queried_similar_successful_knowledge=queried_similar_successful_knowledge,
            queried_former_failed_knowledge=queried_former_failed_knowledge,
        )
        user_prompt = T(".prompts:model_coder.user").r(
            model_spec=workspace.code_dict["spec/model.md"],
            latest_code=workspace.code_dict.get("model01.py"),
        )

        # json_mode=True is requested, and the reply is parsed as a JSON object
        # whose "code" field carries the implementation.
        response = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
        )
        model_code = json.loads(response)["code"]

        return {
            "model01.py": model_code,
        }

    def assign_code_list_to_evo(self, code_list: list[dict[str, str] | None], evo):
        """Inject the generated code into the evolving item's workspaces.

        ``code_list`` is aligned index-by-index with ``evo.sub_tasks``; a
        ``None`` entry means the task was not implemented and is skipped.

        Args:
            code_list: One ``{filename: code}`` mapping (or ``None``) per sub-task.
            evo: The evolving item; its ``sub_workspace_list`` is updated in place.

        Returns:
            The same ``evo`` object.
        """
        for index, _task in enumerate(evo.sub_tasks):
            code = code_list[index]
            if code is None:
                continue
            if evo.sub_workspace_list[index] is None:
                # Reuse the experiment-level workspace rather than creating a
                # separate workspace per sub-task.
                evo.sub_workspace_list[index] = evo.experiment_workspace
            evo.sub_workspace_list[index].inject_code(**code)
        return evo


class ModelCoSTEER(CoSTEER):
def __init__(
self,
Expand Down
120 changes: 0 additions & 120 deletions rdagent/components/coder/data_science/model/es.py

This file was deleted.

42 changes: 19 additions & 23 deletions rdagent/components/coder/data_science/model/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ model_coder:
You are tasked with implementing PyTorch models based on specific requirements provided by the user. The user’s ultimate goal is to obtain accurate predictions from the model on input data. Follow the instructions below to ensure your response is correct and aligned with the user’s expectations.
Instructions for Code Generation:
Specification Compliance:
The user has provided a detailed framework or set of specifications under {{ spec }}. Your code must strictly adhere to this specification, including any required classes, methods, and organizational structure. Do not implement or add anything outside the scope of the provided specification.
Leveraging User Inputs:
The user may provide various forms of additional information to guide you:
Expand All @@ -26,39 +23,38 @@ model_coder:
{
"code": "Your corrected or newly implemented Python code as a single string"
}
user: |-
Here is all the relevant information for this task:
Target Model Details:
{{ model_information_str }}
-----------Here is the relevant information for this task-----------
{% if queried_similar_successful_knowledge|length != 0 %}
--------------Successful Implementations for Similar Models:--------------
{% for similar_successful_knowledge in queried_similar_successful_knowledge %}
===== Model {{loop.index}}: =====
====={% for similar_successful_knowledge in queried_similar_successful_knowledge %} Model {{loop.index}}:=====
{{ similar_successful_knowledge.target_task.get_task_information() }}
===== Code: =====
=====Code:=====
{{ similar_successful_knowledge.implementation.code }}
{% endfor %}
{% endif %}
{% if queried_former_failed_knowledge|length != 0 %}
--------------Previous Failed Attempts:--------------
{% for former_failed_knowledge in queried_former_failed_knowledge %}
Attempt {{ loop.index }}:
===== Code: =====
{% for former_failed_knowledge in queried_former_failed_knowledge %} Attempt {{ loop.index }}:
=====Code:=====
{{ former_failed_knowledge.implementation.code }}
===== Feedback: =====
=====Feedback:=====
{{ former_failed_knowledge.feedback }}
{% endfor %}
{% endif %}
{% endif %}
user: |-
---------Model Specification---------
{{ model_spec }}
{% if latest_code %}
---------Former Specification---------
Former Code: {{ latest_code }}
You should follow the former code to improve it.
{% endif %}
{% if current_code is not none %}
--------------Latest Code:--------------
{{ current_code }}
{% else %}
No prior code has been implemented.
{% endif %}
model_eval:
system: |-
Expand Down
3 changes: 0 additions & 3 deletions rdagent/components/coder/data_science/model/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

from rdagent.components.coder.CoSTEER.config import CoSTEER_SETTINGS
from rdagent.components.coder.data_science.model import ModelCoSTEER
from rdagent.components.coder.data_science.model.es import (
ModelMultiProcessEvolvingStrategy,
)
from rdagent.components.coder.data_science.model.eval import (
ModelGeneralCaseSpecEvaluator,
)
Expand Down
Loading

0 comments on commit 438a569

Please sign in to comment.