Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debugging Prior to Eval #67

Merged
merged 9 commits into from
Jan 25, 2024
57 changes: 26 additions & 31 deletions mdagent/mainagent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,32 @@
structured_prompt = PromptTemplate(
input_variables=["input"],
template="""
You are an expert molecular dynamics scientist and
your task is to respond to the question or
solve the problem to the best of your ability using
the provided tools.

You can only respond with a single complete
"Thought, Action, Action Input" format
OR a single "Final Answer" format.

Complete format:

Thought: (reflect on your progress and decide what
to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)

OR

Final Answer: (the final answer to the original input
question)

Use the tools provided, using the most specific tool
available for each action.
Once you map a path to a short name, you may only use
that short name in future actions.
Your final answer should contain all information
necessary to answer the question and subquestions.
Your thought process should be clean and clear,
and you must explicitly state the actions you are taking.
Question: {input}
""",
You are an expert molecular dynamics scientist and
your task is to respond to the question or
solve the problem to the best of your ability using
the provided tools.

You can only respond with a single complete
'Thought, Action, Action Input' format
OR a single 'Final Answer' format.

Complete format:
Thought: (reflect on your progress and decide what to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)

OR

Final Answer: (the final answer to the original input
question)

Use the tools provided, using the most specific tool
available for each action.
Your final answer should contain all information
necessary to answer the question and subquestions.
Your thought process should be clean and clear,
and you must explicitly state the actions you are taking.
Question: {input} """,
)


Expand Down
5 changes: 2 additions & 3 deletions mdagent/tools/base_tools/preprocess_tools/clean_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ class CleaningToolFunctionInput(BaseModel):
"""Input model for CleaningToolFunction"""

pdb_id: str = Field(..., description="ID of the pdb/cif file in the path registry")
output_path: Optional[str] = Field(..., description="Path to the output file")
replace_nonstandard_residues: bool = Field(
True, description="Whether to replace nonstandard residues with standard ones. "
)
Expand Down Expand Up @@ -301,7 +300,7 @@ def _run(self, **input_args) -> str:
pdbfile_name = pdbfile.split("/")[-1]
name = pdbfile_name.split("_")[0]
end = pdbfile_name.split(".")[1]
print(f"pdbfile: {pdbfile}", f"name: {name}", f"end: {end}")

except Exception as e:
print(f"error retrieving from path_registry, trying to read file {e}")
return "File not found in path registry. "
Expand Down Expand Up @@ -384,7 +383,7 @@ def _run(self, **input_args) -> str:
self.path_registry.map_path(
file_id, f"{directory}/{file_name}", file_description
)
return f"{file_id} written to {directory}/{file_name}"
return f"File cleaned!\nFile ID:{file_id}\nPath:{directory}/{file_name}"
except FileNotFoundError:
return "Check your file path. File not found."
except Exception as e:
Expand Down
91 changes: 52 additions & 39 deletions mdagent/tools/base_tools/simulation_tools/create_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
from langchain.tools import BaseTool
from pydantic import BaseModel, Field

from mdagent.utils import PathRegistry
from mdagent.utils import FileType, PathRegistry


class ModifyScriptUtils:
llm: Optional[BaseLanguageModel]

def __init__(self, llm):
llm = llm
self.llm = llm

Examples = [
"""
Expand Down Expand Up @@ -90,31 +92,30 @@ def _prompt_summary(self, query: str, llm: BaseLanguageModel = None):
if not llm:
raise ValueError("No language model provided at ModifyScriptTool")

prompt_template = """ You're an expert programmer and in molecular dynamics.
Your job is to make a script to make a simmulatiuon.
in openmm.
Youre starting point is a base script that runs a protein on its own.
The protein itself doesnt require more preperation.
The forcefields, integrator, and constraints are already set up for you.
You need to add lines to fullfill the user requirement.
Your answer has to be the modified script.
Your answer should be a python script.
Dont use ''' to comment out the code, use # instead.
Describe your thoughts and changes before you start writing the script.
The script will be rum as it is, so make it completely.

The format should be as follows:
THOUGHTS: (Your thoughts as an openmm expert with the base script and the query)
CHANGES:(what modifications youre doing to the script)
SCRIPT: (The COMPLETE modified script)
FINAL THOUGHTS: (Optional, Any final thoughts or comments
you have about the script)


Base_SCRIPT:
{base_script}
Question: {query}
"""
prompt_template = (
"You're an expert programmer and in molecular dynamics. "
"Your job is to make a script to make a simmulation "
"in openmm. "
"Youre starting point is a base script that runs a protein on its own. "
"The protein itself doesnt require more preperation. "
"The forcefields, integrator, and constraints are already set up for you. "
"You need to add lines to fullfill the user requirement. "
"Your answer has to be the modified script. "
"Your answer should be a python script. "
"Dont use ''' to comment out the code, use # instead. "
"Describe your thoughts and changes before you start writing the script. "
"The script will be rum as it is, so make it completely. "
"The format should be as follows: "
"THOUGHTS: (Your thoughts as an openmm expert with the base "
"script and the query) \n"
"CHANGES:(what modifications youre doing to the script)\n "
"SCRIPT: (The COMPLETE modified script)\n "
"FINAL THOUGHTS: (Optional, Any final thoughts or comments\n "
"you have about the script)\n "
"Base_SCRIPT:\n"
"{base_script} \n"
"Question: {query} "
)

prompt = PromptTemplate(
template=prompt_template, input_variables=["base_script", "query"]
Expand All @@ -134,32 +135,44 @@ def remove_leading_spaces(self, text):
class ModifyScriptInput(BaseModel):
query: str = Field(
...,
description="""Simmulation required by the user.You MUST
specify the objective, requirements of the simulation as well
as on what protein you are working.""",
description=(
"Simmulation required by the user.You MUST "
"specify the objective, requirements of the simulation as well "
"as on what protein you are working."
),
)
script: str = Field(..., description=" simulation ID of the base script file")


class ModifyBaseSimulationScriptTool(BaseTool):
name: str = "ModifyScriptTool"
description: str = """This tool takes a base simulation script and a user
requirement and returns a modified script. """
description: str = (
"This tool takes a base simulation script and a user "
"requirement and returns a modified script. "
)

args_schema = ModifyScriptInput
llm: Optional[BaseLanguageModel]
path_registry: Optional[PathRegistry]

def __init__(self, path_registry: Optional[PathRegistry], llm: BaseLanguageModel):
def __init__(self, path_registry: Optional[PathRegistry], llm):
super().__init__()
self.path_registry = path_registry
self.llm = llm

def _run(self, **input):
def _run(self, *args, **input):
if self.llm is None: # this should not happen
print("No language model provided at ModifyScriptTool")
return "llm not initialized"
if len(args) > 0:
return (
"This tool expects you to provide the input as a "
"dictionary: {'query': 'your query', 'script': 'script id'}"
)

base_script_id = input.get("script")
if not base_script_id:
return """No id provided. The keys for the input are:
'query' and 'script'"""
return "No id provided. The keys for the input are: " "'query' and 'script'"
try:
base_script_path = self.path_registry.get_mapped_path(base_script_id)
parts = base_script_path.split("/")
Expand All @@ -170,7 +183,7 @@ def _run(self, **input):
with open(base_script_path, "r") as file:
base_script = file.read()
base_script = "".join(base_script)
utils = ModifyScriptUtils()
utils = ModifyScriptUtils(self.llm)

description = input.get("query")
answer = utils._prompt_summary(
Expand All @@ -186,9 +199,9 @@ def _run(self, **input):
script_content = textwrap.dedent(script_content).strip()
# Write to file
filename = self.path_registry.write_file_name(
type="SIMULATION", Sim_id=base_script_id, modified=True
type=FileType.SIMULATION, Sim_id=base_script_id, modified=True
)
file_id = self.path_registry.get_fileid(filename, type="SIMULATION")
file_id = self.path_registry.get_fileid(filename, type=FileType.SIMULATION)
directory = "files/simulations"
if not os.path.exists(directory):
os.makedirs(directory)
Expand Down
Loading
Loading