Skip to content

Commit

Permalink
Debugging Prior to Eval (#67)
Browse files Browse the repository at this point in the history
1. Add *args detector in the modify simulation tool.
2. Improve string printing inside the agent with () format

3. Fix bug in the modifyscript tool at init and with the registry

* 69 dealing with simulfiles (#70)

1. Add FileType.RECORD support to write_file_name and get_fileid in the path registry (write name and get id)
2. Add handling of temp files in setup and run: if final, record files are saved in the path registry; if not, they get deleted.

3. Fix bug in the cleaning tool with the pdbfile_name assignment, and change the new parameter in setup and run to "save"
  • Loading branch information
Jgmedina95 authored Jan 25, 2024
1 parent f9939b2 commit 60bb0e6
Show file tree
Hide file tree
Showing 7 changed files with 315 additions and 164 deletions.
57 changes: 26 additions & 31 deletions mdagent/mainagent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,32 @@
structured_prompt = PromptTemplate(
input_variables=["input"],
template="""
You are an expert molecular dynamics scientist and
your task is to respond to the question or
solve the problem to the best of your ability using
the provided tools.
You can only respond with a single complete
"Thought, Action, Action Input" format
OR a single "Final Answer" format.
Complete format:
Thought: (reflect on your progress and decide what
to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)
OR
Final Answer: (the final answer to the original input
question)
Use the tools provided, using the most specific tool
available for each action.
Once you map a path to a short name, you may only use
that short name in future actions.
Your final answer should contain all information
necessary to answer the question and subquestions.
Your thought process should be clean and clear,
and you must explicitly state the actions you are taking.
Question: {input}
""",
You are an expert molecular dynamics scientist and
your task is to respond to the question or
solve the problem to the best of your ability using
the provided tools.
You can only respond with a single complete
'Thought, Action, Action Input' format
OR a single 'Final Answer' format.
Complete format:
Thought: (reflect on your progress and decide what " "to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)
OR
Final Answer: (the final answer to the original input
question)
Use the tools provided, using the most specific tool
available for each action.
Your final answer should contain all information
necessary to answer the question and subquestions.
Your thought process should be clean and clear,
and you must explicitly state the actions you are taking.
Question: {input} """,
)


Expand Down
13 changes: 7 additions & 6 deletions mdagent/tools/base_tools/preprocess_tools/clean_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ class CleaningToolFunctionInput(BaseModel):
"""Input model for CleaningToolFunction"""

pdb_id: str = Field(..., description="ID of the pdb/cif file in the path registry")
output_path: Optional[str] = Field(..., description="Path to the output file")
replace_nonstandard_residues: bool = Field(
True, description="Whether to replace nonstandard residues with standard ones. "
)
Expand Down Expand Up @@ -278,6 +277,7 @@ def _run(self, **input_args) -> str:
else:
input_args = input_args
pdbfile_id = input_args.get("pdb_id", None)
# TODO check if pdbfile_id is a valid pdb_id from the registry
if pdbfile_id is None:
return """No file was provided.
The input has to be a dictionary with the key 'pdb_id'"""
Expand All @@ -298,10 +298,11 @@ def _run(self, **input_args) -> str:
try:
pdbfile = self.path_registry.get_mapped_path(pdbfile_id)
if "/" in pdbfile:
pdbfile_name = pdbfile.split("/")[-1]
name = pdbfile_name.split("_")[0]
end = pdbfile_name.split(".")[1]
print(f"pdbfile: {pdbfile}", f"name: {name}", f"end: {end}")
pdbfile = pdbfile.split("/")[-1]

name = pdbfile.split("_")[0]
end = pdbfile.split(".")[1]

except Exception as e:
print(f"error retrieving from path_registry, trying to read file {e}")
return "File not found in path registry. "
Expand Down Expand Up @@ -384,7 +385,7 @@ def _run(self, **input_args) -> str:
self.path_registry.map_path(
file_id, f"{directory}/{file_name}", file_description
)
return f"{file_id} written to {directory}/{file_name}"
return f"File cleaned!\nFile ID:{file_id}\nPath:{directory}/{file_name}"
except FileNotFoundError:
return "Check your file path. File not found."
except Exception as e:
Expand Down
91 changes: 52 additions & 39 deletions mdagent/tools/base_tools/simulation_tools/create_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
from langchain.tools import BaseTool
from pydantic import BaseModel, Field

from mdagent.utils import PathRegistry
from mdagent.utils import FileType, PathRegistry


class ModifyScriptUtils:
llm: Optional[BaseLanguageModel]

def __init__(self, llm):
llm = llm
self.llm = llm

Examples = [
"""
Expand Down Expand Up @@ -90,31 +92,30 @@ def _prompt_summary(self, query: str, llm: BaseLanguageModel = None):
if not llm:
raise ValueError("No language model provided at ModifyScriptTool")

prompt_template = """ You're an expert programmer and in molecular dynamics.
Your job is to make a script to make a simmulatiuon.
in openmm.
Youre starting point is a base script that runs a protein on its own.
The protein itself doesnt require more preperation.
The forcefields, integrator, and constraints are already set up for you.
You need to add lines to fullfill the user requirement.
Your answer has to be the modified script.
Your answer should be a python script.
Dont use ''' to comment out the code, use # instead.
Describe your thoughts and changes before you start writing the script.
The script will be rum as it is, so make it completely.
The format should be as follows:
THOUGHTS: (Your thoughts as an openmm expert with the base script and the query)
CHANGES:(what modifications youre doing to the script)
SCRIPT: (The COMPLETE modified script)
FINAL THOUGHTS: (Optional, Any final thoughts or comments
you have about the script)
Base_SCRIPT:
{base_script}
Question: {query}
"""
prompt_template = (
"You're an expert programmer and in molecular dynamics. "
"Your job is to make a script to make a simmulation "
"in openmm. "
"Youre starting point is a base script that runs a protein on its own. "
"The protein itself doesnt require more preperation. "
"The forcefields, integrator, and constraints are already set up for you. "
"You need to add lines to fullfill the user requirement. "
"Your answer has to be the modified script. "
"Your answer should be a python script. "
"Dont use ''' to comment out the code, use # instead. "
"Describe your thoughts and changes before you start writing the script. "
"The script will be rum as it is, so make it completely. "
"The format should be as follows: "
"THOUGHTS: (Your thoughts as an openmm expert with the base "
"script and the query) \n"
"CHANGES:(what modifications youre doing to the script)\n "
"SCRIPT: (The COMPLETE modified script)\n "
"FINAL THOUGHTS: (Optional, Any final thoughts or comments\n "
"you have about the script\n "
"Base_SCRIPT:\n"
"{base_script} \n"
"Question: {query} "
)

prompt = PromptTemplate(
template=prompt_template, input_variables=["base_script", "query"]
Expand All @@ -134,32 +135,44 @@ def remove_leading_spaces(self, text):
class ModifyScriptInput(BaseModel):
query: str = Field(
...,
description="""Simmulation required by the user.You MUST
specify the objective, requirements of the simulation as well
as on what protein you are working.""",
description=(
"Simmulation required by the user.You MUST "
"specify the objective, requirements of the simulation as well "
"as on what protein you are working."
),
)
script: str = Field(..., description=" simulation ID of the base script file")


class ModifyBaseSimulationScriptTool(BaseTool):
name: str = "ModifyScriptTool"
description: str = """This tool takes a base simulation script and a user
requirement and returns a modified script. """
description: str = (
"This tool takes a base simulation script and a user "
"requirement and returns a modified script. "
)

args_schema = ModifyScriptInput
llm: Optional[BaseLanguageModel]
path_registry: Optional[PathRegistry]

def __init__(self, path_registry: Optional[PathRegistry], llm: BaseLanguageModel):
def __init__(self, path_registry: Optional[PathRegistry], llm):
super().__init__()
self.path_registry = path_registry
self.llm = llm

def _run(self, **input):
def _run(self, *args, **input):
if self.llm is None: # this should not happen
print("No language model provided at ModifyScriptTool")
return "llm not initialized"
if len(args) > 0:
return (
"This tool expects you to provide the input as a "
"dictionary: {'query': 'your query', 'script': 'script id'}"
)

base_script_id = input.get("script")
if not base_script_id:
return """No id provided. The keys for the input are:
'query' and 'script'"""
return "No id provided. The keys for the input are: " "query' and 'script'"
try:
base_script_path = self.path_registry.get_mapped_path(base_script_id)
parts = base_script_path.split("/")
Expand All @@ -170,7 +183,7 @@ def _run(self, **input):
with open(base_script_path, "r") as file:
base_script = file.read()
base_script = "".join(base_script)
utils = ModifyScriptUtils()
utils = ModifyScriptUtils(self.llm)

description = input.get("query")
answer = utils._prompt_summary(
Expand All @@ -186,9 +199,9 @@ def _run(self, **input):
script_content = textwrap.dedent(script_content).strip()
# Write to file
filename = self.path_registry.write_file_name(
type="SIMULATION", Sim_id=base_script_id, modified=True
type=FileType.SIMULATION, Sim_id=base_script_id, modified=True
)
file_id = self.path_registry.get_fileid(filename, type="SIMULATION")
file_id = self.path_registry.get_fileid(filename, type=FileType.SIMULATION)
directory = "files/simulations"
if not os.path.exists(directory):
os.makedirs(directory)
Expand Down
Loading

0 comments on commit 60bb0e6

Please sign in to comment.