Debugging Prior to Eval (#67)

1. Add an *args detector in the modify-simulation tool. 2. Improve string printing inside the agent by using the parenthesized () string format. 3. Fix a bug in the ModifyScript tool at init and with the registry. * 69 dealing with simulfiles (#70): 1. Add FileType.RECORD functionality to write_file_name and get_fileid in the path registry (write name and get ID). 2. Add handling of temp files in setup-and-run: final record files are saved in the path registry; if not, they get deleted. 3. Fix a bug in the cleaning tool with the pdbfile_name assignment, and change the new parameter in setup-and-run to "save".
ur-whitelab · Jan 25, 2024 · 60bb0e6 · 60bb0e6
1 parent f9939b2
commit 60bb0e6
Show file tree

Hide file tree

Showing 7 changed files with 315 additions and 164 deletions.
diff --git a/mdagent/mainagent/prompt.py b/mdagent/mainagent/prompt.py
@@ -3,37 +3,32 @@
 structured_prompt = PromptTemplate(
     input_variables=["input"],
     template="""
-    You are an expert molecular dynamics scientist and
-    your task is to respond to the question or
-    solve the problem to the best of your ability using
-    the provided tools.
-
-    You can only respond with a single complete
-    "Thought, Action, Action Input" format
-    OR a single "Final Answer" format.
-
-    Complete format:
-
-    Thought: (reflect on your progress and decide what
-    to do next)
-    Action: (the action name, should be the name of a tool)
-    Action Input: (the input string to the action)
-
-    OR
-
-    Final Answer: (the final answer to the original input
-    question)
-
-    Use the tools provided, using the most specific tool
-    available for each action.
-    Once you map a path to a short name, you may only use
-    that short name in future actions.
-    Your final answer should contain all information
-    necessary to answer the question and subquestions.
-    Your thought process should be clean and clear,
-    and you must explicitly state the actions you are taking.
-    Question: {input}
-    """,
+        You are an expert molecular dynamics scientist and
+        your task is to respond to the question or
+        solve the problem to the best of your ability using
+        the provided tools.
+
+        You can only respond with a single complete
+        'Thought, Action, Action Input' format
+        OR a single 'Final Answer' format.
+
+        Complete format:
+        Thought: (reflect on your progress and decide what to do next)
+        Action: (the action name, should be the name of a tool)
+        Action Input: (the input string to the action)
+
+        OR
+
+        Final Answer: (the final answer to the original input
+        question)
+
+        Use the tools provided, using the most specific tool
+        available for each action.
+        Your final answer should contain all information
+        necessary to answer the question and subquestions.
+        Your thought process should be clean and clear,
+        and you must explicitly state the actions you are taking.
+        Question: {input} """,
 )
 
 

diff --git a/mdagent/tools/base_tools/preprocess_tools/clean_tools.py b/mdagent/tools/base_tools/preprocess_tools/clean_tools.py
@@ -227,7 +227,6 @@ class CleaningToolFunctionInput(BaseModel):
     """Input model for CleaningToolFunction"""
 
     pdb_id: str = Field(..., description="ID of the pdb/cif file in the path registry")
-    output_path: Optional[str] = Field(..., description="Path to the output file")
     replace_nonstandard_residues: bool = Field(
         True, description="Whether to replace nonstandard residues with standard ones. "
     )
@@ -278,6 +277,7 @@ def _run(self, **input_args) -> str:
             else:
                 input_args = input_args
             pdbfile_id = input_args.get("pdb_id", None)
+            # TODO check if pdbfile_id is a valid pdb_id from the registry
             if pdbfile_id is None:
                 return """No file was provided.
                 The input has to be a dictionary with the key 'pdb_id'"""
@@ -298,10 +298,11 @@ def _run(self, **input_args) -> str:
             try:
                 pdbfile = self.path_registry.get_mapped_path(pdbfile_id)
                 if "/" in pdbfile:
-                    pdbfile_name = pdbfile.split("/")[-1]
-                name = pdbfile_name.split("_")[0]
-                end = pdbfile_name.split(".")[1]
-                print(f"pdbfile: {pdbfile}", f"name: {name}", f"end: {end}")
+                    pdbfile = pdbfile.split("/")[-1]
+
+                name = pdbfile.split("_")[0]
+                end = pdbfile.split(".")[1]
+
             except Exception as e:
                 print(f"error retrieving from path_registry, trying to read file {e}")
                 return "File not found in path registry. "
@@ -384,7 +385,7 @@ def _run(self, **input_args) -> str:
             self.path_registry.map_path(
                 file_id, f"{directory}/{file_name}", file_description
             )
-            return f"{file_id} written to {directory}/{file_name}"
+            return f"File cleaned!\nFile ID:{file_id}\nPath:{directory}/{file_name}"
         except FileNotFoundError:
             return "Check your file path. File not found."
         except Exception as e:

diff --git a/mdagent/tools/base_tools/simulation_tools/create_simulation.py b/mdagent/tools/base_tools/simulation_tools/create_simulation.py
@@ -8,12 +8,14 @@
 from langchain.tools import BaseTool
 from pydantic import BaseModel, Field
 
-from mdagent.utils import PathRegistry
+from mdagent.utils import FileType, PathRegistry
 
 
 class ModifyScriptUtils:
+    llm: Optional[BaseLanguageModel]
+
     def __init__(self, llm):
-        llm = llm
+        self.llm = llm
 
     Examples = [
         """
@@ -90,31 +92,30 @@ def _prompt_summary(self, query: str, llm: BaseLanguageModel = None):
         if not llm:
             raise ValueError("No language model provided at ModifyScriptTool")
 
-        prompt_template = """ You're an expert programmer and in molecular dynamics.
-        Your job is to make a script to make a simmulatiuon.
-        in openmm.
-        Youre starting point is a base script that runs a protein on its own.
-        The protein itself doesnt require more preperation.
-        The forcefields, integrator, and constraints are already set up for you.
-        You need to add lines to fullfill the user requirement.
-        Your answer has to be the modified script.
-        Your answer should be a python script.
-        Dont use ''' to comment out the code, use # instead.
-        Describe your thoughts and changes before you start writing the script.
-        The script will be rum as it is, so make it completely.
-
-        The format should be as follows:
-        THOUGHTS: (Your thoughts as an openmm expert with the base script and the query)
-        CHANGES:(what modifications youre doing to the script)
-        SCRIPT: (The COMPLETE modified script)
-        FINAL THOUGHTS: (Optional, Any final thoughts or comments
-        you have about the script)
-
-
-        Base_SCRIPT:
-        {base_script}
-        Question: {query}
-        """
+        prompt_template = (
+            "You're an expert programmer and in molecular dynamics. "
+            "Your job is to make a script to make a simmulation "
+            "in openmm. "
+            "Youre starting point is a base script that runs a protein on its own. "
+            "The protein itself doesnt require more preperation. "
+            "The forcefields, integrator, and constraints are already set up for you. "
+            "You need to add lines to fullfill the user requirement. "
+            "Your answer has to be the modified script. "
+            "Your answer should be a python script. "
+            "Dont use ''' to comment out the code, use # instead. "
+            "Describe your thoughts and changes before you start writing the script. "
+            "The script will be rum as it is, so make it completely. "
+            "The format should be as follows: "
+            "THOUGHTS: (Your thoughts as an openmm expert with the base "
+            "script and the query) \n"
+            "CHANGES:(what modifications youre doing to the script)\n "
+            "SCRIPT: (The COMPLETE modified script)\n "
+            "FINAL THOUGHTS: (Optional, Any final thoughts or comments\n "
+            "you have about the script\n "
+            "Base_SCRIPT:\n"
+            "{base_script} \n"
+            "Question: {query} "
+        )
 
         prompt = PromptTemplate(
             template=prompt_template, input_variables=["base_script", "query"]
@@ -134,32 +135,44 @@ def remove_leading_spaces(self, text):
 class ModifyScriptInput(BaseModel):
     query: str = Field(
         ...,
-        description="""Simmulation required by the user.You MUST
-                    specify the objective, requirements of the simulation as well
-                    as on what protein you are working.""",
+        description=(
+            "Simmulation required by the user.You MUST "
+            "specify the objective, requirements of the simulation as well "
+            "as on what protein you are working."
+        ),
     )
     script: str = Field(..., description=" simulation ID of the base script file")
 
 
 class ModifyBaseSimulationScriptTool(BaseTool):
     name: str = "ModifyScriptTool"
-    description: str = """This tool takes a base simulation script and a user
-    requirement and returns a modified script. """
+    description: str = (
+        "This tool takes a base simulation script and a user "
+        "requirement and returns a modified script. "
+    )
 
     args_schema = ModifyScriptInput
     llm: Optional[BaseLanguageModel]
     path_registry: Optional[PathRegistry]
 
-    def __init__(self, path_registry: Optional[PathRegistry], llm: BaseLanguageModel):
+    def __init__(self, path_registry: Optional[PathRegistry], llm):
         super().__init__()
         self.path_registry = path_registry
         self.llm = llm
 
-    def _run(self, **input):
+    def _run(self, *args, **input):
+        if self.llm is None:  # this should not happen
+            print("No language model provided at ModifyScriptTool")
+            return "llm not initialized"
+        if len(args) > 0:
+            return (
+                "This tool expects you to provide the input as a "
+                "dictionary: {'query': 'your query', 'script': 'script id'}"
+            )
+
         base_script_id = input.get("script")
         if not base_script_id:
-            return """No id provided. The keys for the input are:
-             'query' and 'script'"""
+            return "No id provided. The keys for the input are: " "query' and 'script'"
         try:
             base_script_path = self.path_registry.get_mapped_path(base_script_id)
             parts = base_script_path.split("/")
@@ -170,7 +183,7 @@ def _run(self, **input):
         with open(base_script_path, "r") as file:
             base_script = file.read()
         base_script = "".join(base_script)
-        utils = ModifyScriptUtils()
+        utils = ModifyScriptUtils(self.llm)
 
         description = input.get("query")
         answer = utils._prompt_summary(
@@ -186,9 +199,9 @@ def _run(self, **input):
         script_content = textwrap.dedent(script_content).strip()
         # Write to file
         filename = self.path_registry.write_file_name(
-            type="SIMULATION", Sim_id=base_script_id, modified=True
+            type=FileType.SIMULATION, Sim_id=base_script_id, modified=True
         )
-        file_id = self.path_registry.get_fileid(filename, type="SIMULATION")
+        file_id = self.path_registry.get_fileid(filename, type=FileType.SIMULATION)
         directory = "files/simulations"
         if not os.path.exists(directory):
             os.makedirs(directory)