merge main into branch

ur-whitelab · Feb 7, 2024 · 4637619 · 4637619
2 parents d2d7b58 + a56cfe8
commit 4637619
Show file tree

Hide file tree

Showing 29 changed files with 1,447 additions and 896 deletions.
diff --git a/README.md b/README.md
@@ -23,6 +23,13 @@ Other tools require API keys, such as paper-qa for literature searches. We recom
 1. Copy the `.env.example` file and rename it to `.env`: `cp .env.example .env`
 2. Replace the placeholder values in `.env` with your actual keys
 
+## Using Streamlit Interface
+If you'd like to use MDAgent via the Streamlit app, make sure you have completed the steps above. Then, from the project root directory, run `streamlit run st_app.py` in your terminal.
+
+From there you may upload files to use during the run. Note: the app currently accepts only .pdb and .cif files, and the maximum upload size defaults to 200MB.
+- To upload larger files, instead run `streamlit run st_app.py --server.maxUploadSize=<size_in_MB>`, replacing `<size_in_MB>` with the desired limit in megabytes.
+- To add different file types, you can add your desired file type to the list in the [streamlit app file](https://github.com/ur-whitelab/md-agent/blob/main/st_app.py).
+
 
 ## Contributing
 

diff --git a/mdagent/mainagent/agent.py b/mdagent/mainagent/agent.py
@@ -7,7 +7,7 @@
 from mdagent.subagents import SubAgentSettings
 from mdagent.utils import PathRegistry, _make_llm
 
-from ..tools import make_all_tools
+from ..tools import get_tools, make_all_tools
 from .prompt import openaifxn_prompt, structured_prompt
 
 load_dotenv()
@@ -35,7 +35,7 @@ class MDAgent:
     def __init__(
         self,
         tools=None,
-        agent_type="OpenAIFunctionsAgent",  # this can also be strucured_chat
+        agent_type="OpenAIFunctionsAgent",  # this can also be structured_chat
         model="gpt-4-1106-preview",  # current name for gpt-4 turbo
         tools_model="gpt-4-1106-preview",
         temp=0.1,
@@ -45,14 +45,21 @@ def __init__(
         subagents_model="gpt-4-1106-preview",
         ckpt_dir="ckpt",
         resume=False,
-        top_k_tools=10,
+        top_k_tools=20,  # set "all" if you want to use all tools (& skills if resume)
         use_human_tool=False,
+        uploaded_files=[],  # user input files to add to path registry
     ):
         if path_registry is None:
             path_registry = PathRegistry.get_instance()
-        if tools is None:
-            tools_llm = _make_llm(tools_model, temp, verbose)
-            tools = make_all_tools(tools_llm, human=use_human_tool)
+        self.uploaded_files = uploaded_files
+        for file in uploaded_files:  # todo -> allow users to add descriptions?
+            path_registry.map_path(file, file, description="User uploaded file")
+
+        self.agent_type = agent_type
+        self.user_tools = tools
+        self.tools_llm = _make_llm(tools_model, temp, verbose)
+        self.top_k_tools = top_k_tools
+        self.use_human_tool = use_human_tool
 
         self.llm = ChatOpenAI(
             temperature=temp,
@@ -61,11 +68,7 @@ def __init__(
             streaming=True,
             callbacks=[StreamingStdOutCallbackHandler()],
         )
-        self.agent = AgentExecutor.from_agent_and_tools(
-            tools=tools,
-            agent=AgentType.get_agent(agent_type).from_llm_and_tools(self.llm, tools),
-            handle_parsing_errors=True,
-        )
+
         # assign prompt
         if agent_type == "Structured":
             self.prompt = structured_prompt
@@ -80,9 +83,37 @@ def __init__(
             verbose=verbose,
             ckpt_dir=ckpt_dir,
             resume=resume,
-            retrieval_top_k=top_k_tools,
+        )
+
+    def _initialize_tools_and_agent(self, user_input=None):
+        """Retrieve tools and initialize the agent."""
+        if self.user_tools is not None:
+            self.tools = self.user_tools
+        else:
+            if self.top_k_tools != "all" and user_input is not None:
+                # retrieve only tools relevant to user input
+                self.tools = get_tools(
+                    query=user_input,
+                    llm=self.tools_llm,
+                    subagent_settings=self.subagents_settings,
+                    human=self.use_human_tool,
+                )
+            else:
+                # retrieve all tools, including new tools if any
+                self.tools = make_all_tools(
+                    self.tools_llm,
+                    subagent_settings=self.subagents_settings,
+                    human=self.use_human_tool,
+                )
+        return AgentExecutor.from_agent_and_tools(
+            tools=self.tools,
+            agent=AgentType.get_agent(self.agent_type).from_llm_and_tools(
+                self.llm,
+                self.tools,
+            ),
+            handle_parsing_errors=True,
         )
 
     def run(self, user_input, callbacks=None):
-        # todo: check this for both agent types
+        self.agent = self._initialize_tools_and_agent(user_input)
         return self.agent.run(self.prompt.format(input=user_input), callbacks=callbacks)
diff --git a/mdagent/mainagent/prompt.py b/mdagent/mainagent/prompt.py
@@ -3,37 +3,32 @@
 structured_prompt = PromptTemplate(
     input_variables=["input"],
     template="""
-    You are an expert molecular dynamics scientist and
-    your task is to respond to the question or
-    solve the problem to the best of your ability using
-    the provided tools.
-
-    You can only respond with a single complete
-    "Thought, Action, Action Input" format
-    OR a single "Final Answer" format.
-
-    Complete format:
-
-    Thought: (reflect on your progress and decide what
-    to do next)
-    Action: (the action name, should be the name of a tool)
-    Action Input: (the input string to the action)
-
-    OR
-
-    Final Answer: (the final answer to the original input
-    question)
-
-    Use the tools provided, using the most specific tool
-    available for each action.
-    Once you map a path to a short name, you may only use
-    that short name in future actions.
-    Your final answer should contain all information
-    necessary to answer the question and subquestions.
-    Your thought process should be clean and clear,
-    and you must explicitly state the actions you are taking.
-    Question: {input}
-    """,
+        You are an expert molecular dynamics scientist and
+        your task is to respond to the question or
+        solve the problem to the best of your ability using
+        the provided tools.
+
+        You can only respond with a single complete
+        'Thought, Action, Action Input' format
+        OR a single 'Final Answer' format.
+
+        Complete format:
+        Thought: (reflect on your progress and decide what " "to do next)
+        Action: (the action name, should be the name of a tool)
+        Action Input: (the input string to the action)
+
+        OR
+
+        Final Answer: (the final answer to the original input
+        question)
+
+        Use the tools provided, using the most specific tool
+        available for each action.
+        Your final answer should contain all information
+        necessary to answer the question and subquestions.
+        Your thought process should be clean and clear,
+        and you must explicitly state the actions you are taking.
+        Question: {input} """,
 )
 
 

diff --git a/mdagent/subagents/agents/skill.py b/mdagent/subagents/agents/skill.py
@@ -149,7 +149,7 @@ def update_skill_library(self, function, code_script, description, arguments):
         )
         self.vectordb.persist()
 
-    def execute_skill_function(self, tool_name, path_registry, **kwargs):
+    def execute_skill_function(self, tool_name, **kwargs):
         code = self.skills.get(tool_name, {}).get("code", None)
         if not code:
             raise ValueError(
@@ -158,7 +158,7 @@ def execute_skill_function(self, tool_name, path_registry, **kwargs):
             )
         # capture initial state
         initial_files = set(os.listdir("."))
-        initial_registry = path_registry.list_path_names()
+        initial_registry = self.path_registry.list_path_names()
 
         try:
             self._check_arguments(tool_name, **kwargs)
@@ -172,7 +172,7 @@ def execute_skill_function(self, tool_name, path_registry, **kwargs):
         # capture final state
         new_files = list(set(os.listdir(".")) - initial_files)
         new_registry = list(
-            set(path_registry.list_path_names()) - set(initial_registry)
+            set(self.path_registry.list_path_names()) - set(initial_registry)
         )
         message = "Successfully executed code."
         if new_files:

diff --git a/mdagent/subagents/subagent_fxns.py b/mdagent/subagents/subagent_fxns.py
@@ -2,22 +2,21 @@
 import os
 from typing import Optional
 
-from mdagent.utils import PathRegistry
+import streamlit as st
 
 from .subagent_setup import SubAgentInitializer, SubAgentSettings
 
 
 class Iterator:
     def __init__(
         self,
-        path_registry: Optional[PathRegistry],
         subagent_settings: Optional[SubAgentSettings],
         all_tools_string: Optional[str] = None,
         current_tools: Optional[dict] = None,
     ):
-        self.path_registry = path_registry
         if subagent_settings is None:
             raise ValueError("Subagent settings cannot be None")  # shouldn't happen
+        self.path_registry = subagent_settings.path_registry
         self.ckpt_dir = subagent_settings.ckpt_dir
         self.all_tools_string = all_tools_string
         self.current_tools = current_tools
@@ -79,6 +78,7 @@ def _run_loop(self, task, full_history, skills):
         """
         critique = None
         print("\n\033[46m action agent is running, writing code\033[0m")
+        st.markdown("action agent is running, writing code", unsafe_allow_html=True)
         success, code, fxn_name, code_output = self.action._run_code(
             full_history, task, skills
         )
@@ -129,12 +129,20 @@ def _run_iterations(self, run, task):
 
                 # give successful code to tool/skill manager
                 print("\n\033[46mThe new code is complete, running skill agent\033[0m")
+                st.markdown(
+                    "The new code is complete, running skill agent",
+                    unsafe_allow_html=True,
+                )
                 tool_name = self.skill.add_new_tool(fxn_name, code)
                 return success, tool_name
             iter += 1
 
         # if max iterations reached without success, save failures to file
         print("\n\033[46m Max iterations reached, saving failed history to file\033[0m")
+        st.markdown(
+            "Max iterations reached, saving failed history to file",
+            unsafe_allow_html=True,
+        )
         tool_name = None
         full_failed = self._add_to_history(
             full_history,

diff --git a/mdagent/tools/base_tools/__init__.py b/mdagent/tools/base_tools/__init__.py
@@ -3,9 +3,8 @@
 from .analysis_tools.rmsd_tools import RMSDCalculator
 from .analysis_tools.vis_tools import (
     CheckDirectoryFiles,
-    PlanBVisualizationTool,
     VisFunctions,
-    VisualizationToolRender,
+    VisualizeProtein,
 )
 from .preprocess_tools.clean_tools import (
     AddHydrogensCleaningTool,
@@ -36,7 +35,7 @@
     "Name2PDBTool",
     "PackMolTool",
     "PPIDistance",
-    "PlanBVisualizationTool",
+    "VisualizeProtein",
     "RMSDCalculator",
     "RemoveWaterCleaningTool",
     "Scholar2ResultLLM",
@@ -46,7 +45,6 @@
     "SimulationOutputFigures",
     "SpecializedCleanTool",
     "VisFunctions",
-    "VisualizationToolRender",
     "get_pdb",
     "CleaningToolFunction",
     "SetUpandRunFunction",

diff --git a/mdagent/tools/base_tools/analysis_tools/__init__.py b/mdagent/tools/base_tools/analysis_tools/__init__.py
@@ -1,19 +1,13 @@
 from .plot_tools import SimulationOutputFigures
 from .ppi_tools import PPIDistance
 from .rmsd_tools import RMSDCalculator
-from .vis_tools import (
-    CheckDirectoryFiles,
-    PlanBVisualizationTool,
-    VisFunctions,
-    VisualizationToolRender,
-)
+from .vis_tools import CheckDirectoryFiles, VisFunctions, VisualizeProtein
 
 __all__ = [
     "PPIDistance",
     "RMSDCalculator",
     "SimulationOutputFigures",
     "CheckDirectoryFiles",
-    "PlanBVisualizationTool",
+    "VisualizeProtein",
     "VisFunctions",
-    "VisualizationToolRender",
 ]
diff --git a/mdagent/tools/base_tools/analysis_tools/plot_tools.py b/mdagent/tools/base_tools/analysis_tools/plot_tools.py
@@ -1,9 +1,12 @@
 import csv
 import re
+from typing import Optional
 
 import matplotlib.pyplot as plt
 from langchain.tools import BaseTool
 
+from mdagent.utils import PathRegistry
+
 
 def process_csv(file_name):
     with open(file_name, "r") as f:
@@ -24,57 +27,55 @@ def plot_data(data, headers, matched_headers):
     # Get the first matched header
     if matched_headers:
         time_or_step = matched_headers[0][1]
+        xlab = "step" if "step" in time_or_step.lower() else "time"
     else:
         print("No 'step' or 'time' headers found.")
         return
 
     failed_headers = []
-
     created_plots = []
-    # For each header (except the time/step one), plot time/step vs that header
-    header_count = 0
     for header in headers:
         if header != time_or_step:
-            header_count += 1
             try:
-                # Extract the data for the x and y axes
                 x = [float(row[time_or_step]) for row in data]
                 y = [float(row[header]) for row in data]
 
-                if "step" in time_or_step.lower():
-                    xlab = "step"
-                if "(" in header:
-                    header_lab = (header.split("(")[0]).strip()
-                # Generate the plot
+                header_lab = (
+                    header.split("(")[0].strip() if "(" in header else header
+                ).lower()
+                plot_name = f"{xlab}_vs_{header_lab}.png"
+
+                # Generate and save the plot
                 plt.figure()
                 plt.plot(x, y)
                 plt.xlabel(xlab)
                 plt.ylabel(header)
                 plt.title(f"{xlab} vs {header_lab}")
-
-                # Save the figure
-                plt.savefig(f"{xlab}_vs_{header_lab}.png")
+                plt.savefig(plot_name)
                 plt.close()
-                created_plots.append(f"{xlab}_vs_{header_lab}.png")
-            except ValueError:  # If data cannot be converted to float
+
+                created_plots.append(plot_name)
+            except ValueError:
                 failed_headers.append(header)
 
-    # If all plots failed, raise an exception
-    if len(failed_headers) == len(headers) - header_count:
+    if len(failed_headers) == len(headers) - 1:  # -1 to account for time_or_step header
         raise Exception("All plots failed due to non-numeric data.")
+
     return ", ".join(created_plots)
 
 
 class SimulationOutputFigures(BaseTool):
     name = "PostSimulationFigures"
     description = """This tool will take
-    a csv file output from an openmm
+    a csv file id output from an openmm
     simulation and create figures for
     all physical parameters
     versus timestep of the simulation.
     Give this tool the path to the
     csv file output from the simulation."""
 
+    path_registry: Optional[PathRegistry]
+
     def _run(self, file_path: str) -> str:
         """use the tool."""
         try: