Skip to content

Commit

Permalink
merge main into branch
Browse files Browse the repository at this point in the history
  • Loading branch information
SamCox822 committed Feb 7, 2024
2 parents d2d7b58 + a56cfe8 commit 4637619
Show file tree
Hide file tree
Showing 29 changed files with 1,447 additions and 896 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ Other tools require API keys, such as paper-qa for literature searches. We recom
1. Copy the `.env.example` file and rename it to `.env`: `cp .env.example .env`
2. Replace the placeholder values in `.env` with your actual keys

## Using Streamlit Interface
If you'd like to use MDAgent via the Streamlit app, first complete the steps above. Then, from the project root directory, run `streamlit run st_app.py` in your terminal.

From there you may upload files to use during the run. Note: the app currently accepts only .pdb and .cif files, and the maximum upload size defaults to 200 MB.
- To upload larger files, instead run `streamlit run st_app.py --server.maxUploadSize=some_large_number`, where `some_large_number` is the new limit in megabytes.
- To add different file types, you can add your desired file type to the list in the [streamlit app file](https://github.com/ur-whitelab/md-agent/blob/main/st_app.py).


## Contributing

Expand Down
57 changes: 44 additions & 13 deletions mdagent/mainagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from mdagent.subagents import SubAgentSettings
from mdagent.utils import PathRegistry, _make_llm

from ..tools import make_all_tools
from ..tools import get_tools, make_all_tools
from .prompt import openaifxn_prompt, structured_prompt

load_dotenv()
Expand Down Expand Up @@ -35,7 +35,7 @@ class MDAgent:
def __init__(
self,
tools=None,
agent_type="OpenAIFunctionsAgent", # this can also be strucured_chat
agent_type="OpenAIFunctionsAgent", # this can also be structured_chat
model="gpt-4-1106-preview", # current name for gpt-4 turbo
tools_model="gpt-4-1106-preview",
temp=0.1,
Expand All @@ -45,14 +45,21 @@ def __init__(
subagents_model="gpt-4-1106-preview",
ckpt_dir="ckpt",
resume=False,
top_k_tools=10,
top_k_tools=20, # set "all" if you want to use all tools (& skills if resume)
use_human_tool=False,
uploaded_files=[], # user input files to add to path registry
):
if path_registry is None:
path_registry = PathRegistry.get_instance()
if tools is None:
tools_llm = _make_llm(tools_model, temp, verbose)
tools = make_all_tools(tools_llm, human=use_human_tool)
self.uploaded_files = uploaded_files
for file in uploaded_files: # todo -> allow users to add descriptions?
path_registry.map_path(file, file, description="User uploaded file")

self.agent_type = agent_type
self.user_tools = tools
self.tools_llm = _make_llm(tools_model, temp, verbose)
self.top_k_tools = top_k_tools
self.use_human_tool = use_human_tool

self.llm = ChatOpenAI(
temperature=temp,
Expand All @@ -61,11 +68,7 @@ def __init__(
streaming=True,
callbacks=[StreamingStdOutCallbackHandler()],
)
self.agent = AgentExecutor.from_agent_and_tools(
tools=tools,
agent=AgentType.get_agent(agent_type).from_llm_and_tools(self.llm, tools),
handle_parsing_errors=True,
)

# assign prompt
if agent_type == "Structured":
self.prompt = structured_prompt
Expand All @@ -80,9 +83,37 @@ def __init__(
verbose=verbose,
ckpt_dir=ckpt_dir,
resume=resume,
retrieval_top_k=top_k_tools,
)

# Selects the tool set for this run, stores it on self.tools, and returns a
# fresh AgentExecutor built from self.llm and those tools.
#   user_input: optional query; when given (and top_k_tools is not "all") it
#   is passed to get_tools as the retrieval query.
def _initialize_tools_and_agent(self, user_input=None):
"""Retrieve tools and initialize the agent."""
# tools supplied by the caller at construction time take precedence over
# any retrieval
if self.user_tools is not None:
self.tools = self.user_tools
else:
# NOTE(review): self.top_k_tools is only compared against "all" here; its
# numeric value is not forwarded to get_tools -- confirm get_tools applies
# the intended limit on its own.
if self.top_k_tools != "all" and user_input is not None:
# retrieve only tools relevant to user input
self.tools = get_tools(
query=user_input,
llm=self.tools_llm,
subagent_settings=self.subagents_settings,
human=self.use_human_tool,
)
else:
# retrieve all tools, including new tools if any
self.tools = make_all_tools(
self.tools_llm,
subagent_settings=self.subagents_settings,
human=self.use_human_tool,
)
# the agent class is looked up from self.agent_type and given the same tool
# list as the executor
return AgentExecutor.from_agent_and_tools(
tools=self.tools,
agent=AgentType.get_agent(self.agent_type).from_llm_and_tools(
self.llm,
self.tools,
),
handle_parsing_errors=True,
)

# Runs the agent on a single user request and returns whatever the
# executor's run() returns.
#   user_input: the request text; used both to (re)build the agent and to
#   fill the `input` slot of self.prompt.
#   callbacks: forwarded unchanged to the executor's run().
def run(self, user_input, callbacks=None):
# todo: check this for both agent types
# the agent is rebuilt on every call, so self.agent is replaced each time
# run() is invoked
self.agent = self._initialize_tools_and_agent(user_input)
return self.agent.run(self.prompt.format(input=user_input), callbacks=callbacks)
57 changes: 26 additions & 31 deletions mdagent/mainagent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,32 @@
structured_prompt = PromptTemplate(
input_variables=["input"],
template="""
You are an expert molecular dynamics scientist and
your task is to respond to the question or
solve the problem to the best of your ability using
the provided tools.
You can only respond with a single complete
"Thought, Action, Action Input" format
OR a single "Final Answer" format.
Complete format:
Thought: (reflect on your progress and decide what
to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)
OR
Final Answer: (the final answer to the original input
question)
Use the tools provided, using the most specific tool
available for each action.
Once you map a path to a short name, you may only use
that short name in future actions.
Your final answer should contain all information
necessary to answer the question and subquestions.
Your thought process should be clean and clear,
and you must explicitly state the actions you are taking.
Question: {input}
""",
You are an expert molecular dynamics scientist and
your task is to respond to the question or
solve the problem to the best of your ability using
the provided tools.
You can only respond with a single complete
'Thought, Action, Action Input' format
OR a single 'Final Answer' format.
Complete format:
Thought: (reflect on your progress and decide what " "to do next)
Action: (the action name, should be the name of a tool)
Action Input: (the input string to the action)
OR
Final Answer: (the final answer to the original input
question)
Use the tools provided, using the most specific tool
available for each action.
Your final answer should contain all information
necessary to answer the question and subquestions.
Your thought process should be clean and clear,
and you must explicitly state the actions you are taking.
Question: {input} """,
)


Expand Down
6 changes: 3 additions & 3 deletions mdagent/subagents/agents/skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def update_skill_library(self, function, code_script, description, arguments):
)
self.vectordb.persist()

def execute_skill_function(self, tool_name, path_registry, **kwargs):
def execute_skill_function(self, tool_name, **kwargs):
code = self.skills.get(tool_name, {}).get("code", None)
if not code:
raise ValueError(
Expand All @@ -158,7 +158,7 @@ def execute_skill_function(self, tool_name, path_registry, **kwargs):
)
# capture initial state
initial_files = set(os.listdir("."))
initial_registry = path_registry.list_path_names()
initial_registry = self.path_registry.list_path_names()

try:
self._check_arguments(tool_name, **kwargs)
Expand All @@ -172,7 +172,7 @@ def execute_skill_function(self, tool_name, path_registry, **kwargs):
# capture final state
new_files = list(set(os.listdir(".")) - initial_files)
new_registry = list(
set(path_registry.list_path_names()) - set(initial_registry)
set(self.path_registry.list_path_names()) - set(initial_registry)
)
message = "Successfully executed code."
if new_files:
Expand Down
14 changes: 11 additions & 3 deletions mdagent/subagents/subagent_fxns.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,21 @@
import os
from typing import Optional

from mdagent.utils import PathRegistry
import streamlit as st

from .subagent_setup import SubAgentInitializer, SubAgentSettings


class Iterator:
def __init__(
self,
path_registry: Optional[PathRegistry],
subagent_settings: Optional[SubAgentSettings],
all_tools_string: Optional[str] = None,
current_tools: Optional[dict] = None,
):
self.path_registry = path_registry
if subagent_settings is None:
raise ValueError("Subagent settings cannot be None") # shouldn't happen
self.path_registry = subagent_settings.path_registry
self.ckpt_dir = subagent_settings.ckpt_dir
self.all_tools_string = all_tools_string
self.current_tools = current_tools
Expand Down Expand Up @@ -79,6 +78,7 @@ def _run_loop(self, task, full_history, skills):
"""
critique = None
print("\n\033[46m action agent is running, writing code\033[0m")
st.markdown("action agent is running, writing code", unsafe_allow_html=True)
success, code, fxn_name, code_output = self.action._run_code(
full_history, task, skills
)
Expand Down Expand Up @@ -129,12 +129,20 @@ def _run_iterations(self, run, task):

# give successful code to tool/skill manager
print("\n\033[46mThe new code is complete, running skill agent\033[0m")
st.markdown(
"The new code is complete, running skill agent",
unsafe_allow_html=True,
)
tool_name = self.skill.add_new_tool(fxn_name, code)
return success, tool_name
iter += 1

# if max iterations reached without success, save failures to file
print("\n\033[46m Max iterations reached, saving failed history to file\033[0m")
st.markdown(
"Max iterations reached, saving failed history to file",
unsafe_allow_html=True,
)
tool_name = None
full_failed = self._add_to_history(
full_history,
Expand Down
6 changes: 2 additions & 4 deletions mdagent/tools/base_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
from .analysis_tools.rmsd_tools import RMSDCalculator
from .analysis_tools.vis_tools import (
CheckDirectoryFiles,
PlanBVisualizationTool,
VisFunctions,
VisualizationToolRender,
VisualizeProtein,
)
from .preprocess_tools.clean_tools import (
AddHydrogensCleaningTool,
Expand Down Expand Up @@ -36,7 +35,7 @@
"Name2PDBTool",
"PackMolTool",
"PPIDistance",
"PlanBVisualizationTool",
"VisualizeProtein",
"RMSDCalculator",
"RemoveWaterCleaningTool",
"Scholar2ResultLLM",
Expand All @@ -46,7 +45,6 @@
"SimulationOutputFigures",
"SpecializedCleanTool",
"VisFunctions",
"VisualizationToolRender",
"get_pdb",
"CleaningToolFunction",
"SetUpandRunFunction",
Expand Down
10 changes: 2 additions & 8 deletions mdagent/tools/base_tools/analysis_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
from .plot_tools import SimulationOutputFigures
from .ppi_tools import PPIDistance
from .rmsd_tools import RMSDCalculator
from .vis_tools import (
CheckDirectoryFiles,
PlanBVisualizationTool,
VisFunctions,
VisualizationToolRender,
)
from .vis_tools import CheckDirectoryFiles, VisFunctions, VisualizeProtein

__all__ = [
"PPIDistance",
"RMSDCalculator",
"SimulationOutputFigures",
"CheckDirectoryFiles",
"PlanBVisualizationTool",
"VisualizeProtein",
"VisFunctions",
"VisualizationToolRender",
]
37 changes: 19 additions & 18 deletions mdagent/tools/base_tools/analysis_tools/plot_tools.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import csv
import re
from typing import Optional

import matplotlib.pyplot as plt
from langchain.tools import BaseTool

from mdagent.utils import PathRegistry


def process_csv(file_name):
with open(file_name, "r") as f:
Expand All @@ -24,57 +27,55 @@ def plot_data(data, headers, matched_headers):
# Get the first matched header
if matched_headers:
time_or_step = matched_headers[0][1]
xlab = "step" if "step" in time_or_step.lower() else "time"
else:
print("No 'step' or 'time' headers found.")
return

failed_headers = []

created_plots = []
# For each header (except the time/step one), plot time/step vs that header
header_count = 0
for header in headers:
if header != time_or_step:
header_count += 1
try:
# Extract the data for the x and y axes
x = [float(row[time_or_step]) for row in data]
y = [float(row[header]) for row in data]

if "step" in time_or_step.lower():
xlab = "step"
if "(" in header:
header_lab = (header.split("(")[0]).strip()
# Generate the plot
header_lab = (
header.split("(")[0].strip() if "(" in header else header
).lower()
plot_name = f"{xlab}_vs_{header_lab}.png"

# Generate and save the plot
plt.figure()
plt.plot(x, y)
plt.xlabel(xlab)
plt.ylabel(header)
plt.title(f"{xlab} vs {header_lab}")

# Save the figure
plt.savefig(f"{xlab}_vs_{header_lab}.png")
plt.savefig(plot_name)
plt.close()
created_plots.append(f"{xlab}_vs_{header_lab}.png")
except ValueError: # If data cannot be converted to float

created_plots.append(plot_name)
except ValueError:
failed_headers.append(header)

# If all plots failed, raise an exception
if len(failed_headers) == len(headers) - header_count:
if len(failed_headers) == len(headers) - 1: # -1 to account for time_or_step header
raise Exception("All plots failed due to non-numeric data.")

return ", ".join(created_plots)


class SimulationOutputFigures(BaseTool):
name = "PostSimulationFigures"
description = """This tool will take
a csv file output from an openmm
a csv file id output from an openmm
simulation and create figures for
all physical parameters
versus timestep of the simulation.
Give this tool the path to the
csv file output from the simulation."""

path_registry: Optional[PathRegistry]

def _run(self, file_path: str) -> str:
"""use the tool."""
try:
Expand Down
Loading

0 comments on commit 4637619

Please sign in to comment.