need to update my hydrogen bonding branch with changes in main for accuracy.

Merge branch 'main' of https://github.com/ur-whitelab/md-agent into hydrogen_bonding
brittyscience committed Oct 7, 2024
2 parents 660758e + f4cecf4 commit b36f645
Showing 65 changed files with 1,624 additions and 9,743 deletions.
7 changes: 5 additions & 2 deletions .env.example
@@ -1,8 +1,11 @@
# Copy this file to a new file named .env and replace the placeholders with your actual keys.
# REMOVE "pragma: allowlist secret" when you replace with actual keys.
# DO NOT fill your keys directly into this file.

# OpenAI API Key
OPENAI_API_KEY=YOUR_OPENAI_API_KEY_GOES_HERE # pragma: allowlist secret

# Serp API key
SERP_API_KEY=YOUR_SERP_API_KEY_GOES_HERE # pragma: allowlist secret
# PQA API Key to use LiteratureSearch tool (optional) -- it also requires OpenAI key
PQA_API_KEY=YOUR_PQA_API_KEY_GOES_HERE # pragma: allowlist secret

# Optional: add TogetherAI, Fireworks, or Anthropic API key here to use their models
38 changes: 24 additions & 14 deletions README.md
@@ -1,38 +1,48 @@
MD-Agent is an LLM-agent based toolset for Molecular Dynamics.
MDAgent is an LLM-agent based toolset for Molecular Dynamics.
It's built using LangChain and uses a collection of tools to set up and execute molecular dynamics simulations, particularly in OpenMM.


## Environment Setup
To use the OpenMM features in the agent, please set up a conda environment, following these steps.
- Create conda environment: `conda env create -n mdagent -f environment.yaml`
- Activate your environment: `conda activate mdagent`
```
conda env create -n mdagent -f environment.yaml
conda activate mdagent
```

If you already have a conda environment, you can install the dependencies before activating it with the following step.
- Install the necessary conda dependencies: `conda env update -n <YOUR_CONDA_ENV_HERE> -f environment.yaml`

If you already have a conda environment, you can install dependencies with the following step.
- Install the necessary conda dependencies: `conda install -c conda-forge openmm pdbfixer mdtraj`


## Installation
```
pip install git+https://github.com/ur-whitelab/md-agent.git
```


## Usage
The first step is to set up your API keys in your environment. An OpenAI key is necessary for this project.
The next step is to set up your API keys in your environment. An API key for an LLM provider is necessary for this project. Supported LLM providers are OpenAI, TogetherAI, Fireworks, and Anthropic.
Some tools, such as paper-qa for literature searches, require their own API keys. We recommend setting up the keys in a .env file; you can use the provided .env.example file as a template.
1. Copy the `.env.example` file and rename it to `.env`: `cp .env.example .env`
2. Replace the placeholder values in `.env` with your actual keys

<!-- ## Using Streamlit Interface
If you'd like to use MDAgent via the streamlit app, make sure you have completed the steps above. Then, in your terminal, run `streamlit run st_app.py` in the project root directory.
You can ask MDAgent to conduct molecular dynamics tasks using OpenAI's GPT model
```
from mdagent import MDAgent
agent = MDAgent(model="gpt-3.5-turbo")
agent.run("Simulate protein 1ZNI at 300 K for 0.1 ps and calculate the RMSD over time.")
```
Note: to distinguish Together models from the rest, you'll need to add the "together/" prefix to the model flag, such as `agent = MDAgent(model="together/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo")`

From there you may upload files to use during the run. Note: the app is currently limited to uploading .pdb and .cif files, and the max upload size defaults to 200MB.
- To upload larger files, instead run `streamlit run st_app.py --server.maxUploadSize=some_large_number`
- To add different file types, you can add your desired file type to the list in the [streamlit app file](https://github.com/ur-whitelab/md-agent/blob/main/st_app.py). -->
## LLM Providers
By default, we support LLMs through the OpenAI API. However, feel free to use other LLM providers; just make sure to install the necessary package. Here's a list of the packages required for the alternative LLM providers we support:
- `pip install langchain-together` to use models from TogetherAI
- `pip install langchain-anthropic` to use models from Anthropic
- `pip install langchain-fireworks` to use models from Fireworks


## Contributing

We welcome contributions to MD-Agent! If you're interested in contributing to the project, please check out our [Contributor's Guide](CONTRIBUTING.md) for detailed instructions on getting started, feature development, and the pull request process.
We welcome contributions to MDAgent! If you're interested in contributing to the project, please check out our [Contributor's Guide](CONTRIBUTING.md) for detailed instructions on getting started, feature development, and the pull request process.

We value and appreciate all contributions to MD-Agent.
We value and appreciate all contributions to MDAgent.
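As context for the .env instructions above: a minimal sketch of how keys set in .env reach the agent at runtime. This is illustrative only — mdagent already calls `load_dotenv()` itself (visible in the agent.py diff below), and the variable names come from .env.example.

```
# Minimal sketch: how keys defined in .env become visible at runtime.
# mdagent/agent/agent.py calls load_dotenv() on import, so this only
# illustrates the mechanism.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory into os.environ
openai_key = os.environ.get("OPENAI_API_KEY")  # required
pqa_key = os.environ.get("PQA_API_KEY")  # optional, LiteratureSearch tool
```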
2 changes: 2 additions & 0 deletions environment.yaml
@@ -5,6 +5,8 @@ dependencies:
- openmm >= 7.6
- pdbfixer >= 1.5
- mdtraj
- openff-toolkit
- openmmforcefields
- pip
- pip:
- flake8
64 changes: 35 additions & 29 deletions mdagent/agent/agent.py
@@ -3,13 +3,11 @@
from dotenv import load_dotenv
from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
from langchain.agents.structured_chat.base import StructuredChatAgent
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOpenAI

from ..tools import get_tools, make_all_tools
from ..tools import get_relevant_tools, make_all_tools
from ..utils import PathRegistry, SetCheckpoint, _make_llm
from .memory import MemoryManager
from .query_filter import make_prompt
from .prompt import openaifxn_prompt, structured_prompt

load_dotenv()

@@ -38,20 +36,26 @@ def __init__(
tools=None,
agent_type="OpenAIFunctionsAgent", # this can also be structured_chat
model="gpt-4-1106-preview", # current name for gpt-4 turbo
tools_model="gpt-4-1106-preview",
tools_model=None,
temp=0.1,
verbose=True,
streaming=True,
verbose=False,
ckpt_dir="ckpt",
top_k_tools=20, # set "all" if you want to use all tools
use_human_tool=False,
uploaded_files=[], # user input files to add to path registry
run_id="",
use_memory=True,
use_memory=False,
):
self.llm = _make_llm(model, temp, streaming)
if tools_model is None:
tools_model = model
self.tools_llm = _make_llm(tools_model, temp, streaming)

self.use_memory = use_memory
self.path_registry = PathRegistry.get_instance(ckpt_dir=ckpt_dir)
self.ckpt_dir = self.path_registry.ckpt_dir
self.memory = MemoryManager(self.path_registry, run_id=run_id)
self.memory = MemoryManager(self.path_registry, self.tools_llm, run_id=run_id)
self.run_id = self.memory.run_id

self.uploaded_files = uploaded_files
@@ -60,18 +64,10 @@

self.agent = None
self.agent_type = agent_type
self.user_tools = tools
self.tools_llm = _make_llm(tools_model, temp, verbose)
self.top_k_tools = top_k_tools
self.use_human_tool = use_human_tool

self.llm = ChatOpenAI(
temperature=temp,
model=model,
client=None,
streaming=True,
callbacks=[StreamingStdOutCallbackHandler()],
)
self.user_tools = tools
self.verbose = verbose

def _initialize_tools_and_agent(self, user_input=None):
"""Retrieve tools and initialize the agent."""
@@ -80,9 +76,10 @@
else:
if self.top_k_tools != "all" and user_input is not None:
# retrieve only tools relevant to user input
self.tools = get_tools(
self.tools = get_relevant_tools(
query=user_input,
llm=self.tools_llm,
top_k_tools=self.top_k_tools,
human=self.use_human_tool,
)
else:
@@ -97,29 +94,38 @@
self.llm,
self.tools,
),
verbose=self.verbose,
handle_parsing_errors=True,
)

def run(self, user_input, callbacks=None):
run_memory = self.memory.run_id_mem if self.use_memory else None
self.prompt = make_prompt(
user_input, self.agent_type, model="gpt-3.5-turbo", run_memory=run_memory
)
if self.agent_type == "Structured":
self.prompt = structured_prompt.format(input=user_input, context=run_memory)
elif self.agent_type == "OpenAIFunctionsAgent":
self.prompt = openaifxn_prompt.format(input=user_input, context=run_memory)
self.agent = self._initialize_tools_and_agent(user_input)
model_output = self.agent.run(self.prompt, callbacks=callbacks)
model_output = self.agent.invoke(self.prompt, callbacks=callbacks)
if self.use_memory:
self.memory.generate_agent_summary(model_output)
print("Your run id is: ", self.run_id)
return model_output, self.run_id

def iter(self, user_input, include_run_info=True):
run_memory = self.memory.run_id_mem if self.use_memory else None

if self.agent is None:
self.prompt = make_prompt(
user_input, self.agent_type, model="gpt-3.5-turbo"
)
self.agent = self._initialize_tools_and_agent(user_input)
for step in self.agent.iter(self.prompt, include_run_info=include_run_info):
yield step
if self.agent_type == "Structured":
self.prompt = structured_prompt.format(
input=user_input, context=run_memory
)
elif self.agent_type == "OpenAIFunctionsAgent":
self.prompt = openaifxn_prompt.format(
input=user_input, context=run_memory
)
self.agent = self._initialize_tools_and_agent(user_input)
for step in self.agent.iter(self.prompt, include_run_info=include_run_info):
yield step

def force_clear_mem(self, all=False) -> str:
if all:
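The constructor changes above shift two defaults: `tools_model` now falls back to `model` when left as `None`, and run memory is off unless requested. A sketch of the resulting call patterns — assumed from the diff, not taken from the project docs:

```
# Sketch of MDAgent construction under the new defaults (assumptions
# based on the diff above, not on project documentation).
from mdagent import MDAgent

# tools_model=None -> tool retrieval reuses the main model
agent = MDAgent(model="gpt-4-1106-preview", top_k_tools=20)

# use_memory now defaults to False; opt back in explicitly
agent_with_memory = MDAgent(model="gpt-4-1106-preview", use_memory=True)

# run() returns (model_output, run_id), per the diff
output, run_id = agent.run("Simulate protein 1ZNI at 300 K for 0.1 ps.")
```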
18 changes: 4 additions & 14 deletions mdagent/agent/memory.py
@@ -3,10 +3,8 @@
import random
import string

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

from mdagent.utils import PathRegistry

@@ -32,8 +30,7 @@ class MemoryManager:
def __init__(
self,
path_registry: PathRegistry,
model="gpt-3.5-turbo",
temp=0.1,
llm,
run_id="",
):
self.path_registry = path_registry
@@ -46,14 +43,7 @@
else:
pull_mem = True

llm = ChatOpenAI(
temperature=temp,
model=model,
client=None,
streaming=True,
callbacks=[StreamingStdOutCallbackHandler()],
)
self.llm_agent_trace = LLMChain(llm=llm, prompt=agent_summary_template)
self.llm_agent_trace = agent_summary_template | llm | StrOutputParser()

self._make_all_dirs()
if pull_mem:
@@ -138,7 +128,7 @@ def generate_agent_summary(self, agent_trace):
Returns:
- None
"""
llm_out = self.llm_agent_trace({"agent_trace": agent_trace})["text"]
llm_out = self.llm_agent_trace.invoke({"agent_trace": agent_trace})
key_str = f"{self.run_id}.{self.get_summary_number()}"
run_summary = {key_str: llm_out}
self._write_to_json(run_summary, self.agent_trace_summary)
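The memory.py changes are the standard LLMChain-to-LCEL migration: prompt, model, and output parser are piped together, and `.invoke()` returns the parsed string directly, with no `["text"]` indexing. A self-contained sketch with a stand-in template (the real `agent_summary_template` lives in mdagent and is assumed to be a `PromptTemplate`):

```
# LCEL sketch of the change above; the template text is a stand-in.
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.1)
prompt = PromptTemplate.from_template(
    "Summarize this agent trace:\n{agent_trace}"
)

# prompt -> llm -> parser pipe replaces LLMChain(llm=llm, prompt=...)
chain = prompt | llm | StrOutputParser()
summary = chain.invoke({"agent_trace": "Step 1: downloaded 1ZNI ..."})
print(summary)  # plain string
```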
