diff --git a/mdagent/agent/agent.py b/mdagent/agent/agent.py index b15052ce..3d9864fa 100644 --- a/mdagent/agent/agent.py +++ b/mdagent/agent/agent.py @@ -78,6 +78,7 @@ def _initialize_tools_and_agent(self, user_input=None): self.tools = get_tools( query=user_input, llm=self.tools_llm, + top_k_tools=self.top_k_tools, human=self.use_human_tool, ) else: @@ -103,6 +104,7 @@ def run(self, user_input, callbacks=None): elif self.agent_type == "OpenAIFunctionsAgent": self.prompt = openaifxn_prompt.format(input=user_input, context=run_memory) self.agent = self._initialize_tools_and_agent(user_input) + print("Num of tokens:", self.llm.get_num_tokens(self.prompt)) model_output = self.agent.invoke(self.prompt, callbacks=callbacks) if self.use_memory: self.memory.generate_agent_summary(model_output) diff --git a/mdagent/agent/memory.py b/mdagent/agent/memory.py index e12988b3..f450e47c 100644 --- a/mdagent/agent/memory.py +++ b/mdagent/agent/memory.py @@ -128,7 +128,6 @@ def generate_agent_summary(self, agent_trace): Returns: - None """ - print(agent_trace) llm_out = self.llm_agent_trace.invoke({"agent_trace": agent_trace}) key_str = f"{self.run_id}.{self.get_summary_number()}" run_summary = {key_str: llm_out} diff --git a/mdagent/utils/makellm.py b/mdagent/utils/makellm.py index 832042ac..e884d58f 100644 --- a/mdagent/utils/makellm.py +++ b/mdagent/utils/makellm.py @@ -1,6 +1,5 @@ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain_openai import ChatOpenAI -from langchain_together import ChatTogether def _make_llm(model, temp, verbose): @@ -12,14 +11,25 @@ def _make_llm(model, temp, verbose): streaming=True if verbose else False, callbacks=[StreamingStdOutCallbackHandler()] if verbose else None, ) - elif model.startswith("Meta-Llama"): - llm = ChatTogether( + elif model.startswith("llama"): + from langchain_fireworks import ChatFireworks + + llm = ChatFireworks( temperature=temp, - model=f"meta-llama/{model}", + 
model_name=f"accounts/fireworks/models/{model}", request_timeout=1000, streaming=True if verbose else False, callbacks=[StreamingStdOutCallbackHandler()] if verbose else None, ) + # elif model.startswith("Meta-Llama"): + # from langchain_together import ChatTogether + # llm = ChatTogether( + # temperature=temp, + # model=f"meta-llama/{model}", + # request_timeout=1000, + # streaming=True if verbose else False, + # callbacks=[StreamingStdOutCallbackHandler()] if verbose else None, + # ) else: raise ValueError(f"Invalid or Unsupported model name: {model}") return llm diff --git a/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_1.ipynb b/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_1.ipynb new file mode 100644 index 00000000..9dd84640 --- /dev/null +++ b/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_1.ipynb @@ -0,0 +1,328 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "from mdagent import MDAgent" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:26:57\n" + ] + } + ], + "source": [ + "#todays date and time\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "prompt1 = \"Simulate pdb 1MBN at two different temperatures: 300K, 400K for 1ns seconds each. 
Plot RMSD of both over time, and compare the final secondary structures at the end of the simulations.\"\n", + "llm_var = \"llama-v3p1-405b-instruct\"\n", + "tools = \"all\"\n", + "agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LLM: accounts/fireworks/models/llama-v3p1-405b-instruct \n", + "Temperature: 0.1\n" + ] + } + ], + "source": [ + "print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num of tokens: 384\n", + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "Thought: First, I need to download the PDB file for 1MBN.\n", + "Action: PDBFileDownloader\n", + "Action Input: {\"query\": \"1MBN\"}<|eom_id|><|start_header_id|>assistant<|end_header_id|>\n", + "\n", + "\u001b[32;1m\u001b[1;3mThought: First, I need to download the PDB file for 1MBN.\n", + "Action: PDBFileDownloader\n", + "Action Input: {\"query\": \"1MBN\"}<|eom_id|><|start_header_id|>assistant<|end_header_id|>\n", + "\n", + "\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "Here is a summary of the agent's attempts to solve the problem:\n", + "\n", + "**Prompt:** Simulate pdb 1MBN at two different temperatures: 300K, 400K for 1ns seconds each. Plot RMSD of both over time, and compare the final secondary structures at the end of the simulations.\n", + "\n", + "**Step 1:**\n", + "Thought: First, I need to download the PDB file for 1MBN.\n", + "Action: PDBFileDownloader\n", + "Action Input: {\"query\": \"1MBN\"}\n", + "\n", + "The agent has not yet completed the task, so there are no additional steps or a final solution to report. 
The agent's next step will likely involve using the downloaded PDB file to set up and run the molecular dynamics simulations at the two specified temperatures.Your run id is: XCYLSITB\n" + ] + } + ], + "source": [ + "result = agent.run(prompt1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Thought: First, I need to download the PDB file for 1MBN.\\nAction: PDBFileDownloader\\nAction Input: {\"query\": \"1MBN\"}<|eom_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output, run_id = result\n", + "output['output']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "384\n", + "66\n" + ] + } + ], + "source": [ + "print(agent.llm.get_num_tokens(output['input']))\n", + "print(agent.llm.get_num_tokens(output['output']))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.2.12'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import langchain\n", + "langchain.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'error': {'message': 'Input validation error: `inputs` tokens + `max_new_tokens` must be <= 4097. Given: 7338 `inputs` tokens and 4096 `max_new_tokens`',\n", + " 'type': 'invalid_request_error',\n", + " 'param': 'max_tokens',\n", + " 'code': None}}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{'error': \n", + " {\n", + " 'message': 'Input validation error: `inputs` tokens + `max_new_tokens` must be <= 4097. 
Given: 7338 `inputs` tokens and 4096 `max_new_tokens`', \n", + " 'type': 'invalid_request_error', \n", + " 'param': 'max_tokens', \n", + " 'code': None\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:27:07\n" + ] + } + ], + "source": [ + "#print final date and time\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No names found. The JSON file is empty or does not contain name mappings.\n" + ] + } + ], + "source": [ + "registry = agent.path_registry\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Cannot embed the '' image format", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[12], line 9\u001b[0m\n\u001b[1;32m 5\u001b[0m path1 \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_mapped_path(rmsd1ID)\n\u001b[1;32m 6\u001b[0m path2 \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_mapped_path(rmsd2ID)\n\u001b[0;32m----> 9\u001b[0m Image(filename\u001b[38;5;241m=\u001b[39mpath1)\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/IPython/core/display.py:961\u001b[0m, in \u001b[0;36mImage.__init__\u001b[0;34m(self, data, url, 
filename, format, embed, width, height, retina, unconfined, metadata, alt)\u001b[0m\n\u001b[1;32m 958\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membed \u001b[38;5;241m=\u001b[39m embed \u001b[38;5;28;01mif\u001b[39;00m embed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m (url \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 960\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membed \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mformat \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ACCEPTABLE_EMBEDDINGS:\n\u001b[0;32m--> 961\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot embed the \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m image format\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mformat))\n\u001b[1;32m 962\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membed:\n\u001b[1;32m 963\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mimetype \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_MIMETYPES\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mformat)\n", + "\u001b[0;31mValueError\u001b[0m: Cannot embed the '' image format" + ] + } + ], + "source": [ + "#plotting rmsd of both simulations\n", + "from IPython.display import Image\n", + "rmsd1ID = 'fig0_192900'\n", + "rmsd2ID = 'fig0_192903'\n", + "path1 = registry.get_mapped_path(rmsd1ID)\n", + "path2 = 
registry.get_mapped_path(rmsd2ID)\n", + "\n", + "\n", + "Image(filename=path1)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Image(filename=path2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment Result:\n", + "### Completed without Exception or TimeOut Errors ❌\n", + "### Attempted all necessary steps ✅\n", + "### Logic make sense ✅\n", + "### Correct Answer ❌\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Agent did not realize that it finished" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_2.ipynb b/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_2.ipynb new file mode 100644 index 00000000..eb096efb --- /dev/null +++ b/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_2.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "from mdagent import MDAgent\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:33:15\n", + "LLM: accounts/fireworks/models/llama-v3p1-405b-instruct \n", + "Temperature: 0.1\n" + ] + } + ], 
+ "source": [ + "prompt2 = \"Download the pdb file for PDB ID 1LYZ.\"\n", + "llm_var = \"llama-v3p1-405b-instruct\"\n", + "tools = \"all\"\n", + "agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)\n", + "print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num of tokens: 351\n", + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "Thought: I need to download the PDB file for the given PDB ID 1LYZ.\n", + "Action: PDBFileDownloader\n", + "Action Input: {\"query\": \"1LYZ\"}\u001b[32;1m\u001b[1;3mThought: I need to download the PDB file for the given PDB ID 1LYZ.\n", + "Action: PDBFileDownloader\n", + "Action Input: {\"query\": \"1LYZ\"}\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "Here is a summary of the agent's attempts to solve the problem:\n", + "\n", + "**Prompt:** Download the pdb file for PDB ID 1LYZ.\n", + "\n", + "**Step 1:**\n", + "* **Thought:** The agent recognizes the need to download the PDB file for the given PDB ID 1LYZ.\n", + "* **Action:** The agent decides to use the \"PDBFileDownloader\" tool.\n", + "* **Action Input:** The agent provides the input string {\"query\": \"1LYZ\"} to the PDBFileDownloader tool.\n", + "\n", + "**Final Solution:** Not yet available, as this is the first step in the process. The agent has initiated the download of the PDB file for PDB ID 1LYZ using the PDBFileDownloader tool. 
Further steps may be required to complete the task.Your run id is: 8RUM0LO5\n" + ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what \" \"to do next)\\n Action: (the action name, should be the name of a tool)\\n Action Input: (the input string to the action)\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, when all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the pdb file for PDB ID 1LYZ. 
',\n", + " 'output': 'Thought: I need to download the PDB file for the given PDB ID 1LYZ.\\nAction: PDBFileDownloader\\nAction Input: {\"query\": \"1LYZ\"}'},\n", + " '8RUM0LO5')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(prompt2)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:33:24\n" + ] + } + ], + "source": [ + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent.path_registry" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No names found. 
The JSON file is empty or does not contain name mappings.\n" + ] + } + ], + "source": [ + "print(registry.list_path_names_and_descriptions())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ddb167e43b204cd29e8d97df3af2600c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "ValueError", + "evalue": "you must provide file extension if using file-like object or text content", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnglview\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnv\u001b[39;00m\n\u001b[1;32m 3\u001b[0m path \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_mapped_path(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1LYZ_160238\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m view \u001b[38;5;241m=\u001b[39m nv\u001b[38;5;241m.\u001b[39mshow_file(path)\n\u001b[1;32m 5\u001b[0m view\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/show.py:246\u001b[0m, in \u001b[0;36mshow_file\u001b[0;34m(path, **kwargs)\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 245\u001b[0m view \u001b[38;5;241m=\u001b[39m NGLWidget()\n\u001b[0;32m--> 246\u001b[0m view\u001b[38;5;241m.\u001b[39madd_component(path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 247\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m view\n", + "File 
\u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/widget.py:1216\u001b[0m, in \u001b[0;36mNGLWidget.add_component\u001b[0;34m(self, filename, **kwargs)\u001b[0m\n\u001b[1;32m 1213\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m package_name \u001b[38;5;129;01min\u001b[39;00m BACKENDS:\n\u001b[1;32m 1214\u001b[0m filename \u001b[38;5;241m=\u001b[39m BACKENDS[package_name](filename)\n\u001b[0;32m-> 1216\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_load_data(filename, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1217\u001b[0m \u001b[38;5;66;03m# assign an ID\u001b[39;00m\n\u001b[1;32m 1218\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ngl_component_ids\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mstr\u001b[39m(uuid\u001b[38;5;241m.\u001b[39muuid4()))\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/widget.py:1254\u001b[0m, in \u001b[0;36mNGLWidget._load_data\u001b[0;34m(self, obj, **kwargs)\u001b[0m\n\u001b[1;32m 1251\u001b[0m blob \u001b[38;5;241m=\u001b[39m fh\u001b[38;5;241m.\u001b[39mread(force_buffer\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1252\u001b[0m passing_buffer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m-> 1254\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fh\u001b[38;5;241m.\u001b[39mext \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m passing_buffer:\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmust provide extension\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 1257\u001b[0m kwargs2[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mext\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m fh\u001b[38;5;241m.\u001b[39mext\n", + "File 
\u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/utils/py_utils.py:242\u001b[0m, in \u001b[0;36mFileManager.ext\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msrc, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mread\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_filename\n\u001b[1;32m 241\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_url):\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 243\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou must provide file extension if using file-like object or text content\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 244\u001b[0m )\n\u001b[1;32m 245\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_compressed:\n\u001b[1;32m 246\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msrc\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m]\n", + "\u001b[0;31mValueError\u001b[0m: you must provide file extension if using file-like object or text content" + ] + } + ], + "source": [ + "#show the image from the path of the registry and the ID \n", + "import nglview as nv\n", + "path = registry.get_mapped_path(\"1LYZ_160238\")\n", + "view = nv.show_file(path)\n", + "view" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment Result:\n", + "### Completed without 
Exception or TimeOut Errors ✅\n", + "### Attempted all necessary steps ✅\n", + "### Logic make sense ✅\n", + "### Correct Answer ✅\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_3.ipynb b/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_3.ipynb new file mode 100644 index 00000000..a3287f25 --- /dev/null +++ b/notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_3.ipynb @@ -0,0 +1,211 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "from mdagent import MDAgent\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:45:33\n", + "LLM: accounts/fireworks/models/llama-v3p1-405b-instruct \n", + "Temperature: 0.1\n" + ] + } + ], + "source": [ + "prompt3 = \"Download the PDB file for PDB ID 1GZX. Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. 
there are.\"\n", + "llm_var = \"llama-v3p1-405b-instruct\"\n", + "tools = \"all\"\n", + "agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)\n", + "print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num of tokens: 374\n", + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "Thought: First, I need to download the PDB file for PDB ID 1GZX. Then, I can analyze the secondary structure of the protein.\n", + "\n", + "Action: PDBFileDownloader\n", + "Action Input: {\"query\": \"1GZX\"}\n", + "\n", + "\u001b[32;1m\u001b[1;3mThought: First, I need to download the PDB file for PDB ID 1GZX. Then, I can analyze the secondary structure of the protein.\n", + "\n", + "Action: PDBFileDownloader\n", + "Action Input: {\"query\": \"1GZX\"}\n", + "\n", + "\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "Here is a summary of the agent's attempts to solve the problem:\n", + "\n", + "**Prompt:** Download the PDB file for PDB ID 1GZX. Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. there are.\n", + "\n", + "**Step 1:**\n", + "**Thought:** First, I need to download the PDB file for PDB ID 1GZX. Then, I can analyze the secondary structure of the protein.\n", + "**Action:** PDBFileDownloader\n", + "**Action Input:** {\"query\": \"1GZX\"}\n", + "\n", + "The agent has not yet completed the task, as it has only taken the first step to download the PDB file. The analysis of the secondary structure of the protein has not been performed. 
There is no final solution yet.Your run id is: P5DY6NVM\n" + ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what \" \"to do next)\\n Action: (the action name, should be the name of a tool)\\n Action Input: (the input string to the action)\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, when all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the PDB file for PDB ID 1GZX. Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. there are. ',\n", + " 'output': 'Thought: First, I need to download the PDB file for PDB ID 1GZX. 
Then, I can analyze the secondary structure of the protein.\\n\\nAction: PDBFileDownloader\\nAction Input: {\"query\": \"1GZX\"}\\n\\n'},\n", + " 'P5DY6NVM')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(prompt3)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:45:45\n" + ] + } + ], + "source": [ + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No names found. The JSON file is empty or does not contain name mappings.\n" + ] + } + ], + "source": [ + "registry = agent.path_registry\n", + "print(registry.list_path_names_and_descriptions())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "OSError", + "evalue": "The topology is loaded by filename extension, and the detected \".\" format is not supported. 
Supported topology formats include \".pdb\", \".pdb.gz\", \".h5\", \".lh5\", \".prmtop\", \".parm7\", \".prm7\", \".psf\", \".mol2\", \".hoomdxml\", \".gro\", \".arc\", \".hdf5\" and \".gsd\".", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmdtraj\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mmd\u001b[39;00m\n\u001b[1;32m 2\u001b[0m path \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_mapped_path(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1GZX_161339\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m traj \u001b[38;5;241m=\u001b[39m md\u001b[38;5;241m.\u001b[39mload(path)\n\u001b[1;32m 4\u001b[0m top \u001b[38;5;241m=\u001b[39m traj\u001b[38;5;241m.\u001b[39mtopology\n\u001b[1;32m 5\u001b[0m number_of_chains \u001b[38;5;241m=\u001b[39m top\u001b[38;5;241m.\u001b[39mn_chains\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/mdtraj/core/trajectory.py:396\u001b[0m, in \u001b[0;36mload\u001b[0;34m(filename_or_filenames, discard_overlapping_frames, **kwargs)\u001b[0m\n\u001b[1;32m 394\u001b[0m topkwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstride\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 395\u001b[0m topkwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 396\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m 
_parse_topology(kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop\u001b[39m\u001b[38;5;124m\"\u001b[39m, filename_or_filenames[\u001b[38;5;241m0\u001b[39m]), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mtopkwargs)\n\u001b[1;32m 398\u001b[0m \u001b[38;5;66;03m#get the right loader\u001b[39;00m\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 400\u001b[0m \u001b[38;5;66;03m#loader = _LoaderRegistry[extension][0]\u001b[39;00m\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/mdtraj/core/trajectory.py:191\u001b[0m, in \u001b[0;36m_parse_topology\u001b[0;34m(top, **kwargs)\u001b[0m\n\u001b[1;32m 189\u001b[0m topology \u001b[38;5;241m=\u001b[39m load_gsd_topology(top, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(top, (string_types, os\u001b[38;5;241m.\u001b[39mPathLike)):\n\u001b[0;32m--> 191\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mThe topology is loaded by filename extension, and the \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdetected \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m format is not supported. 
Supported topology \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mformats include \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m (\n\u001b[1;32m 194\u001b[0m ext, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m e \u001b[38;5;28;01mfor\u001b[39;00m e \u001b[38;5;129;01min\u001b[39;00m _TOPOLOGY_EXTS[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]]),\n\u001b[1;32m 195\u001b[0m _TOPOLOGY_EXTS[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]))\n\u001b[1;32m 196\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mA topology is required. You supplied top=\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mstr\u001b[39m(top))\n", + "\u001b[0;31mOSError\u001b[0m: The topology is loaded by filename extension, and the detected \".\" format is not supported. Supported topology formats include \".pdb\", \".pdb.gz\", \".h5\", \".lh5\", \".prmtop\", \".parm7\", \".prm7\", \".psf\", \".mol2\", \".hoomdxml\", \".gro\", \".arc\", \".hdf5\" and \".gsd\"." 
+ ] + } + ], + "source": [ + "import mdtraj as md\n", + "path = registry.get_mapped_path(\"1GZX_161339\")\n", + "traj = md.load(path)\n", + "top = traj.topology\n", + "number_of_chains = top.n_chains\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",number_of_chains)\n", + "print(\"Number of sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment Result:\n", + "### Completed without Exception or TimeOut Errors ✅\n", + "### Attempted all necessary steps ✅\n", + "### Logic make sense ✅\n", + "### Correct Answer ✅" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_1 copy.ipynb b/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_1 copy.ipynb new file mode 100644 index 00000000..d58fa49f --- /dev/null +++ b/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_1 copy.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "from mdagent import MDAgent" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ 
+ { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:24:23\n" + ] + } + ], + "source": [ + "#todays date and time\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "prompt1 = \"Simulate pdb 1MBN at two different temperatures: 300K, 400K for 1ns seconds each. Plot RMSD of both over time, and compare the final secondary structures at the end of the simulations.\"\n", + "llm_var = \"llama-v3p1-70b-instruct\"\n", + "tools = \"all\"\n", + "agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LLM: accounts/fireworks/models/llama-v3p1-70b-instruct \n", + "Temperature: 0.1\n" + ] + } + ], + "source": [ + "print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4bc037360b8b407688ad283b0411702a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/26.0 [00:00 Entering new AgentExecutor chain...\u001b[0m\n", + "Thought: To simulate the protein 1MBN at two different temperatures, I will first need to download the PDB file and then set up and run the simulations. 
After the simulations are complete, I can calculate the RMSD of both simulations over time and compare the final secondary structures.\n", + "\n", + "Action: SetUpandRunFunction\n", + "Action Input: \n", + "{\n", + " \"pdb_id\": \"1MBN\",\n", + " \"forcefield_files\": [\"amber99sbildn.xml\", \"tip3p.xml\"],\n", + " \"save\": true,\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"PME\",\n", + " \"nonbondedCutoff\": \"1.0 * nanometers\",\n", + " \"ewaldErrorTolerance\": 0.00001,\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001,\n", + " \"solvate\": true\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.002 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simulation_params\": {\n", + " \"Ensemble\": \"NPT\",\n", + " \"Number of Steps\": 500000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + "}\n", + "\n", + "\u001b[32;1m\u001b[1;3mThought: To simulate the protein 1MBN at two different temperatures, I will first need to download the PDB file and then set up and run the simulations. 
After the simulations are complete, I can calculate the RMSD of both simulations over time and compare the final secondary structures.\n", + "\n", + "Action: SetUpandRunFunction\n", + "Action Input: \n", + "{\n", + " \"pdb_id\": \"1MBN\",\n", + " \"forcefield_files\": [\"amber99sbildn.xml\", \"tip3p.xml\"],\n", + " \"save\": true,\n", + " \"system_params\": {\n", + " \"nonbondedMethod\": \"PME\",\n", + " \"nonbondedCutoff\": \"1.0 * nanometers\",\n", + " \"ewaldErrorTolerance\": 0.00001,\n", + " \"constraints\": \"HBonds\",\n", + " \"rigidWater\": true,\n", + " \"constraintTolerance\": 0.00001,\n", + " \"solvate\": true\n", + " },\n", + " \"integrator_params\": {\n", + " \"integrator_type\": \"LangevinMiddle\",\n", + " \"Temperature\": \"300 * kelvin\",\n", + " \"Friction\": \"1.0 / picoseconds\",\n", + " \"Timestep\": \"0.002 * picoseconds\",\n", + " \"Pressure\": \"1.0 * bar\"\n", + " },\n", + " \"simulation_params\": {\n", + " \"Ensemble\": \"NPT\",\n", + " \"Number of Steps\": 500000,\n", + " \"record_interval_steps\": 100,\n", + " \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", + " }\n", + "}\n", + "\n", + "\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "Here is a summary of the agent's attempts to solve the problem:\n", + "\n", + "**Prompt:** The agent was given a task to simulate the protein 1MBN at two different temperatures (300K and 400K) for 1ns seconds each, plot the RMSD of both simulations over time, and compare the final secondary structures at the end of the simulations.\n", + "\n", + "**Step 1:** The agent reflected on the problem and decided to download the PDB file and set up and run the simulations. 
The agent chose to use the `SetUpandRunFunction` tool to achieve this.\n", + "\n", + "**Action Input:** The agent provided the following input to the `SetUpandRunFunction` tool:\n", + "\n", + "* `pdb_id`: \"1MBN\"\n", + "* `forcefield_files`: [\"amber99sbildn.xml\", \"tip3p.xml\"]\n", + "* `save`: true\n", + "* `system_params`: various parameters for the simulation, including nonbonded method, cutoff, and constraints\n", + "* `integrator_params`: parameters for the integrator, including temperature, friction, and timestep\n", + "* `simulation_params`: parameters for the simulation, including ensemble, number of steps, and record interval\n", + "\n", + "**Note:** The agent only simulated the protein at 300K in this step, but the prompt required simulations at both 300K and 400K.\n", + "\n", + "**Current Status:** The agent has only completed the first step of the simulation at 300K and has not yet simulated the protein at 400K, calculated the RMSD of both simulations, or compared the final secondary structures.\n", + "\n", + "**Next Steps:** The agent should simulate the protein at 400K, calculate the RMSD of both simulations, and compare the final secondary structures to complete the task.Your run id is: HSE53GCT\n" + ] + } + ], + "source": [ + "result = agent.run(prompt1)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Thought: To simulate the protein 1MBN at two different temperatures, I need to first download the PDB file of 1MBN. Then, I will set up and run two separate simulations at 300K and 400K for 1ns each. 
After the simulations, I will compute the RMSD of both simulations over time and compare the final secondary structures.\\n\\nAction: PDBFileDownloader\\nAction Input: 1MBN'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output, run_id = result\n", + "output['output']" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "312\n", + "91\n" + ] + } + ], + "source": [ + "print(agent.llm.get_num_tokens(output['input']))\n", + "print(agent.llm.get_num_tokens(output['output']))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.2.12'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import langchain\n", + "langchain.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "{'error': \n", + " {\n", + " 'message': 'Input validation error: `inputs` tokens + `max_new_tokens` must be <= 4097. 
Given: 7338 `inputs` tokens and 4096 `max_new_tokens`', \n", + " 'type': 'invalid_request_error', \n", + " 'param': 'max_tokens', \n", + " 'code': None\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#print final date and time\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent.path_registry\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#plotting rmsd of both simulations\n", + "from IPython.display import Image\n", + "rmsd1ID = 'fig0_192900'\n", + "rmsd2ID = 'fig0_192903'\n", + "path1 = registry.get_mapped_path(rmsd1ID)\n", + "path2 = registry.get_mapped_path(rmsd2ID)\n", + "\n", + "\n", + "Image(filename=path1)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Image(filename=path2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment Result:\n", + "### Completed without Exception or TimeOut Errors ❌\n", + "### Attempted all necessary steps ✅\n", + "### Logic make sense ✅\n", + "### Correct Answer ❌\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Agent did not realize that it finished" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_2 copy.ipynb b/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_2 copy.ipynb new file mode 100644 index 00000000..6aa080b9 --- /dev/null +++ b/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_2 copy.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "from mdagent import MDAgent\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:41:03\n", + "LLM: accounts/fireworks/models/llama-v3p1-70b-instruct \n", + "Temperature: 0.1\n" + ] + } + ], + "source": [ + "prompt2 = \"Download the pdb file for PDB ID 1LYZ.\"\n", + "llm_var = \"llama-v3p1-70b-instruct\"\n", + "tools = \"all\"\n", + "agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)\n", + "print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num of tokens: 351\n", + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "Thought: To download the PDB file for PDB ID 1LYZ, I need to use the PDBFileDownloader tool.\n", + "\n", + "Action: 
PDBFileDownloader\n", + "Action Input: 1LYZ\u001b[32;1m\u001b[1;3mThought: To download the PDB file for PDB ID 1LYZ, I need to use the PDBFileDownloader tool.\n", + "\n", + "Action: PDBFileDownloader\n", + "Action Input: 1LYZ\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "Here is a summary of the agent's attempts to solve the problem:\n", + "\n", + "**Prompt:** The agent was given the task to download the PDB file for PDB ID 1LYZ.\n", + "\n", + "**Step 1:** The agent reflected on the problem and decided to use the PDBFileDownloader tool to download the PDB file.\n", + "\n", + "**Action:** The agent chose the PDBFileDownloader tool.\n", + "\n", + "**Action Input:** The agent provided the input \"1LYZ\" to the PDBFileDownloader tool.\n", + "\n", + "**Final Solution:** The agent successfully downloaded the PDB file for PDB ID 1LYZ using the PDBFileDownloader tool.\n", + "\n", + "Note that this was a straightforward problem, and the agent was able to solve it in a single step. 
There were no errors or complications reported in the history.Your run id is: 6TONOT9N\n" + ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what \" \"to do next)\\n Action: (the action name, should be the name of a tool)\\n Action Input: (the input string to the action)\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, when all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the pdb file for PDB ID 1LYZ. 
',\n", + " 'output': 'Thought: To download the PDB file for PDB ID 1LYZ, I need to use the PDBFileDownloader tool.\\n\\nAction: PDBFileDownloader\\nAction Input: 1LYZ'},\n", + " '6TONOT9N')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(prompt2)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:41:29\n" + ] + } + ], + "source": [ + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent.path_registry" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No names found. 
The JSON file is empty or does not contain name mappings.\n" + ] + } + ], + "source": [ + "print(registry.list_path_names_and_descriptions())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7341ace3ad0242e698765d6a0f3833ba", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "ValueError", + "evalue": "you must provide file extension if using file-like object or text content", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnglview\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnv\u001b[39;00m\n\u001b[1;32m 3\u001b[0m path \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_mapped_path(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1LYZ_160238\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m view \u001b[38;5;241m=\u001b[39m nv\u001b[38;5;241m.\u001b[39mshow_file(path)\n\u001b[1;32m 5\u001b[0m view\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/show.py:246\u001b[0m, in \u001b[0;36mshow_file\u001b[0;34m(path, **kwargs)\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 245\u001b[0m view \u001b[38;5;241m=\u001b[39m NGLWidget()\n\u001b[0;32m--> 246\u001b[0m view\u001b[38;5;241m.\u001b[39madd_component(path, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 247\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m view\n", + "File 
\u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/widget.py:1216\u001b[0m, in \u001b[0;36mNGLWidget.add_component\u001b[0;34m(self, filename, **kwargs)\u001b[0m\n\u001b[1;32m 1213\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m package_name \u001b[38;5;129;01min\u001b[39;00m BACKENDS:\n\u001b[1;32m 1214\u001b[0m filename \u001b[38;5;241m=\u001b[39m BACKENDS[package_name](filename)\n\u001b[0;32m-> 1216\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_load_data(filename, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 1217\u001b[0m \u001b[38;5;66;03m# assign an ID\u001b[39;00m\n\u001b[1;32m 1218\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ngl_component_ids\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mstr\u001b[39m(uuid\u001b[38;5;241m.\u001b[39muuid4()))\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/widget.py:1254\u001b[0m, in \u001b[0;36mNGLWidget._load_data\u001b[0;34m(self, obj, **kwargs)\u001b[0m\n\u001b[1;32m 1251\u001b[0m blob \u001b[38;5;241m=\u001b[39m fh\u001b[38;5;241m.\u001b[39mread(force_buffer\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1252\u001b[0m passing_buffer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m-> 1254\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fh\u001b[38;5;241m.\u001b[39mext \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m passing_buffer:\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmust provide extension\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 1257\u001b[0m kwargs2[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mext\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m fh\u001b[38;5;241m.\u001b[39mext\n", + "File 
\u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/nglview/utils/py_utils.py:242\u001b[0m, in \u001b[0;36mFileManager.ext\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msrc, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mread\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_filename\n\u001b[1;32m 241\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_url):\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 243\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou must provide file extension if using file-like object or text content\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 244\u001b[0m )\n\u001b[1;32m 245\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mis_compressed:\n\u001b[1;32m 246\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msrc\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m]\n", + "\u001b[0;31mValueError\u001b[0m: you must provide file extension if using file-like object or text content" + ] + } + ], + "source": [ + "#show the image from the path of the registry and the ID \n", + "import nglview as nv\n", + "path = registry.get_mapped_path(\"1LYZ_160238\")\n", + "view = nv.show_file(path)\n", + "view" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment Result:\n", + "### Completed without 
Exception or TimeOut Errors ✅\n", + "### Attempted all necessary steps ✅\n", + "### Logic make sense ✅\n", + "### Correct Answer ✅\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_3 copy.ipynb b/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_3 copy.ipynb new file mode 100644 index 00000000..2d1fe1ff --- /dev/null +++ b/notebooks/experiments/experiment_k1/llama-v3p1-70b-instruct/exp_3 copy.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import os\n", + "from mdagent import MDAgent\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:46:39\n", + "LLM: accounts/fireworks/models/llama-v3p1-70b-instruct \n", + "Temperature: 0.1\n" + ] + } + ], + "source": [ + "prompt3 = \"Download the PDB file for PDB ID 1GZX. Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. 
there are.\"\n", + "llm_var = \"llama-v3p1-70b-instruct\"\n", + "tools = \"all\"\n", + "agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)\n", + "print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num of tokens: 374\n", + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "Thought: To solve this problem, I need to first download the PDB file for the given PDB ID 1GZX. Then, I can analyze the secondary structure of the protein and provide the required information.\n", + "\n", + "Action: PDBFileDownloader\n", + "Action Input: 1GZX\u001b[32;1m\u001b[1;3mThought: To solve this problem, I need to first download the PDB file for the given PDB ID 1GZX. Then, I can analyze the secondary structure of the protein and provide the required information.\n", + "\n", + "Action: PDBFileDownloader\n", + "Action Input: 1GZX\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "Here is a summary of the agent's attempts to solve the problem:\n", + "\n", + "**Prompt:** The agent was given a task to download the PDB file for PDB ID 1GZX and then analyze the secondary structure of the protein to provide information on the number of chains, sheets, etc.\n", + "\n", + "**Step 1:** The agent reflected on the problem and decided to first download the PDB file for the given PDB ID 1GZX. 
To do this, the agent chose the `PDBFileDownloader` tool and provided the input `1GZX`.\n", + "\n", + "**Current Status:** The agent has completed the first step of downloading the PDB file, but has not yet analyzed the secondary structure of the protein.\n", + "\n", + "**Next Steps:** The agent will likely use a tool to analyze the secondary structure of the protein, such as a tool that can parse the PDB file and extract information on chains, sheets, etc.\n", + "\n", + "Note: The final solution has not been reached yet, as the agent has only completed the first step of downloading the PDB file.Your run id is: WAX4OAJ3\n" + ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what \" \"to do next)\\n Action: (the action name, should be the name of a tool)\\n Action Input: (the input string to the action)\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, when all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the PDB file for PDB ID 1GZX. 
Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. there are. ',\n", + " 'output': 'Thought: To solve this problem, I need to first download the PDB file for the given PDB ID 1GZX. Then, I can analyze the secondary structure of the protein and provide the required information.\\n\\nAction: PDBFileDownloader\\nAction Input: 1GZX'},\n", + " 'WAX4OAJ3')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(prompt3)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date and time: 2024-08-05\n", + "time: 16:46:53\n" + ] + } + ], + "source": [ + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date and time:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No names found. The JSON file is empty or does not contain name mappings.\n" + ] + } + ], + "source": [ + "registry = agent.path_registry\n", + "print(registry.list_path_names_and_descriptions())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "ename": "OSError", + "evalue": "The topology is loaded by filename extension, and the detected \".\" format is not supported. 
Supported topology formats include \".pdb\", \".pdb.gz\", \".h5\", \".lh5\", \".prmtop\", \".parm7\", \".prm7\", \".psf\", \".mol2\", \".hoomdxml\", \".gro\", \".arc\", \".hdf5\" and \".gsd\".", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[12], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmdtraj\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mmd\u001b[39;00m\n\u001b[1;32m 2\u001b[0m path \u001b[38;5;241m=\u001b[39m registry\u001b[38;5;241m.\u001b[39mget_mapped_path(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m1GZX_161339\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m traj \u001b[38;5;241m=\u001b[39m md\u001b[38;5;241m.\u001b[39mload(path)\n\u001b[1;32m 4\u001b[0m top \u001b[38;5;241m=\u001b[39m traj\u001b[38;5;241m.\u001b[39mtopology\n\u001b[1;32m 5\u001b[0m number_of_chains \u001b[38;5;241m=\u001b[39m top\u001b[38;5;241m.\u001b[39mn_chains\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/mdtraj/core/trajectory.py:396\u001b[0m, in \u001b[0;36mload\u001b[0;34m(filename_or_filenames, discard_overlapping_frames, **kwargs)\u001b[0m\n\u001b[1;32m 394\u001b[0m topkwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstride\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 395\u001b[0m topkwargs\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 396\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m 
_parse_topology(kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop\u001b[39m\u001b[38;5;124m\"\u001b[39m, filename_or_filenames[\u001b[38;5;241m0\u001b[39m]), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mtopkwargs)\n\u001b[1;32m 398\u001b[0m \u001b[38;5;66;03m#get the right loader\u001b[39;00m\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 400\u001b[0m \u001b[38;5;66;03m#loader = _LoaderRegistry[extension][0]\u001b[39;00m\n", + "File \u001b[0;32m/opt/anaconda3/envs/llama-mdagent/lib/python3.11/site-packages/mdtraj/core/trajectory.py:191\u001b[0m, in \u001b[0;36m_parse_topology\u001b[0;34m(top, **kwargs)\u001b[0m\n\u001b[1;32m 189\u001b[0m topology \u001b[38;5;241m=\u001b[39m load_gsd_topology(top, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(top, (string_types, os\u001b[38;5;241m.\u001b[39mPathLike)):\n\u001b[0;32m--> 191\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mThe topology is loaded by filename extension, and the \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdetected \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m format is not supported. 
Supported topology \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 193\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mformats include \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m and \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m (\n\u001b[1;32m 194\u001b[0m ext, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m e \u001b[38;5;28;01mfor\u001b[39;00m e \u001b[38;5;129;01min\u001b[39;00m _TOPOLOGY_EXTS[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]]),\n\u001b[1;32m 195\u001b[0m _TOPOLOGY_EXTS[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]))\n\u001b[1;32m 196\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mA topology is required. You supplied top=\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mstr\u001b[39m(top))\n", + "\u001b[0;31mOSError\u001b[0m: The topology is loaded by filename extension, and the detected \".\" format is not supported. Supported topology formats include \".pdb\", \".pdb.gz\", \".h5\", \".lh5\", \".prmtop\", \".parm7\", \".prm7\", \".psf\", \".mol2\", \".hoomdxml\", \".gro\", \".arc\", \".hdf5\" and \".gsd\"." 
+ ] + } + ], + "source": [ + "import mdtraj as md\n", + "path = registry.get_mapped_path(\"1GZX_161339\")\n", + "traj = md.load(path)\n", + "top = traj.topology\n", + "number_of_chains = top.n_chains\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",number_of_chains)\n", + "print(\"Number of sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Experiment Result:\n", + "### Completed without Exception or TimeOut Errors ✅\n", + "### Attempted all necessary steps ✅\n", + "### Logic makes sense ✅\n", + "### Correct Answer ✅" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/setup.py b/setup.py index 0997dcdb..0110d064 100644 --- a/setup.py +++ b/setup.py @@ -22,8 +22,9 @@ "langchain==0.2.12", "langchain-chroma", "langchain-community", + "langchain-fireworks==0.1.7", "langchain-openai==0.1.19", - "langchain-together==0.1.4", + # "langchain-together==0.1.4", "matplotlib", "nbformat", "openai",