From 9ab3bfdc1243e3d2c5224f1f9a1fae1bd1cf43f8 Mon Sep 17 00:00:00 2001 From: Quinny Campbell Date: Fri, 18 Oct 2024 01:00:18 -0400 Subject: [PATCH] new notebooks for robustness --- .../gpt-4o-2024-05-13/prompt1.ipynb | 101 ----- .../gpt-4o-2024-08-06/prompt1.ipynb | 283 ++++++++++++ .../gpt-4o-2024-08-06/prompt10.ipynb | 404 ++++++++++++++++++ .../gpt-4o-2024-08-06/prompt2.ipynb | 368 ++++++++++++++++ .../gpt-4o-2024-08-06/prompt3.ipynb | 228 ++++++++++ .../gpt-4o-2024-08-06/prompt4.ipynb | 254 +++++++++++ .../gpt-4o-2024-08-06/prompt5.ipynb | 262 ++++++++++++ .../gpt-4o-2024-08-06/prompt6.ipynb | 302 +++++++++++++ .../gpt-4o-2024-08-06/prompt7.ipynb | 302 +++++++++++++ .../gpt-4o-2024-08-06/prompt8.ipynb | 356 +++++++++++++++ .../gpt-4o-2024-08-06/prompt9.ipynb | 356 +++++++++++++++ .../gpt-4o-2024-08-06}/prompt2.ipynb | 0 .../gpt-4o-2024-08-06}/prompt3.ipynb | 0 .../gpt-4o-2024-08-06}/prompt4.ipynb | 0 .../gpt-4o-2024-08-06}/prompt5.ipynb | 0 .../gpt-4o-2024-08-06}/prompt6.ipynb | 0 .../gpt-4o-2024-08-06}/prompt7.ipynb | 0 .../gpt-4o-2024-08-06}/prompt8.ipynb | 0 .../llama-v3p1-405b-instruct/prompt1.ipynb | 0 .../llama-v3p1-405b-instruct/prompt2.ipynb | 0 .../llama-v3p1-405b-instruct/prompt3.ipynb | 0 .../llama-v3p1-405b-instruct/prompt4.ipynb | 0 .../llama-v3p1-405b-instruct/prompt5.ipynb | 0 .../llama-v3p1-405b-instruct/prompt6.ipynb | 0 .../llama-v3p1-405b-instruct/prompt7.ipynb | 0 .../llama-v3p1-405b-instruct/prompt8.ipynb | 0 .../llama-v3p1-70b-instruct/prompt1.ipynb | 0 .../llama-v3p1-70b-instruct/prompt2.ipynb | 0 .../llama-v3p1-70b-instruct/prompt3.ipynb | 0 .../llama-v3p1-70b-instruct/prompt4.ipynb | 0 .../llama-v3p1-70b-instruct/prompt5.ipynb | 0 .../llama-v3p1-70b-instruct/prompt6.ipynb | 0 .../llama-v3p1-70b-instruct/prompt7.ipynb | 0 .../llama-v3p1-70b-instruct/prompt8.ipynb | 0 34 files changed, 3115 insertions(+), 101 deletions(-) delete mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt1.ipynb create mode 100644 
notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt1.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt10.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt2.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt3.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt4.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt5.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt6.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt7.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt8.ipynb create mode 100644 notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt9.ipynb rename notebooks/experiments/Robustness/{gpt-4o-2024-05-13 => outdated/gpt-4o-2024-08-06}/prompt2.ipynb (100%) rename notebooks/experiments/Robustness/{gpt-4o-2024-05-13 => outdated/gpt-4o-2024-08-06}/prompt3.ipynb (100%) rename notebooks/experiments/Robustness/{gpt-4o-2024-05-13 => outdated/gpt-4o-2024-08-06}/prompt4.ipynb (100%) rename notebooks/experiments/Robustness/{gpt-4o-2024-05-13 => outdated/gpt-4o-2024-08-06}/prompt5.ipynb (100%) rename notebooks/experiments/Robustness/{gpt-4o-2024-05-13 => outdated/gpt-4o-2024-08-06}/prompt6.ipynb (100%) rename notebooks/experiments/Robustness/{gpt-4o-2024-05-13 => outdated/gpt-4o-2024-08-06}/prompt7.ipynb (100%) rename notebooks/experiments/Robustness/{gpt-4o-2024-05-13 => outdated/gpt-4o-2024-08-06}/prompt8.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt1.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt2.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt3.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt4.ipynb 
(100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt5.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt6.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt7.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-405b-instruct/prompt8.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt1.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt2.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt3.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt4.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt5.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt6.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt7.ipynb (100%) rename notebooks/experiments/Robustness/{ => outdated}/llama-v3p1-70b-instruct/prompt8.ipynb (100%) diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt1.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt1.ipynb deleted file mode 100644 index 049e8243..00000000 --- a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt1.ipynb +++ /dev/null @@ -1,101 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mdagent import MDAgent\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "descriptive_prompt_1 = \"I need you to help me with MD tasks: 1.- Download the PDB file 1LYZ\"\n", - "non_descriptive_prompt_1 = \"Download the PDB file for 1LYZ\"" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "metadata": {}, - "outputs": [], - "source": [ - "llm_model = \"gpt-4o-2024-05-13\"\n", - "tools = \"all\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)\n", - "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "agent_1.run(descriptive_prompt_1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "agent_2.run(non_descriptive_prompt_1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from mdagent import MDAgent\n", - "agent = MDAgent(agent_type=\"Structured\", ckpt_dir=\"ckpt_178\")\n", - "registry = agent.path_registry\n", - "print(registry.list_path_names_and_descriptions())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "mdagent2", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt1.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt1.ipynb new file mode 100644 index 00000000..77d251a8 --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt1.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", 
+ "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_1 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ.\"\n", + "non_descriptive_prompt_1 = \"Download the PDB file 1LYZ.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date: 2024-10-18\n", + "time: 00:49:00\n" + ] + } + ], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: To complete the task, I need to download the PDB file for the protein with the ID \"1LYZ\". I will use the PDBFileDownloader tool to accomplish this.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"PDBFileDownloader\",\n", + " \"action_input\": {\n", + " \"query\": \"1LYZ\"\n", + " }\n", + "}\n", + "```PDB file found with this ID: 1LYZ\n", + "To complete the task, I have successfully downloaded the PDB file for the protein with the ID \"1LYZ\". \n", + "\n", + "Final Answer: The PDB file for 1LYZ has been successfully downloaded. The file ID is 1LYZ_004903." 
+ ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Complete all of the following tasks: 1. Download the PDB file 1LYZ. ',\n", + " 'output': 'To complete the task, I have successfully downloaded the PDB file for the protein with the ID \"1LYZ\". \\n\\nFinal Answer: The PDB file for 1LYZ has been successfully downloaded. 
The file ID is 1LYZ_004903.'},\n", + " 'X180EV5E')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_1.run(descriptive_prompt_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ckpt_dir: ckpt_19\n", + "Files found in registry: 1LYZ_004903: PDB file downloaded from RSCB\n", + " PDBFile ID: 1LYZ_004903\n" + ] + } + ], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "It is asserted that file path for 1LYZ_004903 exists\n" + ] + } + ], + "source": [ + "import re\n", + "import os\n", + "match = re.search(rf\"1LYZ_\\d+\", paths_and_descriptions)\n", + "file_id = match.group(0)\n", + "pdb_path = registry.get_mapped_path(file_id)\n", + "assert os.path.exists(pdb_path)\n", + "print(f'It is asserted that file path for {file_id} exists')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: To download the PDB file for the protein with the ID \"1LYZ\", I will use the PDBFileDownloader tool. 
\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"PDBFileDownloader\",\n", + " \"action_input\": \"1LYZ\"\n", + "}\n", + "```PDB file found with this ID: 1LYZ\n", + "The PDB file for the protein with the ID \"1LYZ\" has been successfully downloaded. The file is named \"1LYZ_004906\".\n", + "\n", + "Final Answer: The PDB file for 1LYZ has been successfully downloaded and is named \"1LYZ_004906\"." + ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the PDB file 1LYZ. ',\n", + " 'output': 'The PDB file for the protein with the ID \"1LYZ\" has been successfully downloaded. 
The file is named \"1LYZ_004906\".\\n\\nFinal Answer: The PDB file for 1LYZ has been successfully downloaded and is named \"1LYZ_004906\".'},\n", + " 'BDDANVWX')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_2.run(non_descriptive_prompt_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ckpt_dir: ckpt_20\n", + "Files found in registry: 1LYZ_004906: PDB file downloaded from RSCB\n", + " PDBFile ID: 1LYZ_004906\n" + ] + } + ], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "It is asserted that file path for 1LYZ_004906 exists\n" + ] + } + ], + "source": [ + "import re\n", + "import os\n", + "match = re.search(rf\"1LYZ_\\d+\", paths_and_descriptions)\n", + "file_id = match.group(0)\n", + "pdb_path = registry.get_mapped_path(file_id)\n", + "assert os.path.exists(pdb_path)\n", + "print(f'It is asserted that file path for {file_id} exists')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt10.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt10.ipynb new file mode 100644 
index 00000000..ff7e787a --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt10.ipynb @@ -0,0 +1,404 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_10 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. Simulate for 1 ps at 300 K. 4. Compute the RMSD of the simulation at 300 K. 5. Compute the radius of gyration over time for the simulation at 300 K. 6. Compute the SASA (solvent accessible surface area) for the simulation at 300 K. 7. Simulate for 1 ps at 400 K. 8. Compute the RMSD of the simulation at 400 K. 9. Compute the radius of gyration over time for the simulation at 400 K. 10. Compute the SASA (solvent accessible surface area) for the simulation at 400 K.\"\n", + "non_descriptive_prompt_10 = \"Simulate 1LYZ for 1 ps at 300K and 400K. 
Tell me how many residues are in the chains and beta sheets, and compute the RMSD, SASA, and radius of gyration of both simulations.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "matches = re.findall(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id1 = matches[0]\n", + "traj_id2 = matches[1]\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "assert os.path.exists(traj_path1)\n", + "assert os.path.exists(traj_path2)\n", + "\n", + "matches = re.findall(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id1 
= matches[0]\n", + "top_id2 = matches[1]\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "assert os.path.exists(top_path1)\n", + "assert os.path.exists(top_path2)\n", + "print(f\"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "print(\"First simulation:\")\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "traj = md.load(traj_path1, top=top_path1)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n", + "\n", + "print(\"Second simulation:\")\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "traj = md.load(traj_path2, top=top_path2)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = 
re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id4 = matches[3]\n", + "fig_path4 = registry.get_mapped_path(fig_id4)\n", + "assert os.path.exists(fig_path4), f\"This figure {fig_id4} does not exist.\"\n", + "Image(filename=fig_path4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id5 = matches[4]\n", + "fig_path5 = registry.get_mapped_path(fig_id5)\n", + "assert os.path.exists(fig_path5), f\"This figure {fig_id5} does not exist.\"\n", + "Image(filename=fig_path5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id6 = matches[5]\n", + "fig_path6 = registry.get_mapped_path(fig_id6)\n", + "assert os.path.exists(fig_path6), f\"This figure {fig_id6} does not exist.\"\n", + "Image(filename=fig_path6)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "matches = re.findall(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id1 = matches[0]\n", + "traj_id2 = matches[1]\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "assert os.path.exists(traj_path1)\n", + "assert os.path.exists(traj_path2)\n", + "\n", + "matches = re.findall(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id1 = matches[0]\n", + "top_id2 = matches[1]\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "assert os.path.exists(top_path1)\n", + "assert os.path.exists(top_path2)\n", + "print(f\"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "print(\"First simulation:\")\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "traj = md.load(traj_path1, top=top_path1)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + 
"print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n", + "\n", + "print(\"Second simulation:\")\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "traj = md.load(traj_path2, top=top_path2)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + 
"Image(filename=fig_path3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id4 = matches[3]\n", + "fig_path4 = registry.get_mapped_path(fig_id4)\n", + "assert os.path.exists(fig_path4), f\"This figure {fig_id4} does not exist.\"\n", + "Image(filename=fig_path4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id5 = matches[4]\n", + "fig_path5 = registry.get_mapped_path(fig_id5)\n", + "assert os.path.exists(fig_path5), f\"This figure {fig_id5} does not exist.\"\n", + "Image(filename=fig_path5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id6 = matches[5]\n", + "fig_path6 = registry.get_mapped_path(fig_id6)\n", + "assert os.path.exists(fig_path6), f\"This figure {fig_id6} does not exist.\"\n", + "Image(filename=fig_path6)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt2.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt2.ipynb new file mode 100644 index 00000000..9c67ebac --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt2.ipynb @@ -0,0 +1,368 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_2 = \"Complete all of the 
following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets.\"\n", + "non_descriptive_prompt_2 = \"Download the PDB file 1LYZ. Tell me how many residues are in the chains and beta sheets.\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "date: 2024-10-18\n", + "time: 00:50:38\n" + ] + } + ], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: To complete the tasks, I need to first download the PDB file for 1LYZ. 
After downloading, I will analyze the structure to determine the number of residues in the chains and beta sheets.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"PDBFileDownloader\",\n", + " \"action_input\": \"1LYZ\"\n", + "}\n", + "```PDB file found with this ID: 1LYZ\n", + "To determine the number of residues in the chains and beta sheets, I need to analyze the downloaded PDB file for 1LYZ.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"SummarizeProteinStructure\",\n", + " \"action_input\": {\n", + " \"traj_file\": \"1LYZ_005041\"\n", + " }\n", + "}\n", + "```Action:\n", + "```json\n", + "{\n", + " \"action\": \"ComputeDSSP\",\n", + " \"action_input\": {\n", + " \"traj_file\": \"1LYZ_005041\",\n", + " \"target_frames\": \"all\"\n", + " }\n", + "}\n", + "```To complete the tasks, I need to provide the number of residues in the chains and beta sheets based on the observations from the DSSP analysis.\n", + "\n", + "Final Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. According to the DSSP analysis, there are 14 residues in beta sheets (strands)." 
+ ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. ',\n", + " 'output': 'To complete the tasks, I need to provide the number of residues in the chains and beta sheets based on the observations from the DSSP analysis.\\n\\nFinal Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. 
According to the DSSP analysis, there are 14 residues in beta sheets (strands).'},\n", + " 'ZRVGGF82')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_1.run(descriptive_prompt_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ckpt_dir: ckpt_21\n", + "Files found in registry: 1LYZ_005041: PDB file downloaded from RSCB\n", + " PDBFile ID: 1LYZ_005041\n", + " rec0_005045: dssp values for trajectory with id: 1LYZ_005041\n" + ] + } + ], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "It is asserted that file path for 1LYZ_005041 exists\n", + "Number of chains: 2\n", + "Number of sheets: 14\n", + "Number of helices: 50\n", + "Number of coils: 65\n" + ] + } + ], + "source": [ + "import mdtraj as md\n", + "import os\n", + "import re\n", + "\n", + "match = re.search(rf\"1LYZ_\\d+\", paths_and_descriptions)\n", + "file_id = match.group(0)\n", + "pdb_path = registry.get_mapped_path(file_id)\n", + "assert os.path.exists(pdb_path)\n", + "print(f'It is asserted that file path for {file_id} exists')\n", + "\n", + "traj = md.load(pdb_path)\n", + "#get dssp \n", + "number_of_chains = traj.n_chains\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",number_of_chains)\n", + "print(\"Number of sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of coils: \",len([i for i in 
secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thought: To answer the question, I need to first download the PDB file for 1LYZ. Then, I will analyze the structure to determine the number of residues in the chains and beta sheets. I will start by downloading the PDB file.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"PDBFileDownloader\",\n", + " \"action_input\": \"1LYZ\"\n", + "}\n", + "```PDB file found with this ID: 1LYZ\n", + "To determine the number of residues in the chains and beta sheets, I need to analyze the downloaded PDB file. I will use the tool to get the number and location of turns, beta sheets, and helices for the protein.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"GetTurnsBetaSheetsHelices\",\n", + " \"action_input\": {\n", + " \"query\": \"1LYZ\",\n", + " \"primary_accession\": \"\"\n", + " }\n", + "}\n", + "```To accurately determine the number of residues in the chains and beta sheets, I need to analyze the PDB file directly for structural information. I will summarize the protein structure to get the number of residues in the chains and then specifically look for beta sheets.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"SummarizeProteinStructure\",\n", + " \"action_input\": {\n", + " \"traj_file\": \"1LYZ_005049\"\n", + " }\n", + "}\n", + "```To determine the number of residues in the beta sheets, I need to analyze the secondary structure of the protein. 
Since the previous attempt to get the beta sheets information returned empty, I will use the DSSP tool to compute the secondary structure assignment for the protein trajectory, which will help identify the beta sheets.\n", + "\n", + "Action:\n", + "```json\n", + "{\n", + " \"action\": \"ComputeDSSP\",\n", + " \"action_input\": {\n", + " \"traj_file\": \"1LYZ_005049\",\n", + " \"target_frames\": \"all\"\n", + " }\n", + "}\n", + "```To provide a complete answer, I need to summarize the findings from the analysis of the PDB file 1LYZ. The analysis revealed the following:\n", + "\n", + "- The protein has 230 residues in total.\n", + "- There are 2 chains in the protein.\n", + "- The secondary structure analysis using DSSP indicates that there are 14 residues in beta sheets (strands).\n", + "\n", + "Final Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. Among these, 14 residues are part of beta sheets." + ] + }, + { + "data": { + "text/plain": [ + "({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the 
question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the PDB file 1LYZ. Tell me how many residues are in the chains and beta sheets. ',\n", + " 'output': 'To provide a complete answer, I need to summarize the findings from the analysis of the PDB file 1LYZ. The analysis revealed the following:\\n\\n- The protein has 230 residues in total.\\n- There are 2 chains in the protein.\\n- The secondary structure analysis using DSSP indicates that there are 14 residues in beta sheets (strands).\\n\\nFinal Answer: The PDB file 1LYZ contains 230 residues in total, distributed across 2 chains. Among these, 14 residues are part of beta sheets.'},\n", + " 'U7AP098N')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_2.run(non_descriptive_prompt_2)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ckpt_dir: ckpt_22\n", + "Files found in registry: 1LYZ_005049: PDB file downloaded from RSCB\n", + " PDBFile ID: 1LYZ_005049\n", + " rec0_005057: dssp values for trajectory with id: 1LYZ_005049\n" + ] + } + ], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "It is asserted that file path for 1LYZ_005049 exists\n", + "Number of chains: 2\n", + "Number of sheets: 14\n", + "Number of helices: 50\n", + "Number of coils: 65\n" + ] + 
} + ], + "source": [ + "import mdtraj as md\n", + "import os\n", + "import re\n", + "\n", + "match = re.search(rf\"1LYZ_\\d+\", paths_and_descriptions)\n", + "file_id = match.group(0)\n", + "pdb_path = registry.get_mapped_path(file_id)\n", + "assert os.path.exists(pdb_path)\n", + "print(f'It is asserted that file path for {file_id} exists')\n", + "\n", + "traj = md.load(pdb_path)\n", + "#get dssp \n", + "number_of_chains = traj.n_chains\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",number_of_chains)\n", + "print(\"Number of sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt3.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt3.ipynb new file mode 100644 index 00000000..dcd98caa --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt3.ipynb @@ -0,0 +1,228 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_3 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. 
Simulate for 1 ps at 300 K\"\n", + "non_descriptive_prompt_3 = \"Simulate 1LYZ for 1 ps at 300 K. Tell me how many residues are in the chains and beta sheets.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", +
"assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame (compute_dssp returns one row per frame, hence the [0] below); change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", +
"import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame (compute_dssp returns one row per frame, hence the [0] below); change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt4.ipynb
b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt4.ipynb new file mode 100644 index 00000000..33161b22 --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt4.ipynb @@ -0,0 +1,254 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_4 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. Simulate for 1 ps at 300 K. 4. Compute the RMSD.\"\n", + "non_descriptive_prompt_4 = \"Simulate 1LYZ for 1 ps at 300 K. Tell me how many residues are in the chains and beta sheets and compute the RMSD of the simulation.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", +
"print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame (compute_dssp returns one row per frame, hence the [0] below); change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "match = re.search(rf\"fig0_\\d+\", paths_and_descriptions)\n", +
"fig_id = match.group(0)\n", + "fig_path = registry.get_mapped_path(fig_id)\n", + "assert os.path.exists(fig_path), \"This figure does not exist.\"\n", + "Image(filename=fig_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = 
registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "match = re.search(rf\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id = match.group(0)\n", + "fig_path = registry.get_mapped_path(fig_id)\n", + "assert os.path.exists(fig_path), \"This figure does not exist.\"\n", + "Image(filename=fig_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt5.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt5.ipynb new file mode 100644 index 00000000..ad820070 --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt5.ipynb @@ -0,0 +1,262 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_5
= \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. Simulate for 1 ps at 300 K. 4. Compute the RMSD. 5. Compute the radius of gyration over time.\"\n", + "non_descriptive_prompt_5 = \"Simulate 1LYZ for 1 ps at 300 K. Tell me how many residues are in the chains and beta sheets, and compute the RMSD and radius of gyration of the simulation.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = 
registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame (compute_dssp returns one row per frame, hence the [0] below); change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image, display\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_id2 = matches[1]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "display(Image(filename=fig_path1))\n", + "display(Image(filename=fig_path2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, +
"source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of 
chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image, display\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_id2 = matches[1]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "display(Image(filename=fig_path1))\n", + "display(Image(filename=fig_path2))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt6.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt6.ipynb new file mode 100644 index 00000000..9d83441e --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt6.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_6 = \"Complete all of the following
tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. Simulate for 1 ps at 300 K. 4. Compute the RMSD. 5. Compute the radius of gyration over time. 6. Compute the SASA (solvent accessible surface area).\"\n", + "non_descriptive_prompt_6 = \"Simulate 1LYZ for 1 ps at 300 K. Tell me how many residues are in the chains and beta sheets, and compute the RMSD, SASA, and radius of gyration of the simulation.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = 
match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2),
f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", +
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython",
+ "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt7.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt7.ipynb new file mode 100644 index 00000000..75f35b19 --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt7.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_7 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. Find any known binding or active sites. 4. Simulate for 1 ps at 300 K. 5. Compute the RMSD. 6. Compute the radius of gyration over time. 7. Compute the SASA (solvent accessible surface area).\"\n", + "non_descriptive_prompt_7 = \"Simulate 1LYZ for 1 ps at 300 K. Tell me how many residues are in the chains and beta sheets and any known binding or active sites. 
Compute the RMSD, SASA, and radius of gyration of the simulation.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert 
traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", +
"Image(filename=fig_path3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_7)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "match = re.search(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id = match.group(0)\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "match = re.search(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id = match.group(0)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "assert os.path.exists(traj_path), \"Trajectory file doesn't exist\"\n", + "assert os.path.exists(top_path), \"Topology file doesn't exist\"\n", + "assert traj_path.endswith(\".dcd\")\n", + "assert top_path.endswith(\".pdb\")\n", + "print(f\"It's asserted that file paths for {traj_id} and {top_id} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "traj_path = registry.get_mapped_path(traj_id)\n", + "top_path = registry.get_mapped_path(top_id)\n", + "traj = md.load(traj_path, top=top_path)[0] # first frame\n", + "\n", +
"secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt8.ipynb
b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt8.ipynb new file mode 100644 index 00000000..8733049e --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt8.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_8 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. Simulate for 1 ps at 300 K. 4. Compute the RMSD of the simulation at 300 K. 5. Compute the radius of gyration over time for the simulation at 300 K. 6. Simulate for 1 ps at 400 K. 7. Compute the RMSD of the simulation at 400 K. 8. Compute the radius of gyration over time for the simulation at 400 K.\"\n", + "non_descriptive_prompt_8 = \"Simulate 1LYZ for 1 ps at 300K and 400K. 
Tell me how many residues are in the chains and beta sheets, and compute the RMSD and radius of gyration of both simulations.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "matches = re.findall(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id1 = matches[0]\n", + "traj_id2 = matches[1]\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "assert os.path.exists(traj_path1)\n", + "assert os.path.exists(traj_path2)\n", + "\n", + "matches = re.findall(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id1 = 
matches[0]\n", + "top_id2 = matches[1]\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "assert os.path.exists(top_path1)\n", + "assert os.path.exists(top_path2)\n", + "print(f\"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "print(\"First simulation:\")\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "traj = md.load(traj_path1, top=top_path1)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n", + "\n", + "print(\"Second simulation:\")\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "traj = md.load(traj_path2, top=top_path2)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches =
re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id4 = matches[3]\n", + "fig_path4 = registry.get_mapped_path(fig_id4)\n", + "assert os.path.exists(fig_path4), f\"This figure {fig_id4} does not exist.\"\n", + "Image(filename=fig_path4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { +
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "matches = re.findall(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id1 = matches[0]\n", + "traj_id2 = matches[1]\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "assert os.path.exists(traj_path1)\n", + "assert os.path.exists(traj_path2)\n", + "\n", + "matches = re.findall(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id1 = matches[0]\n", + "top_id2 = matches[1]\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "assert os.path.exists(top_path1)\n", + "assert os.path.exists(top_path2)\n", + "print(f\"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "print(\"First simulation:\")\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "traj = md.load(traj_path1, top=top_path1)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n", + "\n", + "print(\"Second simulation:\")\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "traj = md.load(traj_path2, top=top_path2)[0] # first frame\n", + "secondary_structure =
md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id4 = matches[3]\n", + "fig_path4 = registry.get_mapped_path(fig_id4)\n", + "assert os.path.exists(fig_path4), f\"This figure {fig_id4} does not exist.\"\n", + "Image(filename=fig_path4)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", +
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt9.ipynb b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt9.ipynb new file mode 100644 index 00000000..efc892d8 --- /dev/null +++ b/notebooks/experiments/Robustness/gpt-4o-2024-08-06/prompt9.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mdagent import MDAgent\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "descriptive_prompt_9 = \"Complete all of the following tasks: 1. Download the PDB file 1LYZ. 2. Tell me how many residues are in the chains and beta sheets. 3. Find any known binding or active sites. 4. Simulate for 1 ps at 300 K. 5. Compute the RMSD of the simulation at 300 K. 6. Compute the radius of gyration over time for the simulation at 300 K. 7. Simulate for 1 ps at 400 K. 8. Compute the RMSD of the simulation at 400 K. 9. Compute the radius of gyration over time for the simulation at 400 K.\" \n", + "non_descriptive_prompt_9 = \"Simulate 1LYZ for 1 ps at 300K and 400K. Tell me how many residues are in the chains and beta sheets and any known binding or active sites. 
Compute RMSD and radius of gyration of both simulations.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_model = \"gpt-4o-2024-08-06\"\n", + "tools = \"all\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#todays date and time\n", + "import datetime\n", + "now = datetime.datetime.now()\n", + "date = now.strftime(\"%Y-%m-%d\")\n", + "print(\"date:\",date)\n", + "time = now.strftime(\"%H:%M:%S\")\n", + "print(\"time:\",time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_1.run(descriptive_prompt_9)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_1.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "import os\n", + "matches = re.findall(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id1 = matches[0]\n", + "traj_id2 = matches[1]\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "assert os.path.exists(traj_path1)\n", + "assert os.path.exists(traj_path2)\n", + "\n", + "matches = re.findall(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id1 = matches[0]\n", + "top_id2 = matches[1]\n", + "top_path1 = 
registry.get_mapped_path(top_id1)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "assert os.path.exists(top_path1)\n", + "assert os.path.exists(top_path2)\n", + "print(f\"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "print(\"First simulation:\")\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "traj = md.load(traj_path1, top=top_path1)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n", + "\n", + "print(\"Second simulation:\")\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "traj = md.load(traj_path2, top=top_path2)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 =
matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id4 = matches[3]\n", + "fig_path4 = registry.get_mapped_path(fig_id4)\n", + "assert os.path.exists(fig_path4), f\"This figure {fig_id4} does not exist.\"\n", + "Image(filename=fig_path4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# non-descriptive prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2 = MDAgent(agent_type=\"Structured\", model=llm_model, top_k_tools=tools)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agent_2.run(non_descriptive_prompt_9)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "registry = agent_2.path_registry\n", + "print(\"ckpt_dir:\",os.path.basename(registry.ckpt_dir))\n", + "paths_and_descriptions = registry.list_path_names_and_descriptions()\n", + "print(\"\\n\".join(paths_and_descriptions.split(\",\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "import re\n", + "import os\n", + "matches = re.findall(r\"rec0_\\d+\", paths_and_descriptions)\n", + "traj_id1 = matches[0]\n", + "traj_id2 = matches[1]\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "assert os.path.exists(traj_path1)\n", + "assert os.path.exists(traj_path2)\n", + "\n", + "matches = re.findall(r\"top_sim0_\\d+\", paths_and_descriptions)\n", + "top_id1 = matches[0]\n", + "top_id2 = matches[1]\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "assert os.path.exists(top_path1)\n", + "assert os.path.exists(top_path2)\n", + "print(f\"It's asserted that file paths for {traj_id1}, {top_id1}, {traj_id2} and {top_id2} exist\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this checks DSSP for the first frame, change to the last frame if needed\n", + "import mdtraj as md\n", + "\n", + "print(\"First simulation:\")\n", + "traj_path1 = registry.get_mapped_path(traj_id1)\n", + "top_path1 = registry.get_mapped_path(top_id1)\n", + "traj = md.load(traj_path1, top=top_path1)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)[0] # result is (n_frames, n_residues); take the single loaded frame\n", + "print(\"Number of chains: \",traj.n_chains)\n", + "print(\"Number of residues in sheets: \",len([i for i in secondary_structure if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure if i == 'C']))\n", + "\n", + "print(\"Second simulation:\")\n", + "traj_path2 = registry.get_mapped_path(traj_id2)\n", + "top_path2 = registry.get_mapped_path(top_id2)\n", + "traj = md.load(traj_path2, top=top_path2)[0] # first frame\n", + "secondary_structure = md.compute_dssp(traj,simplified=True)[0] # result is (n_frames, n_residues); take the single loaded frame\n", + "print(\"Number of chains: \",traj.n_chains)\n", + 
"print(\"Number of residues in sheets: \",len([i for i in secondary_structure if i == 'E']))\n", + "print(\"Number of residues in helices: \",len([i for i in secondary_structure if i == 'H']))\n", + "print(\"Number of residues in coils: \",len([i for i in secondary_structure if i == 'C']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "matches = re.findall(r\"fig0_\\d+\", paths_and_descriptions)\n", + "fig_id1 = matches[0]\n", + "fig_path1 = registry.get_mapped_path(fig_id1)\n", + "assert os.path.exists(fig_path1), f\"This figure {fig_id1} does not exist.\"\n", + "Image(filename=fig_path1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id2 = matches[1]\n", + "fig_path2 = registry.get_mapped_path(fig_id2)\n", + "assert os.path.exists(fig_path2), f\"This figure {fig_id2} does not exist.\"\n", + "Image(filename=fig_path2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id3 = matches[2]\n", + "fig_path3 = registry.get_mapped_path(fig_id3)\n", + "assert os.path.exists(fig_path3), f\"This figure {fig_id3} does not exist.\"\n", + "Image(filename=fig_path3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig_id4 = matches[3]\n", + "fig_path4 = registry.get_mapped_path(fig_id4)\n", + "assert os.path.exists(fig_path4), f\"This figure {fig_id4} does not exist.\"\n", + "Image(filename=fig_path4)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mdagent", + "language": "python", + "name": "mdagent" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt2.ipynb b/notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt2.ipynb similarity index 100% rename from notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt2.ipynb rename to notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt2.ipynb diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt3.ipynb b/notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt3.ipynb similarity index 100% rename from notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt3.ipynb rename to notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt3.ipynb diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt4.ipynb b/notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt4.ipynb similarity index 100% rename from notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt4.ipynb rename to notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt4.ipynb diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt5.ipynb b/notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt5.ipynb similarity index 100% rename from notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt5.ipynb rename to notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt5.ipynb diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt6.ipynb b/notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt6.ipynb similarity index 100% rename from notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt6.ipynb rename to notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt6.ipynb diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt7.ipynb b/notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt7.ipynb similarity index 100% rename from notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt7.ipynb 
rename to notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt7.ipynb diff --git a/notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt8.ipynb b/notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt8.ipynb similarity index 100% rename from notebooks/experiments/Robustness/gpt-4o-2024-05-13/prompt8.ipynb rename to notebooks/experiments/Robustness/outdated/gpt-4o-2024-08-06/prompt8.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt1.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt1.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt1.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt1.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt2.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt2.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt2.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt2.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt3.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt3.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt3.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt3.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt4.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt4.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt4.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt4.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt5.ipynb 
b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt5.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt5.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt5.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt6.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt6.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt6.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt6.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt7.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt7.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt7.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt7.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt8.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt8.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-405b-instruct/prompt8.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-405b-instruct/prompt8.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt1.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt1.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt1.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt1.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt2.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt2.ipynb similarity index 100% rename from 
notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt2.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt2.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt3.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt3.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt3.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt3.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt4.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt4.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt4.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt4.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt5.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt5.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt5.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt5.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt6.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt6.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt6.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt6.ipynb diff --git a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt7.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt7.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt7.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt7.ipynb diff --git 
a/notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt8.ipynb b/notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt8.ipynb similarity index 100% rename from notebooks/experiments/Robustness/llama-v3p1-70b-instruct/prompt8.ipynb rename to notebooks/experiments/Robustness/outdated/llama-v3p1-70b-instruct/prompt8.ipynb