From e1f1c484bfa50f46c94b6bdf6b63ffbb5e27570b Mon Sep 17 00:00:00 2001
From: Samantha Cox <samantha.wright822@gmail.com>
Date: Mon, 5 Aug 2024 23:07:33 -0700
Subject: [PATCH] claude 3 opus exp 2 and 3

---
 .../experiment_k1/claude_3_opus/exp_2.ipynb   | 176 ++++++++++
 .../experiment_k1/claude_3_opus/exp_3.ipynb   | 304 ++++++++++++++++++
 2 files changed, 480 insertions(+)
 create mode 100644 notebooks/experiments/experiment_k1/claude_3_opus/exp_2.ipynb
 create mode 100644 notebooks/experiments/experiment_k1/claude_3_opus/exp_3.ipynb

diff --git a/notebooks/experiments/experiment_k1/claude_3_opus/exp_2.ipynb b/notebooks/experiments/experiment_k1/claude_3_opus/exp_2.ipynb
new file mode 100644
index 00000000..51d5b707
--- /dev/null
+++ b/notebooks/experiments/experiment_k1/claude_3_opus/exp_2.ipynb
@@ -0,0 +1,176 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mdagent import MDAgent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prompt2 = \"Download the pdb file for PDB ID 1LYZ.\"\n",
+    "agent = MDAgent(agent_type=\"Structured\", model=\"claude-3-opus-20240229\", top_k_tools=\"all\", use_memory=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Num of tokens: 351\n",
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Thought: To download the PDB file for 1LYZ, I can use the PDBFileDownloader tool.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"PDBFileDownloader\",\n",
+      "  \"action_input\": \"1LYZ\"\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[32;1m\u001b[1;3mThought: To download the PDB file for 1LYZ, I can use the PDBFileDownloader tool.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"PDBFileDownloader\",\n",
+      "  \"action_input\": \"1LYZ\"\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0mPDB file found with this ID: 1LYZ\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-08-05 22:48:30.873 \n",
+      "  \u001b[33m\u001b[1mWarning:\u001b[0m to view this Streamlit app on a browser, run it with the following\n",
+      "  command:\n",
+      "\n",
+      "    streamlit run /opt/anaconda3/envs/mda_jun15/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Observation: \u001b[38;5;200m\u001b[1;3mSucceeded. Downloaded the PDB file:1LYZ_224831\u001b[0m\n",
+      "Thought:I have successfully downloaded the PDB file for 1LYZ using the PDBFileDownloader tool. No further actions are needed.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"Final Answer\",\n",
+      "  \"action_input\": \"I have downloaded the PDB file for the protein with PDB ID 1LYZ. The file has been saved with the ID 1LYZ_224831.\"\n",
+      "}\n",
+      "```\u001b[32;1m\u001b[1;3mI have successfully downloaded the PDB file for 1LYZ using the PDBFileDownloader tool. No further actions are needed.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"Final Answer\",\n",
+      "  \"action_input\": \"I have downloaded the PDB file for the protein with PDB ID 1LYZ. The file has been saved with the ID 1LYZ_224831.\"\n",
+      "}\n",
+      "```\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "({'input': '\\n    You are an expert molecular dynamics scientist and\\n    your task is to respond to the question or\\n    solve the problem to the best of your ability using\\n    the provided tools.\\n\\n    You can only respond with a single complete\\n    Thought, Action, Action Input\\' format\\n    OR a single \\'Final Answer\\' format.\\n\\n    Complete format:\\n    Thought: (reflect on your progress and decide what \" \"to do next)\\n    Action: (the action name, should be the name of a tool)\\n    Action Input: (the input string to the action)\\n\\n    OR\\n\\n    Final Answer: (the final response to the original input\\n    question, when all steps are complete)\\n\\n    You are required to use the tools provided,\\n    using the most specific tool\\n    available for each action.\\n    Your final answer should contain all information\\n    necessary to answer the question and subquestions.\\n    Before you finish, reflect on your progress and make\\n    sure you have addressed the question in its entirety.\\n\\n    If you are asked to continue\\n    or reference previous runs,\\n    the context will be provided to you.\\n    If context is provided, you should assume\\n    you are continuing a chat.\\n\\n    Here is the input:\\n    Previous Context: None\\n    Question: Download the pdb file for PDB ID 1LYZ. ',\n",
+       "  'output': 'I have downloaded the PDB file for the protein with PDB ID 1LYZ. The file has been saved with the ID 1LYZ_224831.'},\n",
+       " '4NMLS7MI')"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "agent.run(prompt2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Files found in registry: 1LYZ_224831: PDB file downloaded from RSCB, PDBFile ID: 1LYZ_224831\n"
+     ]
+    }
+   ],
+   "source": [
+    "registry = agent.path_registry\n",
+    "print(registry.list_path_names_and_descriptions())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mdagent2",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/experiments/experiment_k1/claude_3_opus/exp_3.ipynb b/notebooks/experiments/experiment_k1/claude_3_opus/exp_3.ipynb
new file mode 100644
index 00000000..1b6f4ae6
--- /dev/null
+++ b/notebooks/experiments/experiment_k1/claude_3_opus/exp_3.ipynb
@@ -0,0 +1,304 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mdagent import MDAgent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "prompt3 = \"Download the PDB file for PDB ID 1GZX. Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. there are.\"\n",
+    "agent = MDAgent(agent_type=\"Structured\", model=\"claude-3-opus-20240229\", top_k_tools=\"all\", use_memory=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Num of tokens: 374\n",
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Thought: To answer this question, I will first need to download the PDB file for the protein with PDB ID 1GZX. Then I can analyze the secondary structure of the protein to determine the number of chains, sheets, helices, and other structural elements.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"PDBFileDownloader\",\n",
+      "  \"action_input\": \"1GZX\"\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[32;1m\u001b[1;3mThought: To answer this question, I will first need to download the PDB file for the protein with PDB ID 1GZX. Then I can analyze the secondary structure of the protein to determine the number of chains, sheets, helices, and other structural elements.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"PDBFileDownloader\",\n",
+      "  \"action_input\": \"1GZX\"\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0mPDB file found with this ID: 1GZX\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-08-05 22:50:15.108 \n",
+      "  \u001b[33m\u001b[1mWarning:\u001b[0m to view this Streamlit app on a browser, run it with the following\n",
+      "  command:\n",
+      "\n",
+      "    streamlit run /opt/anaconda3/envs/mda_jun15/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Observation: \u001b[38;5;200m\u001b[1;3mSucceeded. Downloaded the PDB file:1GZX_225015\u001b[0m\n",
+      "Thought:Thought: Now that I have downloaded the PDB file for 1GZX, I can analyze the secondary structure to determine the number of chains, sheets, helices, and turns.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"GetTurnsBetaSheetsHelices\",\n",
+      "  \"action_input\": {\n",
+      "    \"query\": \"1GZX\",\n",
+      "    \"primary_accession\": \"P0A7B8\"\n",
+      "  }\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[32;1m\u001b[1;3mThought: Now that I have downloaded the PDB file for 1GZX, I can analyze the secondary structure to determine the number of chains, sheets, helices, and turns.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"GetTurnsBetaSheetsHelices\",\n",
+      "  \"action_input\": {\n",
+      "    \"query\": \"1GZX\",\n",
+      "    \"primary_accession\": \"P0A7B8\"\n",
+      "  }\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0mThe primary accession number provided does not match any entry in the data, using the first entry instead.\n",
+      "The primary accession number provided does not match any entry in the data, using the first entry instead.\n",
+      "The primary accession number provided does not match any entry in the data, using the first entry instead.\n",
+      "\n",
+      "Observation: \u001b[38;5;200m\u001b[1;3mTurns: [{'start': 78, 'start_modifier': 'EXACT', 'end': 80, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '1IRD'}]}]\n",
+      "Beta sheets: []\n",
+      "Helices: [{'start': 6, 'start_modifier': 'EXACT', 'end': 17, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 21, 'start_modifier': 'EXACT', 'end': 35, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 37, 'start_modifier': 'EXACT', 'end': 42, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 44, 'start_modifier': 'EXACT', 'end': 46, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '1IRD'}]}, {'start': 52, 'start_modifier': 'EXACT', 'end': 57, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 59, 'start_modifier': 'EXACT', 'end': 75, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 82, 'start_modifier': 'EXACT', 'end': 95, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 102, 'start_modifier': 'EXACT', 'end': 119, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 120, 'start_modifier': 'EXACT', 'end': 122, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 125, 'start_modifier': 'EXACT', 'end': 143, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 144, 'start_modifier': 'EXACT', 'end': 146, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '1IRD'}]}]\u001b[0m\n",
+      "Thought:Thought: The UniProt API provided useful information about the secondary structure elements, but to fully answer the question I still need to determine the number of chains in the PDB structure. I can use a tool to summarize the key structural properties of the PDB file.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"SummarizeProteinStructure\",\n",
+      "  \"action_input\": {\n",
+      "    \"traj_file\": \"1GZX_225015\",\n",
+      "    \"top_file\": null\n",
+      "  }\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[32;1m\u001b[1;3mThought: The UniProt API provided useful information about the secondary structure elements, but to fully answer the question I still need to determine the number of chains in the PDB structure. I can use a tool to summarize the key structural properties of the PDB file.\n",
+      "\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"SummarizeProteinStructure\",\n",
+      "  \"action_input\": {\n",
+      "    \"traj_file\": \"1GZX_225015\",\n",
+      "    \"top_file\": null\n",
+      "  }\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3m{'n_atoms': 4769, 'n_residues': 787, 'n_chains': 12, 'n_frames': 1, 'n_bonds': 4716}\u001b[0m\n",
+      "Thought:Based on the analysis, here is a summary of the secondary structure for the protein with PDB ID 1GZX:\n",
+      "\n",
+      "The PDB structure contains 12 chains total. \n",
+      "\n",
+      "In terms of secondary structure elements, there are:\n",
+      "- 1 turn, spanning residues 78-80\n",
+      "- 0 beta sheets \n",
+      "- 11 helices, located at residues:\n",
+      "    6-17, 21-35, 37-42, 44-46, 52-57, 59-75, 82-95, 102-119, 120-122, 125-143, 144-146\n",
+      "\n",
+      "Overall, the 1GZX structure is composed of 4769 atoms, 787 residues, and 4716 bonds. The helical regions make up the majority of the secondary structure, with only one short turn and no beta sheets present.\n",
+      "\n",
+      "Let me know if you need any other details about the 1GZX protein structure!\u001b[32;1m\u001b[1;3mBased on the analysis, here is a summary of the secondary structure for the protein with PDB ID 1GZX:\n",
+      "\n",
+      "The PDB structure contains 12 chains total. \n",
+      "\n",
+      "In terms of secondary structure elements, there are:\n",
+      "- 1 turn, spanning residues 78-80\n",
+      "- 0 beta sheets \n",
+      "- 11 helices, located at residues:\n",
+      "    6-17, 21-35, 37-42, 44-46, 52-57, 59-75, 82-95, 102-119, 120-122, 125-143, 144-146\n",
+      "\n",
+      "Overall, the 1GZX structure is composed of 4769 atoms, 787 residues, and 4716 bonds. The helical regions make up the majority of the secondary structure, with only one short turn and no beta sheets present.\n",
+      "\n",
+      "Let me know if you need any other details about the 1GZX protein structure!\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "({'input': '\\n    You are an expert molecular dynamics scientist and\\n    your task is to respond to the question or\\n    solve the problem to the best of your ability using\\n    the provided tools.\\n\\n    You can only respond with a single complete\\n    Thought, Action, Action Input\\' format\\n    OR a single \\'Final Answer\\' format.\\n\\n    Complete format:\\n    Thought: (reflect on your progress and decide what \" \"to do next)\\n    Action: (the action name, should be the name of a tool)\\n    Action Input: (the input string to the action)\\n\\n    OR\\n\\n    Final Answer: (the final response to the original input\\n    question, when all steps are complete)\\n\\n    You are required to use the tools provided,\\n    using the most specific tool\\n    available for each action.\\n    Your final answer should contain all information\\n    necessary to answer the question and subquestions.\\n    Before you finish, reflect on your progress and make\\n    sure you have addressed the question in its entirety.\\n\\n    If you are asked to continue\\n    or reference previous runs,\\n    the context will be provided to you.\\n    If context is provided, you should assume\\n    you are continuing a chat.\\n\\n    Here is the input:\\n    Previous Context: None\\n    Question: Download the PDB file for PDB ID 1GZX. Then, analyze the secondary structure of the protein and tell me how many chains, sheets, etc. there are. ',\n",
+       "  'output': 'Based on the analysis, here is a summary of the secondary structure for the protein with PDB ID 1GZX:\\n\\nThe PDB structure contains 12 chains total. \\n\\nIn terms of secondary structure elements, there are:\\n- 1 turn, spanning residues 78-80\\n- 0 beta sheets \\n- 11 helices, located at residues:\\n    6-17, 21-35, 37-42, 44-46, 52-57, 59-75, 82-95, 102-119, 120-122, 125-143, 144-146\\n\\nOverall, the 1GZX structure is composed of 4769 atoms, 787 residues, and 4716 bonds. The helical regions make up the majority of the secondary structure, with only one short turn and no beta sheets present.\\n\\nLet me know if you need any other details about the 1GZX protein structure!'},\n",
+       " 'OT71PIM7')"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "agent.run(prompt3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Files found in registry: 1GZX_225015: PDB file downloaded from RSCB, PDBFile ID: 1GZX_225015\n"
+     ]
+    }
+   ],
+   "source": [
+    "registry = agent.path_registry\n",
+    "print(registry.list_path_names_and_descriptions())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of chains:  12\n",
+      "Number of sheets:  0\n",
+      "Number of helices:  444\n",
+      "Number of coils:  130\n"
+     ]
+    }
+   ],
+   "source": [
+    "import mdtraj as md\n",
+    "path = registry.get_mapped_path(\"1GZX_225015\")\n",
+    "traj = md.load(path)\n",
+    "top = traj.topology\n",
+    "number_of_chains = top.n_chains\n",
+    "secondary_structure = md.compute_dssp(traj,simplified=True)\n",
+    "print(\"Number of chains: \",number_of_chains)\n",
+    "print(\"Number of sheets: \",len([i for i in secondary_structure[0] if i == 'E']))\n",
+    "print(\"Number of helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n",
+    "print(\"Number of coils: \",len([i for i in secondary_structure[0] if i == 'C']))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/anaconda3/envs/mda_jun15/lib/python3.12/site-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The method `BaseTool.__call__` was deprecated in langchain-core 0.1.47 and will be removed in 0.3.0. Use invoke instead.\n",
+      "  warn_deprecated(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Turns: [{'start': 78, 'start_modifier': 'EXACT', 'end': 80, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '1IRD'}]}]\n",
+      "Beta sheets: []\n",
+      "Helices: [{'start': 6, 'start_modifier': 'EXACT', 'end': 17, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 21, 'start_modifier': 'EXACT', 'end': 35, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 37, 'start_modifier': 'EXACT', 'end': 42, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 44, 'start_modifier': 'EXACT', 'end': 46, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '1IRD'}]}, {'start': 52, 'start_modifier': 'EXACT', 'end': 57, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 59, 'start_modifier': 'EXACT', 'end': 75, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 82, 'start_modifier': 'EXACT', 'end': 95, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 102, 'start_modifier': 'EXACT', 'end': 119, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 120, 'start_modifier': 'EXACT', 'end': 122, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 125, 'start_modifier': 'EXACT', 'end': 143, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '2W72'}]}, {'start': 144, 'start_modifier': 'EXACT', 'end': 146, 'end_modifier': 'EXACT', 'evidences': [{'evidenceCode': 'ECO:0007829', 'source': 'PDB', 'id': '1IRD'}]}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from mdagent.tools.base_tools.preprocess_tools.uniprot import GetTurnsBetaSheetsHelices\n",
+    "\n",
+    "analysis = GetTurnsBetaSheetsHelices()\n",
+    "print (analysis(\"1GZX\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mdagent2",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}