Skip to content

Commit

Permalink
updated exps for llama models (more steps & PQA2)
Browse files Browse the repository at this point in the history
  • Loading branch information
qcampbel committed Oct 8, 2024
1 parent d46b514 commit 28e114e
Show file tree
Hide file tree
Showing 14 changed files with 41,950 additions and 4,946 deletions.
3,308 changes: 3,058 additions & 250 deletions notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_12.ipynb

Large diffs are not rendered by default.

2,854 changes: 2,531 additions & 323 deletions notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_14.ipynb

Large diffs are not rendered by default.

3,059 changes: 2,859 additions & 200 deletions notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_15.ipynb

Large diffs are not rendered by default.

1,781 changes: 1,672 additions & 109 deletions notebooks/experiments/experiment_k1/llama-v3p1-405b-instruct/exp_18.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
"source": [
"import datetime\n",
"import os\n",
"from mdagent import MDAgent\n",
"import matplotlib.pyplot as plt"
"from mdagent import MDAgent"
]
},
{
Expand All @@ -21,15 +20,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"date and time: 2024-08-19\n",
"time: 13:54:32\n",
"date and time: 2024-09-28\n",
"time: 11:34:52\n",
"LLM: accounts/fireworks/models/llama-v3p1-405b-instruct \n",
"Temperature: 0.1\n"
]
}
],
"source": [
"prompt3 = \"Download the PDB file for protein 1GZX. Then, analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present.\"\n",
"prompt3 = \"Download the PDB file for protein 1GZX. Then, analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present. Get the gene names for this protein.\"\n",
"llm_var = \"accounts/fireworks/models/llama-v3p1-405b-instruct\"\n",
"tools = \"all\"\n",
"agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)\n",
Expand All @@ -50,7 +49,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Thought: First, I need to download the PDB file for protein 1GZX. Then, I can analyze the secondary structure of the protein.\n",
"Thought: First, I need to download the PDB file for protein 1GZX. Then, I can analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present. Finally, I will get the gene names for this protein.\n",
"\n",
"Action:\n",
"```\n",
Expand All @@ -59,49 +58,51 @@
" \"action_input\": \"1GZX\"\n",
"}\n",
"```\n",
"\n",
"PDB file found with this ID: 1GZX\n",
"Now that I have the PDB file, I can analyze the secondary structure of the protein.\n",
"Now that I have the PDB file, I can analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"ComputeDSSP\",\n",
" \"action_input\": {\n",
" \"traj_file\": \"1GZX_135435\",\n",
" \"top_file\": \"1GZX_135435\",\n",
" \"traj_file\": \"1GZX_113456\",\n",
" \"top_file\": \"1GZX_113456\",\n",
" \"target_frames\": \"all\"\n",
" }\n",
"}\n",
"```Thought: Now that I have the secondary structure analysis, I can provide information on how many helices, sheets, and other components are present.\n",
"```\n",
"\n",
"(I will wait for the result of this action before proceeding)Now that I have the secondary structure analysis, I can get the gene names for this protein.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"GetTurnsBetaSheetsHelices\",\n",
" \"action\": \"GetGeneNames\",\n",
" \"action_input\": {\n",
" \"query\": \"1GZX\",\n",
" \"primary_accession\": \"\"\n",
" }\n",
"}\n",
"```\n",
"Thought: Now that I have the secondary structure analysis and the information on helices, sheets, and other components, I can provide a final answer.\n",
"\n",
"(I will wait for the result of this action before proceeding)I have all the necessary information to answer the question. I will now compile the results into a single response.\n",
"\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"The PDB file for protein 1GZX has been downloaded. The secondary structure analysis shows that there are 444 residues in helix, 0 residues in strand, 130 residues in coil, and 213 residues not assigned or not a protein residue. The protein has 11 helices, 0 beta sheets, and 1 turn.\"\n",
" \"action_input\": \"The PDB file for protein 1GZX has been downloaded. The secondary structure analysis shows that there are 444 residues in helix, 0 residues in strand, 130 residues in coil, and 213 residues not assigned or not a protein residue. The gene names associated with this protein are HBB, HBA1, and HBA2.\"\n",
"}\n",
"```"
]
},
{
"data": {
"text/plain": [
"({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the PDB file for protein 1GZX. Then, analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present. ',\n",
" 'output': 'The PDB file for protein 1GZX has been downloaded. The secondary structure analysis shows that there are 444 residues in helix, 0 residues in strand, 130 residues in coil, and 213 residues not assigned or not a protein residue. The protein has 11 helices, 0 beta sheets, and 1 turn.'},\n",
" 'DC2FTA1W')"
"({'input': '\\n You are an expert molecular dynamics scientist, and\\n your task is to respond to the question or\\n solve the problem to the best of your ability using\\n the provided tools.\\n\\n You can only respond with a single complete\\n \\'Thought, Action, Action Input\\' format\\n OR a single \\'Final Answer\\' format.\\n\\n Complete format:\\n Thought: (reflect on your progress and decide what to do next)\\n Action:\\n ```\\n {\\n \"action\": (the action name, it should be the name of a tool),\\n \"action_input\": (the input string for the action)\\n }\\n \\'\\'\\'\\n\\n OR\\n\\n Final Answer: (the final response to the original input\\n question, once all steps are complete)\\n\\n You are required to use the tools provided,\\n using the most specific tool\\n available for each action.\\n Your final answer should contain all information\\n necessary to answer the question and its subquestions.\\n Before you finish, reflect on your progress and make\\n sure you have addressed the question in its entirety.\\n\\n If you are asked to continue\\n or reference previous runs,\\n the context will be provided to you.\\n If context is provided, you should assume\\n you are continuing a chat.\\n\\n Here is the input:\\n Previous Context: None\\n Question: Download the PDB file for protein 1GZX. Then, analyze the secondary structure of the protein and provide information on how many helices, sheets, and other components are present. Get the gene names for this protein. ',\n",
" 'output': 'The PDB file for protein 1GZX has been downloaded. The secondary structure analysis shows that there are 444 residues in helix, 0 residues in strand, 130 residues in coil, and 213 residues not assigned or not a protein residue. The gene names associated with this protein are HBB, HBA1, and HBA2.'},\n",
" 'KWTR7LC2')"
]
},
"execution_count": 3,
Expand All @@ -122,8 +123,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"date and time: 2024-08-19\n",
"time: 13:54:42\n"
"date and time: 2024-09-28\n",
"time: 11:35:02\n"
]
}
],
Expand All @@ -137,28 +138,30 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Files found in registry: 1GZX_135435: PDB file downloaded from RSCB\n",
" PDBFile ID: 1GZX_135435\n",
" rec0_135436: dssp values for trajectory with id: 1GZX_135435\n"
"ckpt: ckpt_12\n",
"Files found in registry: 1GZX_113456: PDB file downloaded from RSCB\n",
" PDBFile ID: 1GZX_113456\n",
" rec0_113459: dssp values for trajectory with id: 1GZX_113456\n"
]
}
],
"source": [
"registry = agent.path_registry\n",
"print('ckpt:',os.path.basename(registry.ckpt_dir))\n",
"paths_and_descriptions = registry.list_path_names_and_descriptions()\n",
"print(\"\\n\".join(paths_and_descriptions.split(\",\")))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -173,7 +176,7 @@
],
"source": [
"import mdtraj as md\n",
"path = registry.get_mapped_path(\"1GZX_135435\")\n",
"path = registry.get_mapped_path(\"1GZX_113456\")\n",
"traj = md.load(path)\n",
"top = traj.topology\n",
"\n",
Expand All @@ -182,18 +185,6 @@
"print(\"Number of residues in helices: \",len([i for i in secondary_structure[0] if i == 'H']))\n",
"print(\"Number of residues in coils: \",len([i for i in secondary_structure[0] if i == 'C']))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Experiment Result:\n",
"### Completed without Exception or TimeOut Errors ✅\n",
"### Attempted all necessary steps ✅\n",
"### Completed without Hallucination ✅\n",
"### Logic make sense ✅\n",
"### Correct Answer ✅"
]
}
],
"metadata": {
Expand Down
Loading

0 comments on commit 28e114e

Please sign in to comment.