-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
803 additions
and
0 deletions.
There are no files selected for viewing
390 changes: 390 additions & 0 deletions
390
notebooks/experiments/experiment_k2/gpt-4o-2024-05-13/exp_23.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,390 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import datetime\n", | ||
"import os\n", | ||
"from mdagent import MDAgent\n", | ||
"import matplotlib.pyplot as plt" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"date and time: 2024-07-25\n", | ||
"time: 16:14:13\n", | ||
"LLM: gpt-4o-2024-05-13 \n", | ||
"Temperature: 0.1\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"prompt23 = \"Perform a short simulation of protein 1PQ2\"\n", | ||
"llm_var = \"gpt-4o-2024-05-13\"\n", | ||
"tools = \"all\"\n", | ||
"agent = MDAgent(agent_type=\"Structured\", model=llm_var, top_k_tools=tools)\n", | ||
"now = datetime.datetime.now()\n", | ||
"date = now.strftime(\"%Y-%m-%d\")\n", | ||
"print(\"date and time:\",date)\n", | ||
"time = now.strftime(\"%H:%M:%S\")\n", | ||
"print(\"time:\",time)\n", | ||
"print(\"LLM: \",agent.llm.model_name,\"\\nTemperature: \",agent.llm.temperature)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Thought: To perform a short simulation of the protein 1PQ2, I will need to set up the simulation parameters, including the forcefield files, system parameters, integrator parameters, and simulation parameters. I will use the `SetUpandRunFunction` tool to accomplish this.\n", | ||
"\n", | ||
"Action: \n", | ||
"```\n", | ||
"{\n", | ||
" \"action\": \"SetUpandRunFunction\",\n", | ||
" \"action_input\": {\n", | ||
" \"pdb_id\": \"1PQ2\",\n", | ||
" \"forcefield_files\": [],\n", | ||
" \"save\": true,\n", | ||
" \"system_params\": {\n", | ||
" \"nonbondedMethod\": \"NoCutoff\",\n", | ||
" \"nonbondedCutoff\": \"1 * nanometers\",\n", | ||
" \"ewaldErrorTolerance\": null,\n", | ||
" \"constraints\": \"None\",\n", | ||
" \"rigidWater\": false,\n", | ||
" \"constraintTolerance\": null,\n", | ||
" \"solvate\": false\n", | ||
" },\n", | ||
" \"integrator_params\": {\n", | ||
" \"integrator_type\": \"LangevinMiddle\",\n", | ||
" \"Temperature\": \"300 * kelvin\",\n", | ||
" \"Friction\": \"1.0 / picoseconds\",\n", | ||
" \"Timestep\": \"0.002 * picoseconds\",\n", | ||
" \"Pressure\": \"1.0 * bar\"\n", | ||
" },\n", | ||
" \"simulation_params\": {\n", | ||
" \"Ensemble\": \"NVT\",\n", | ||
" \"Number of Steps\": 5000,\n", | ||
" \"record_interval_steps\": 100,\n", | ||
" \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", | ||
" }\n", | ||
" }\n", | ||
"}\n", | ||
"```Setting default forcefields v2\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"2024-07-25 16:14:18.977 \n", | ||
" \u001b[33m\u001b[1mWarning:\u001b[0m to view this Streamlit app on a browser, run it with the following\n", | ||
" command:\n", | ||
"\n", | ||
" streamlit run /scratch/qcampbe2/myenvs/mdagent/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"error found: Invalid ewaldErrorTolerance: float() argument must be a string or a real number, not 'NoneType'. If you are using null or None, just dont include it as part of the parameters.\n", | ||
"Invalid constraintTolerance: float() argument must be a string or a real number, not 'NoneType'. If constraintTolerance is null or None, just dont include as part of the parameters.\n", | ||
"\n", | ||
" Correct this and try again. \n", | ||
" Everthing else is fine\n", | ||
"To correct the errors, I will remove the `ewaldErrorTolerance` and `constraintTolerance` parameters from the input since they are not required when set to `null` or `None`.\n", | ||
"\n", | ||
"Action:\n", | ||
"```\n", | ||
"{\n", | ||
" \"action\": \"SetUpandRunFunction\",\n", | ||
" \"action_input\": {\n", | ||
" \"pdb_id\": \"1PQ2\",\n", | ||
" \"forcefield_files\": [],\n", | ||
" \"save\": true,\n", | ||
" \"system_params\": {\n", | ||
" \"nonbondedMethod\": \"NoCutoff\",\n", | ||
" \"nonbondedCutoff\": \"1 * nanometers\",\n", | ||
" \"constraints\": \"None\",\n", | ||
" \"rigidWater\": false,\n", | ||
" \"solvate\": false\n", | ||
" },\n", | ||
" \"integrator_params\": {\n", | ||
" \"integrator_type\": \"LangevinMiddle\",\n", | ||
" \"Temperature\": \"300 * kelvin\",\n", | ||
" \"Friction\": \"1.0 / picoseconds\",\n", | ||
" \"Timestep\": \"0.002 * picoseconds\",\n", | ||
" \"Pressure\": \"1.0 * bar\"\n", | ||
" },\n", | ||
" \"simulation_params\": {\n", | ||
" \"Ensemble\": \"NVT\",\n", | ||
" \"Number of Steps\": 5000,\n", | ||
" \"record_interval_steps\": 100,\n", | ||
" \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", | ||
" }\n", | ||
" }\n", | ||
"}\n", | ||
"```Setting default forcefields v2\n", | ||
"To correct the issue, I need to download the PDB file for the protein 1PQ2 first and then use the file ID in the simulation setup.\n", | ||
"\n", | ||
"Action:\n", | ||
"```\n", | ||
"{\n", | ||
" \"action\": \"PDBFileDownloader\",\n", | ||
" \"action_input\": {\n", | ||
" \"query\": \"1PQ2\"\n", | ||
" }\n", | ||
"}\n", | ||
"```PDB file found with this ID: 1PQ2\n", | ||
"Now that I have the PDB file ID for protein 1PQ2, I will proceed with setting up and running the short simulation using the correct file ID.\n", | ||
"\n", | ||
"Action:\n", | ||
"```\n", | ||
"{\n", | ||
" \"action\": \"SetUpandRunFunction\",\n", | ||
" \"action_input\": {\n", | ||
" \"pdb_id\": \"1PQ2_161425\",\n", | ||
" \"forcefield_files\": [],\n", | ||
" \"save\": true,\n", | ||
" \"system_params\": {\n", | ||
" \"nonbondedMethod\": \"NoCutoff\",\n", | ||
" \"nonbondedCutoff\": \"1 * nanometers\",\n", | ||
" \"constraints\": \"None\",\n", | ||
" \"rigidWater\": false,\n", | ||
" \"solvate\": false\n", | ||
" },\n", | ||
" \"integrator_params\": {\n", | ||
" \"integrator_type\": \"LangevinMiddle\",\n", | ||
" \"Temperature\": \"300 * kelvin\",\n", | ||
" \"Friction\": \"1.0 / picoseconds\",\n", | ||
" \"Timestep\": \"0.002 * picoseconds\",\n", | ||
" \"Pressure\": \"1.0 * bar\"\n", | ||
" },\n", | ||
" \"simulation_params\": {\n", | ||
" \"Ensemble\": \"NVT\",\n", | ||
" \"Number of Steps\": 5000,\n", | ||
" \"record_interval_steps\": 100,\n", | ||
" \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", | ||
" }\n", | ||
" }\n", | ||
"}\n", | ||
"```Setting default forcefields v2\n", | ||
"Building system...\n", | ||
"About to create system...\n", | ||
"Attempts at creating system: 0/3\n", | ||
"adding system without solvent\n", | ||
"Trying to add component to Forcefield...\n", | ||
"Residue code: LYS\n", | ||
"Residue code not in solvent list. Adding forcefield not supported.\n", | ||
"To address the issue with the missing template for residue 1 (LYS), I will use the `CleaningToolFunction` to add missing atoms and hydrogens to the PDB file. This should resolve the issue with the missing atoms.\n", | ||
"\n", | ||
"Action:\n", | ||
"```\n", | ||
"{\n", | ||
" \"action\": \"CleaningToolFunction\",\n", | ||
" \"action_input\": {\n", | ||
" \"pdb_id\": \"1PQ2_161425\",\n", | ||
" \"replace_nonstandard_residues\": true,\n", | ||
" \"add_missing_atoms\": true,\n", | ||
" \"remove_heterogens\": true,\n", | ||
" \"remove_water\": true,\n", | ||
" \"add_hydrogens\": true,\n", | ||
" \"add_hydrogens_ph\": 7.0\n", | ||
" }\n", | ||
"}\n", | ||
"```\n", | ||
"file path: /gpfs/fs2/scratch/qcampbe2/repos/md-agent/ckpt/ckpt_67/pdb/1PQ2_raw_20240725_161425.pdb\n", | ||
"Now that the PDB file has been cleaned and the missing atoms and hydrogens have been added, I will proceed with setting up and running the short simulation using the cleaned file ID.\n", | ||
"\n", | ||
"Action:\n", | ||
"```\n", | ||
"{\n", | ||
" \"action\": \"SetUpandRunFunction\",\n", | ||
" \"action_input\": {\n", | ||
" \"pdb_id\": \"1PQ2_161544\",\n", | ||
" \"forcefield_files\": [],\n", | ||
" \"save\": true,\n", | ||
" \"system_params\": {\n", | ||
" \"nonbondedMethod\": \"NoCutoff\",\n", | ||
" \"nonbondedCutoff\": \"1 * nanometers\",\n", | ||
" \"constraints\": \"None\",\n", | ||
" \"rigidWater\": false,\n", | ||
" \"solvate\": false\n", | ||
" },\n", | ||
" \"integrator_params\": {\n", | ||
" \"integrator_type\": \"LangevinMiddle\",\n", | ||
" \"Temperature\": \"300 * kelvin\",\n", | ||
" \"Friction\": \"1.0 / picoseconds\",\n", | ||
" \"Timestep\": \"0.002 * picoseconds\",\n", | ||
" \"Pressure\": \"1.0 * bar\"\n", | ||
" },\n", | ||
" \"simulation_params\": {\n", | ||
" \"Ensemble\": \"NVT\",\n", | ||
" \"Number of Steps\": 5000,\n", | ||
" \"record_interval_steps\": 100,\n", | ||
" \"record_params\": [\"step\", \"potentialEnergy\", \"temperature\"]\n", | ||
" }\n", | ||
" }\n", | ||
"}\n", | ||
"```\n", | ||
"Setting default forcefields v2\n", | ||
"Building system...\n", | ||
"About to create system...\n", | ||
"Attempts at creating system: 0/3\n", | ||
"adding system without solvent\n", | ||
"System built successfully\n", | ||
"<openmm.openmm.System; proxy of <Swig Object of type 'OpenMM::System *' at 0x7f66b2426130> >\n", | ||
"Setting up integrator...\n", | ||
"Creating simulation...\n", | ||
"simulation set!\n", | ||
"Performing energy minimization...\n", | ||
"Minimization complete!\n", | ||
"Initial Positions saved to initial_positions.pdb\n", | ||
"Equilibrating...\n", | ||
"Simulating...\n", | ||
"Done!\n", | ||
"Standalone simulation script written to /gpfs/fs2/scratch/qcampbe2/repos/md-agent/ckpt/ckpt_67/simulations/NVT_1PQ2_161544_20240725_161549.py\n", | ||
"['exp_10.ipynb', 'TRAJ_sim0_155435_2YXF_155429_20240725_155437.dcd', 'exp_13.ipynb', 'exp_22.ipynb', 'exp_8.ipynb', 'TRAJ_sim0_161523_1A3N_161517_20240725_161529.dcd', 'exp_3.ipynb', 'exp_1.ipynb', 'TRAJ_sim0_161549_1PQ2_161544_20240725_161551.dcd', '.ipynb_checkpoints', 'exp_4.ipynb', 'LOG_sim0_161549_1PQ2_161544_20240725_161551.txt', 'packmol.inp', 'exp_24.ipynb', 'exp_12.ipynb', 'exp_19.ipynb', 'TOP_sim0_155435_2YXF_155429_20240725_155437.pdb', 'exp_9.ipynb', 'exp_5.ipynb', 'exp_7.ipynb', 'exp_2.ipynb', 'exp_18.ipynb', 'exp_14.ipynb', 'exp_6.ipynb', 'exp_20.ipynb', 'exp_23.ipynb', 'exp_11.ipynb', 'exp_21.ipynb', 'TOP_sim0_161549_1PQ2_161544_20240725_161551.pdb', 'exp_25.ipynb', 'LOG_sim0_161523_1A3N_161517_20240725_161529.txt', 'LOG_sim0_155435_2YXF_155429_20240725_155437.txt', 'TOP_sim0_161523_1A3N_161517_20240725_161529.pdb']\n", | ||
"Final Answer: The short simulation of protein 1PQ2 has been successfully completed. Here are the details:\n", | ||
"\n", | ||
"- **Simulation trajectory file ID**: rec0_161551\n", | ||
"- **Simulation state log file ID**: rec1_161551\n", | ||
"- **Simulation PDB frames file ID**: rec2_161551\n", | ||
"- **Standalone script file ID**: sim0_161549\n", | ||
"- **Initial topology file ID**: top_sim0_161549\n", | ||
"\n", | ||
"These files contain the results of the simulation and can be used for further analysis or reproduction of the simulation.The agent's attempts to solve the problem involved conducting a short simulation of protein 1PQ2. The process included the following steps:\n", | ||
"\n", | ||
"1. Obtaining the standalone script file ID: sim0_161549\n", | ||
"2. Obtaining the initial topology file ID: top_sim0_161549\n", | ||
"3. Conducting the simulation and generating the simulation trajectory file ID: rec0_161551\n", | ||
"4. Generating the simulation state log file ID: rec1_161551\n", | ||
"5. Generating the simulation PDB frames file ID: rec2_161551\n", | ||
"\n", | ||
"After completing these steps, the agent successfully completed the simulation of protein 1PQ2. The final solution was the successful completion of the simulation, with the results stored in the mentioned files for further analysis or reproduction of the simulation.Your run id is: YV3TGHOP\n" | ||
] | ||
}, | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"('Final Answer: The short simulation of protein 1PQ2 has been successfully completed. Here are the details:\\n\\n- **Simulation trajectory file ID**: rec0_161551\\n- **Simulation state log file ID**: rec1_161551\\n- **Simulation PDB frames file ID**: rec2_161551\\n- **Standalone script file ID**: sim0_161549\\n- **Initial topology file ID**: top_sim0_161549\\n\\nThese files contain the results of the simulation and can be used for further analysis or reproduction of the simulation.',\n", | ||
" 'YV3TGHOP')" | ||
] | ||
}, | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# Run the agent on the prompt; the recorded result is a (final answer text, run id) tuple.\n", | ||
"agent.run(prompt23)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"date and time: 2024-07-25\n", | ||
"time: 16:19:18\n", | ||
"Files found in registry: 1PQ2_161425: PDB file downloaded from RSCB\n", | ||
" PDBFile ID: 1PQ2_161425\n", | ||
" 1PQ2_161544: Cleaned File: Removed Heterogens\n", | ||
" and Water Removed. Replaced Nonstandard Residues. Added Hydrogens at pH 7.0. Missing Atoms Added and replaces nonstandard residues. \n", | ||
" top_sim0_161549: Initial positions for simulation sim0_161549\n", | ||
" sim0_161549: Basic Simulation of Protein 1PQ2_161544\n", | ||
" rec0_161551: Simulation trajectory for protein 1PQ2_161544 and simulation sim0_161549\n", | ||
" rec1_161551: Simulation state log for protein 1PQ2_161544 and simulation sim0_161549\n", | ||
" rec2_161551: Simulation pdb frames for protein 1PQ2_161544 and simulation sim0_161549\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"now = datetime.datetime.now()\n", | ||
"date = now.strftime(\"%Y-%m-%d\")\n", | ||
"print(\"date and time:\",date)\n", | ||
"time = now.strftime(\"%H:%M:%S\")\n", | ||
"print(\"time:\",time)\n", | ||
"registry = agent.path_registry\n", | ||
"paths_and_descriptions = registry.list_path_names_and_descriptions()\n", | ||
"print(\"\\n\".join(paths_and_descriptions.split(\",\")))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Simulation with :15234 for 60 frames. Total time: 0.11800000000000001 ps\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"trajectory_path = registry.get_mapped_path(\"rec0_161551\")\n", | ||
"topology_path = registry.get_mapped_path(\"top_sim0_161549\")\n", | ||
"assert os.path.exists(trajectory_path), \"Trajectory file not found\"\n", | ||
"assert os.path.exists(topology_path), \"Topology file not found\"\n", | ||
"\n", | ||
"import mdtraj as md\n", | ||
"traj = md.load(trajectory_path, top=topology_path)\n", | ||
"print(f\"Simulation with :{traj.n_atoms} for {traj.n_frames} frames. Total time: {traj.time[-1]*0.002} ps\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Experiment Result:\n", | ||
"### Completed without Exception or TimeOut Errors ✅ \n", | ||
"### Attempted all necessary steps ✅ \n", | ||
"### Completed without Hallucination ✅ \n", | ||
"### Logic makes sense ✅ \n", | ||
"### Correct Answer ✅ " | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "mdagent", | ||
"language": "python", | ||
"name": "mdagent" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.