experiments for llm writing python codes

ur-whitelab · Oct 28, 2024 · a58b7ef · a58b7ef
1 parent a9404bf
commit a58b7ef
Show file tree

Hide file tree

Showing 52 changed files with 657,471 additions and 0 deletions.
diff --git a/notebooks/experiments/python_code/llm_only/exp_1.ipynb b/notebooks/experiments/python_code/llm_only/exp_1.ipynb
diff --git a/notebooks/experiments/python_code/llm_only/exp_10.ipynb b/notebooks/experiments/python_code/llm_only/exp_10.ipynb
diff --git a/notebooks/experiments/python_code/llm_only/exp_11.ipynb b/notebooks/experiments/python_code/llm_only/exp_11.ipynb
diff --git a/notebooks/experiments/python_code/llm_only/exp_12.ipynb b/notebooks/experiments/python_code/llm_only/exp_12.ipynb
diff --git a/notebooks/experiments/python_code/llm_only/exp_13.ipynb b/notebooks/experiments/python_code/llm_only/exp_13.ipynb
diff --git a/notebooks/experiments/python_code/llm_only/exp_13_test_code.ipynb b/notebooks/experiments/python_code/llm_only/exp_13_test_code.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ec74ca93-a94f-4f7c-8a78-bb7a8f364861",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-10-18T05:16:54.397990Z",
+     "iopub.status.busy": "2024-10-18T05:16:54.397720Z",
+     "iopub.status.idle": "2024-10-18T06:43:20.177030Z",
+     "shell.execute_reply": "2024-10-18T06:43:20.176197Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Warning: importing 'simtk.openmm' is deprecated.  Import 'openmm' instead.\n"
+     ]
+    },
+    {
+     "ename": "OpenMMException",
+     "evalue": "The periodic box size has decreased to less than twice the nonbonded cutoff.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mOpenMMException\u001b[0m                           Traceback (most recent call last)",
+      "\u001b[0;32m/local_scratch/26056286/ipykernel_22176/4166751366.py\u001b[0m in \u001b[0;36m?\u001b[0;34m()\u001b[0m\n\u001b[1;32m     56\u001b[0m \u001b[0;31m# Run low pressure simulation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     57\u001b[0m \u001b[0mrun_simulation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfixer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matmospheres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'low_pressure.dcd'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'low_pressure.log'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     58\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     59\u001b[0m \u001b[0;31m# Step 4: Set up and run the simulation at high pressure (2000 atm)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 60\u001b[0;31m \u001b[0mrun_simulation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfixer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2000\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0matmospheres\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'high_pressure.dcd'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'high_pressure.log'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     62\u001b[0m \u001b[0;31m# Step 5: Analyze the RMSF for both simulations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     63\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcalculate_rmsf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdcd_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpdb_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/local_scratch/26056286/ipykernel_22176/4166751366.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(fixer, pressure, output_dcd, output_log)\u001b[0m\n\u001b[1;32m     50\u001b[0m     \u001b[0msimulation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreporters\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mapp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDCDReporter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_dcd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m     simulation.reporters.append(app.StateDataReporter(output_log, 1000, step=True, \n\u001b[1;32m     52\u001b[0m                                                       \u001b[0mpotentialEnergy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtemperature\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdensity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 54\u001b[0;31m     \u001b[0msimulation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_steps\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m/scratch/qcampbe2/myenvs/mdagent/lib/python3.12/site-packages/openmm/app/simulation.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, steps)\u001b[0m\n\u001b[1;32m    145\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msteps\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    146\u001b[0m         \u001b[0;34m\"\"\"Advance the simulation by integrating a specified number of time steps.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_simulate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mendStep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcurrentStep\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m/scratch/qcampbe2/myenvs/mdagent/lib/python3.12/site-packages/openmm/app/simulation.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, endStep, endTime)\u001b[0m\n\u001b[1;32m    208\u001b[0m                     \u001b[0mnextSteps\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnextReport\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    209\u001b[0m                     \u001b[0manyReport\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    210\u001b[0m             \u001b[0mstepsToGo\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnextSteps\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    211\u001b[0m             \u001b[0;32mwhile\u001b[0m \u001b[0mstepsToGo\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 212\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintegrator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Only take 10 steps at a time, to give Python more chances to respond to a control-c.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    213\u001b[0m                 \u001b[0mstepsToGo\u001b[0m \u001b[0;34m-=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    214\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mendTime\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>=\u001b[0m 
\u001b[0mendTime\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    215\u001b[0m                     \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/scratch/qcampbe2/myenvs/mdagent/lib/python3.12/site-packages/openmm/openmm.py\u001b[0m in \u001b[0;36m?\u001b[0;34m(self, steps)\u001b[0m\n\u001b[1;32m   6858\u001b[0m         \u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6859\u001b[0m         \u001b[0msteps\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6860\u001b[0m             \u001b[0mthe\u001b[0m \u001b[0mnumber\u001b[0m \u001b[0mof\u001b[0m \u001b[0mtime\u001b[0m \u001b[0msteps\u001b[0m \u001b[0mto\u001b[0m \u001b[0mtake\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6861\u001b[0m         \"\"\"\n\u001b[0;32m-> 6862\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_openmm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLangevinIntegrator_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msteps\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mOpenMMException\u001b[0m: The periodic box size has decreased to less than twice the nonbonded cutoff."
+     ]
+    }
+   ],
+   "source": [
+    "# TEST THE CODE\n",
+    "\n",
+    "import os\n",
+    "import requests\n",
+    "from simtk.openmm import app\n",
+    "import simtk.openmm as mm\n",
+    "from simtk import unit\n",
+    "from pdbfixer import PDBFixer\n",
+    "import mdtraj as md\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Step 1: Download the PDB file for 1UBQ\n",
+    "pdb_id = \"1UBQ\"\n",
+    "pdb_url = f\"https://files.rcsb.org/download/{pdb_id}.pdb\"\n",
+    "pdb_filename = f\"{pdb_id}.pdb\"\n",
+    "\n",
+    "# Download only when no local copy exists, so re-running the cell reuses the file.\n",
+    "if not os.path.exists(pdb_filename):\n",
+    "    # A timeout keeps the cell from hanging forever on a stalled connection.\n",
+    "    response = requests.get(pdb_url, timeout=60)\n",
+    "    # Fail loudly on HTTP errors; otherwise an error page would be saved as the\n",
+    "    # PDB file and then reused on every later run because the file now exists.\n",
+    "    response.raise_for_status()\n",
+    "    with open(pdb_filename, 'wb') as f:\n",
+    "        f.write(response.content)\n",
+    "\n",
+    "# Step 2: Prepare the system using PDBFixer\n",
+    "fixer = PDBFixer(filename=pdb_filename)\n",
+    "\n",
+    "# Detection has to run before repair: the find* calls record what is missing\n",
+    "# and addMissingAtoms() then builds it.\n",
+    "fixer.findMissingResidues()\n",
+    "fixer.findMissingAtoms()\n",
+    "fixer.addMissingAtoms()\n",
+    "\n",
+    "# Protonate last so the hydrogens are placed on the completed heavy-atom model.\n",
+    "# pH 7.0 is PDBFixer's default, spelled out here for the reader.\n",
+    "fixer.addMissingHydrogens(pH=7.0)\n",
+    "\n",
+    "# Define simulation parameters\n",
+    "temperature = 300 * unit.kelvin\n",
+    "timestep = 2.0 * unit.femtoseconds\n",
+    "simulation_time = 1 * unit.nanoseconds\n",
+    "\n",
+    "# Express both durations in femtoseconds and round the ratio: int() alone\n",
+    "# truncates, so a quotient that lands just below a whole number because of\n",
+    "# floating-point unit conversion would silently drop one step.\n",
+    "n_steps = int(round(simulation_time.value_in_unit(unit.femtoseconds)\n",
+    "                    / timestep.value_in_unit(unit.femtoseconds)))\n",
+    "\n",
+    "# Step 3: Set up and run the simulation at low pressure (1 atm)\n",
+    "def run_simulation(fixer, pressure, output_dcd, output_log, output_pdb=None,\n",
+    "                   padding=1.0 * unit.nanometers):\n",
+    "    \"\"\"Run an NPT simulation of the prepared structure in explicit water.\n",
+    "\n",
+    "    Reads the module-level ``temperature``, ``timestep`` and ``n_steps``.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    fixer : PDBFixer\n",
+    "        Prepared structure; its topology and positions seed the system.\n",
+    "    pressure : Quantity\n",
+    "        Target pressure for the Monte Carlo barostat.\n",
+    "    output_dcd : str\n",
+    "        Path of the trajectory file (one frame every 1000 steps).\n",
+    "    output_log : str\n",
+    "        Path of the state-data log (one row every 1000 steps).\n",
+    "    output_pdb : str, optional\n",
+    "        Path for the simulated (solvated) topology. Defaults to\n",
+    "        ``output_dcd`` with its extension replaced by ``.pdb``.\n",
+    "    padding : Quantity, optional\n",
+    "        Minimum water layer between the solute and the box faces.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    str\n",
+    "        Path of the topology PDB whose atoms match ``output_dcd``.\n",
+    "    \"\"\"\n",
+    "    forcefield = app.ForceField('amber99sb.xml', 'tip3p.xml')\n",
+    "\n",
+    "    # Solvate before building the system. Without bulk water the periodic box is\n",
+    "    # mostly empty, so the barostat keeps shrinking it until OpenMM aborts with\n",
+    "    # 'The periodic box size has decreased to less than twice the nonbonded cutoff'.\n",
+    "    modeller = app.Modeller(fixer.topology, fixer.positions)\n",
+    "    modeller.addSolvent(forcefield, model='tip3p', padding=padding)\n",
+    "\n",
+    "    # Save the topology that is actually simulated; the DCD stores coordinates\n",
+    "    # only, and the downloaded PDB has a different atom count.\n",
+    "    if output_pdb is None:\n",
+    "        output_pdb = os.path.splitext(output_dcd)[0] + '.pdb'\n",
+    "    with open(output_pdb, 'w') as pdb_out:\n",
+    "        app.PDBFile.writeFile(modeller.topology, modeller.positions, pdb_out)\n",
+    "\n",
+    "    system = forcefield.createSystem(modeller.topology, nonbondedMethod=app.PME,\n",
+    "                                     nonbondedCutoff=1.0*unit.nanometers, constraints=app.HBonds)\n",
+    "    system.addForce(mm.MonteCarloBarostat(pressure, temperature))\n",
+    "\n",
+    "    integrator = mm.LangevinIntegrator(temperature, 1.0/unit.picoseconds, timestep)\n",
+    "    simulation = app.Simulation(modeller.topology, system, integrator)\n",
+    "    simulation.context.setPositions(modeller.positions)\n",
+    "\n",
+    "    simulation.minimizeEnergy()\n",
+    "    simulation.context.setVelocitiesToTemperature(temperature)\n",
+    "\n",
+    "    simulation.reporters.append(app.DCDReporter(output_dcd, 1000))\n",
+    "    simulation.reporters.append(app.StateDataReporter(output_log, 1000, step=True,\n",
+    "                                                      potentialEnergy=True, temperature=True, density=True))\n",
+    "\n",
+    "    simulation.step(n_steps)\n",
+    "    return output_pdb\n",
+    "\n",
+    "# Run low pressure simulation\n",
+    "low_topology = run_simulation(fixer, 1 * unit.atmospheres, 'low_pressure.dcd', 'low_pressure.log')\n",
+    "\n",
+    "# Step 4: Set up and run the simulation at high pressure (2000 atm)\n",
+    "high_topology = run_simulation(fixer, 2000 * unit.atmospheres, 'high_pressure.dcd', 'high_pressure.log')\n",
+    "\n",
+    "# Step 5: Analyze the RMSF for both simulations\n",
+    "def calculate_rmsf(dcd_file, pdb_file):\n",
+    "    \"\"\"Return the C-alpha RMSF (nm), one value per residue, relative to frame 0.\n",
+    "\n",
+    "    ``pdb_file`` must be the topology written for ``dcd_file`` so that the atom\n",
+    "    counts match.\n",
+    "    \"\"\"\n",
+    "    traj = md.load(dcd_file, top=pdb_file)\n",
+    "    # Keep one atom per residue. Solvent must be dropped before fitting, because\n",
+    "    # freely diffusing water would dominate an all-atom superposition.\n",
+    "    ca_traj = traj.atom_slice(traj.topology.select('protein and name CA'))\n",
+    "    ca_traj.superpose(ca_traj, 0)\n",
+    "    return md.rmsf(ca_traj, ca_traj, 0)\n",
+    "\n",
+    "rmsf_low = calculate_rmsf('low_pressure.dcd', low_topology)\n",
+    "rmsf_high = calculate_rmsf('high_pressure.dcd', high_topology)\n",
+    "\n",
+    "# Plot RMSF\n",
+    "fig, ax = plt.subplots()\n",
+    "ax.plot(rmsf_low, label='Low Pressure (1 atm)')\n",
+    "ax.plot(rmsf_high, label='High Pressure (2000 atm)')\n",
+    "ax.set_xlabel('Residue')\n",
+    "ax.set_ylabel('RMSF (nm)')\n",
+    "ax.legend()\n",
+    "ax.set_title('RMSF Comparison')\n",
+    "plt.show()\n",
+    "\n",
+    "# Step 6: Calculate and plot the moments of inertia over time for both simulations\n",
+    "def calculate_moments_of_inertia(traj):\n",
+    "    \"\"\"Return the principal moments of inertia as an (n_frames, 3) array.\n",
+    "\n",
+    "    Each row holds one frame's moments in ascending order.\n",
+    "    \"\"\"\n",
+    "    # One call handles every frame and yields an (n_frames, 3, 3) stack.\n",
+    "    inertia_tensors = md.geometry.compute_inertia_tensor(traj)\n",
+    "    # The tensors are symmetric, so eigvalsh returns real, sorted eigenvalues;\n",
+    "    # eigvals gives no ordering guarantee and may return complex values.\n",
+    "    return np.linalg.eigvalsh(inertia_tensors)\n",
+    "\n",
+    "def load_protein_trajectory(dcd_file, pdb_file):\n",
+    "    \"\"\"Load a trajectory with its matching topology and keep only protein atoms.\"\"\"\n",
+    "    traj = md.load(dcd_file, top=pdb_file)\n",
+    "    return traj.atom_slice(traj.topology.select('protein'))\n",
+    "\n",
+    "# Solvent is stripped so the moments describe the protein, not the water box.\n",
+    "traj_low = load_protein_trajectory('low_pressure.dcd', low_topology)\n",
+    "traj_high = load_protein_trajectory('high_pressure.dcd', high_topology)\n",
+    "\n",
+    "moments_low = calculate_moments_of_inertia(traj_low)\n",
+    "moments_high = calculate_moments_of_inertia(traj_high)\n",
+    "\n",
+    "# Plot moments of inertia\n",
+    "fig, ax = plt.subplots()\n",
+    "for axis in range(3):\n",
+    "    ax.plot(moments_low[:, axis], label=f'Low Pressure I{axis + 1}')\n",
+    "for axis in range(3):\n",
+    "    ax.plot(moments_high[:, axis], label=f'High Pressure I{axis + 1}', linestyle='--')\n",
+    "ax.set_xlabel('Frame')\n",
+    "ax.set_ylabel('Moment of Inertia (amu*nm^2)')\n",
+    "ax.legend()\n",
+    "ax.set_title('Moments of Inertia Over Time')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "da937d45-b987-453e-b770-3119721af105",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1UBQ.pdb      exp_13_test_code.ipynb  high_pressure.log  low_pressure.log\n",
+      "exp_13.ipynb  high_pressure.dcd       low_pressure.dcd\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mdagent",
+   "language": "python",
+   "name": "mdagent"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}