From f129bb49ee49b726b47f1fd63da92f4eca3f4613 Mon Sep 17 00:00:00 2001 From: Andrea Rizzi Date: Fri, 28 Apr 2017 13:57:29 -0400 Subject: [PATCH 1/4] Implement function packmol.standardize_water. The function takes an MDTraj Trajectory and if it is water it standardizes its residue and atom names so that MDTraj can automatically perceive its bonds. --- openmoltools/packmol.py | 52 ++++++++++++++++++++++++++++++ openmoltools/tests/test_packmol.py | 40 +++++++++++++++++++++-- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/openmoltools/packmol.py b/openmoltools/packmol.py index 50c1581..d313cca 100644 --- a/openmoltools/packmol.py +++ b/openmoltools/packmol.py @@ -27,6 +27,57 @@ end structure """ + +def standardize_water(mol_traj): + """Ensure that a water molecule has the correct MDTraj Topology. + + The PDB format doesn't require CONECT records for a water molecule, + but MDTraj correctly recognize water molecules bonds only if they + adopt specific residue and atom names. This function standardize + the names to ensure the Topology is correctly connected. + + Parameters + ---------- + mol_traj : mdtraj.Trajectory + A trajectory object describing a single water molecule. If the + trajectory doesn't describe a water molecule, nothing happens. + The residue name and atom names are modified to adhere to MDTraj + standard definition, if this is a water molecule. + + Returns + ------- + bool + True if this was a water molecule, False otherwise. + + """ + if mol_traj.topology.n_atoms != 3 or mol_traj.topology.n_residues != 1: + # This is not a water molecule. + return False + + # Count oxygen and hydrogens in molecule and save their indices. + atom_element_ids = {'O': [], 'H': []} + for atom_index, atom in enumerate(mol_traj.topology.atoms): + try: + atom_element_ids[atom.element.symbol].append(atom_index) + except KeyError: + # There's an element different than oxygen or hydrogen. + return False + + # This is water if there are two hydrogens and an oxygen. 
+ if not (len(atom_element_ids['O']) == 1 and len(atom_element_ids['H']) == 2): + return False + + # Rename residue and atoms. + mol_traj.topology.residue(0).name = 'HOH' + [o_index], [h1_index, h2_index] = atom_element_ids['O'], atom_element_ids['H'] + for index, std_name in zip([o_index, h1_index, h2_index], ['O', 'H1', 'H2']): + mol_traj.topology.atom(index).name = std_name + + # Update bonds now that water residue is standard. + mol_traj.topology.create_standard_bonds() + return True + + def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box_size=None): """Run packmol to generate a box containing a mixture of molecules. @@ -125,6 +176,7 @@ def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box return trj + def approximate_volume(pdb_filenames, n_molecules_list, box_scaleup_factor=2.0): """Approximate the appropriate box size based on the number and types of atoms present. diff --git a/openmoltools/tests/test_packmol.py b/openmoltools/tests/test_packmol.py index 2b0d150..b976ce4 100644 --- a/openmoltools/tests/test_packmol.py +++ b/openmoltools/tests/test_packmol.py @@ -1,9 +1,8 @@ +import os import tempfile -import numpy as np import mdtraj as md from unittest import skipIf import logging -from mdtraj.testing import eq from openmoltools import utils, packmol import simtk.unit as u from simtk.openmm import app @@ -17,6 +16,43 @@ from rdkit.Chem import AllChem except ImportError: HAVE_RDKIT = False + + +def test_standardize_water(): + """Test utility function standardize_water. + + The water bonds must be recognized even when residue names do not + match the standard definition in mdtraj.formats.pdb.data.residues.xml. + + """ + water_filepath = utils.get_data_filename("chemicals/water/water.mol2") + water_traj = md.load(water_filepath) + + # Store in pdb format and lose CONECT records. 
+ water_pdb_filepath = tempfile.mktemp(suffix='.pdb') + water_traj.save_pdb(water_pdb_filepath) + with open(water_pdb_filepath, 'r') as f: + pdb_lines = f.readlines() + with open(water_pdb_filepath, 'w') as f: + for line in pdb_lines: + if line[:6] != 'CONECT': + f.write(line) + + # Test pre-condition: MDTraj cannot detect water bonds automatically. + water_traj = md.load(water_pdb_filepath) + assert water_traj.topology.n_bonds == 0 + + # The function modifies the Trajectory and bonds are now recognized. + assert packmol.standardize_water(water_traj) is True + assert water_traj.topology.n_bonds == 2 + + # The second time, the Trajectory object is not modified. + assert packmol.standardize_water(water_traj) is False + + # Remove temporary file. + os.remove(water_pdb_filepath) + + @skipIf(not HAVE_RDKIT, "Skipping testing of packmol conversion because rdkit not found.") @skipIf(packmol.PACKMOL_PATH is None, "Skipping testing of packmol conversion because packmol not found.") def test_packmol_simulation_ternary(): From 5e645081769a54b6b94a07ffde875b61b90e1d76 Mon Sep 17 00:00:00 2001 From: Andrea Rizzi Date: Fri, 28 Apr 2017 14:26:15 -0400 Subject: [PATCH 2/4] Add documentation about MDTraj problem with non-standard water atom naming --- openmoltools/packmol.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/openmoltools/packmol.py b/openmoltools/packmol.py index d313cca..bdfa034 100644 --- a/openmoltools/packmol.py +++ b/openmoltools/packmol.py @@ -84,9 +84,11 @@ def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box Parameters ---------- pdb_filenames_or_trajectories : list({str, Trajectory}) - List of pdb filenames or trajectories for each component of mixture. If this is - a list of trajectories, the trajectories will be saved to as - temporary files to be run in packmol. + List of pdb filenames or trajectories for each component of mixture. 
+ If this is a list of trajectories, the trajectories will be saved to + as temporary files to be run in packmol. Water molecules must have + MDTraj-standard residue name (HOH) and atom names (O, H1, H2), otherwise + MDtraj won't be able to perceive the bonds. n_molecules_list : list(int) The number of molecules of each mixture component. tolerance : float, optional, default=2.0 @@ -103,11 +105,20 @@ def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box Notes ----- + Water molecules must have MDTraj-standard residue name (HOH) and atom + names (O, H1, H2), otherwise MDTraj won't be able to perceive the bonds + and the Topology of the returned Trajectory will be incorrect. Be aware that MDTraj uses nanometers internally, but packmol uses angstrom - units. The present function takes `tolerance` and `box_size` in - angstrom units, but the output trajectory will have data in nm. + units. The present function takes `tolerance` and `box_size` in angstrom + units, but the output trajectory will have data in nm. Also note that OpenMM is pretty picky about the format of unit cell input, so use the example in tests/test_packmol.py to ensure that you do the right thing. + + See Also + -------- + standardize_water + Standardize residue and atom names of a water molecule. 
+ """ assert len(pdb_filenames_or_trajectories) == len(n_molecules_list), "Must input same number of pdb filenames as num molecules" From c436982f25321bff3473bcc777f23fe3e10c93a7 Mon Sep 17 00:00:00 2001 From: Andrea Rizzi Date: Fri, 28 Apr 2017 14:49:04 -0400 Subject: [PATCH 3/4] Fix outdated test --- openmoltools/tests/test_packmol.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/openmoltools/tests/test_packmol.py b/openmoltools/tests/test_packmol.py index b976ce4..3f79928 100644 --- a/openmoltools/tests/test_packmol.py +++ b/openmoltools/tests/test_packmol.py @@ -46,9 +46,6 @@ def test_standardize_water(): assert packmol.standardize_water(water_traj) is True assert water_traj.topology.n_bonds == 2 - # The second time, the Trajectory object is not modified. - assert packmol.standardize_water(water_traj) is False - # Remove temporary file. os.remove(water_pdb_filepath) From 5fa18f689d8e23203a952b0ee13f63a40af7ba66 Mon Sep 17 00:00:00 2001 From: Andrea Rizzi Date: Fri, 28 Apr 2017 16:32:33 -0400 Subject: [PATCH 4/4] Correct documentation on standard water names in MDTraj --- openmoltools/packmol.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/openmoltools/packmol.py b/openmoltools/packmol.py index bdfa034..0d7442c 100644 --- a/openmoltools/packmol.py +++ b/openmoltools/packmol.py @@ -87,8 +87,8 @@ def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box List of pdb filenames or trajectories for each component of mixture. If this is a list of trajectories, the trajectories will be saved to as temporary files to be run in packmol. Water molecules must have - MDTraj-standard residue name (HOH) and atom names (O, H1, H2), otherwise - MDtraj won't be able to perceive the bonds. + MDTraj-standard residue and atom names as defined in + mdtraj/formats/pdb/data/pdbNames.xml. n_molecules_list : list(int) The number of molecules of each mixture component. 
tolerance : float, optional, default=2.0 @@ -105,9 +105,9 @@ def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box Notes ----- - Water molecules must have MDTraj-standard residue name (HOH) and atom - names (O, H1, H2), otherwise MDTraj won't be able to perceive the bonds - and the Topology of the returned Trajectory will be incorrect. + Water molecules must have MDTraj-standard residue and atom names as defined + in mdtraj/formats/pdb/data/pdbNames.xml, otherwise MDTraj won't be able to + perceive the bonds and the Topology of the returned Trajectory will be incorrect. Be aware that MDTraj uses nanometers internally, but packmol uses angstrom units. The present function takes `tolerance` and `box_size` in angstrom units, but the output trajectory will have data in nm.