Skip to content

Commit

Permalink
Merge pull request #254 from choderalab/standardize-water
Browse files Browse the repository at this point in the history
Implement function to standardize_water
  • Loading branch information
andrrizzi authored May 1, 2017
2 parents a8cd338 + 5fa18f6 commit 9f66bfe
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 7 deletions.
73 changes: 68 additions & 5 deletions openmoltools/packmol.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,68 @@
end structure
"""


def standardize_water(mol_traj):
    """Ensure that a water molecule has the correct MDTraj Topology.

    The PDB format doesn't require CONECT records for a water molecule,
    but MDTraj correctly recognizes water molecule bonds only if they
    adopt specific residue and atom names. This function standardizes
    the names to ensure the Topology is correctly connected.

    Parameters
    ----------
    mol_traj : mdtraj.Trajectory
        A trajectory object describing a single water molecule. If the
        trajectory doesn't describe a water molecule, nothing happens.
        If it does, the residue is renamed to 'HOH' and the atoms to
        'O', 'H1' and 'H2' (hydrogens numbered in topology order), and
        ``create_standard_bonds()`` is called on the topology. The
        object is modified in place.

    Returns
    -------
    bool
        True if this was a water molecule, False otherwise.

    """
    topology = mol_traj.topology

    # A water molecule is a single residue made of exactly three atoms.
    if topology.n_atoms != 3 or topology.n_residues != 1:
        return False

    # Collect the indices of the oxygen and hydrogen atoms.
    atom_element_ids = {'O': [], 'H': []}
    for atom_index, atom in enumerate(topology.atoms):
        # An atom without an assigned element (``atom.element is None``)
        # cannot belong to a water molecule; treat it like any other
        # unexpected element instead of raising AttributeError.
        symbol = getattr(atom.element, 'symbol', None)
        if symbol not in atom_element_ids:
            # There's an element different than oxygen or hydrogen.
            return False
        atom_element_ids[symbol].append(atom_index)

    # This is water only if there are two hydrogens and one oxygen.
    if len(atom_element_ids['O']) != 1 or len(atom_element_ids['H']) != 2:
        return False

    # Rename residue and atoms to the standard names.
    topology.residue(0).name = 'HOH'
    [o_index] = atom_element_ids['O']
    h1_index, h2_index = atom_element_ids['H']
    for index, std_name in ((o_index, 'O'), (h1_index, 'H1'), (h2_index, 'H2')):
        topology.atom(index).name = std_name

    # Update bonds now that the water residue is standard.
    topology.create_standard_bonds()
    return True


def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box_size=None):
"""Run packmol to generate a box containing a mixture of molecules.
Parameters
----------
pdb_filenames_or_trajectories : list({str, Trajectory})
List of pdb filenames or trajectories for each component of mixture. If this is
a list of trajectories, the trajectories will be saved to as
temporary files to be run in packmol.
List of pdb filenames or trajectories for each component of mixture.
If this is a list of trajectories, the trajectories will be saved to
as temporary files to be run in packmol. Water molecules must have
MDTraj-standard residue and atom names as defined in
mdtraj/formats/pdb/data/pdbNames.xml.
n_molecules_list : list(int)
The number of molecules of each mixture component.
tolerance : float, optional, default=2.0
Expand All @@ -52,11 +105,20 @@ def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box
Notes
-----
Water molecules must have MDTraj-standard residue and atom names as defined
in mdtraj/formats/pdb/data/pdbNames.xml, otherwise MDTraj won't be able to
perceive the bonds and the Topology of the returned Trajectory will be incorrect.
Be aware that MDTraj uses nanometers internally, but packmol uses angstrom
units. The present function takes `tolerance` and `box_size` in
angstrom units, but the output trajectory will have data in nm.
units. The present function takes `tolerance` and `box_size` in angstrom
units, but the output trajectory will have data in nm.
Also note that OpenMM is pretty picky about the format of unit cell input,
so use the example in tests/test_packmol.py to ensure that you do the right thing.
See Also
--------
standardize_water
Standardize residue and atom names of a water molecule.
"""
assert len(pdb_filenames_or_trajectories) == len(n_molecules_list), "Must input same number of pdb filenames as num molecules"

Expand Down Expand Up @@ -125,6 +187,7 @@ def pack_box(pdb_filenames_or_trajectories, n_molecules_list, tolerance=2.0, box

return trj


def approximate_volume(pdb_filenames, n_molecules_list, box_scaleup_factor=2.0):
"""Approximate the appropriate box size based on the number and types of atoms present.
Expand Down
37 changes: 35 additions & 2 deletions openmoltools/tests/test_packmol.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import os
import tempfile
import numpy as np
import mdtraj as md
from unittest import skipIf
import logging
from mdtraj.testing import eq
from openmoltools import utils, packmol
import simtk.unit as u
from simtk.openmm import app
Expand All @@ -17,6 +16,40 @@
from rdkit.Chem import AllChem
except ImportError:
HAVE_RDKIT = False


def test_standardize_water():
    """Test utility function standardize_water.

    The water bonds must be recognized even when residue names do not
    match the standard definition in mdtraj.formats.pdb.data.residues.xml.

    """
    water_filepath = utils.get_data_filename("chemicals/water/water.mol2")
    water_traj = md.load(water_filepath)

    # mkstemp creates the file securely, unlike the deprecated and
    # race-prone mktemp. Only the path is needed, so close the descriptor.
    file_descriptor, water_pdb_filepath = tempfile.mkstemp(suffix='.pdb')
    os.close(file_descriptor)

    try:
        # Store in pdb format and lose CONECT records. The empty file
        # created by mkstemp is overwritten here.
        water_traj.save_pdb(water_pdb_filepath, force_overwrite=True)
        with open(water_pdb_filepath, 'r') as f:
            pdb_lines = f.readlines()
        with open(water_pdb_filepath, 'w') as f:
            f.writelines(line for line in pdb_lines
                         if not line.startswith('CONECT'))

        # Test pre-condition: MDTraj cannot detect water bonds automatically.
        water_traj = md.load(water_pdb_filepath)
        assert water_traj.topology.n_bonds == 0

        # The function modifies the Trajectory and bonds are now recognized.
        assert packmol.standardize_water(water_traj) is True
        assert water_traj.topology.n_bonds == 2
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.remove(water_pdb_filepath)


@skipIf(not HAVE_RDKIT, "Skipping testing of packmol conversion because rdkit not found.")
@skipIf(packmol.PACKMOL_PATH is None, "Skipping testing of packmol conversion because packmol not found.")
def test_packmol_simulation_ternary():
Expand Down

0 comments on commit 9f66bfe

Please sign in to comment.