Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Protein mutation support in Neq Cycling Setup Unit #106

Draft
wants to merge 11 commits into
base: protein-mutation-protocol
Choose a base branch
from
11 changes: 9 additions & 2 deletions devtools/conda-envs/test_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,20 @@ dependencies:
# Base depends
- gufe >=0.9.5
- numpy
- openfe >=0.15 # TODO: Remove once we don't depend on openfe
# - openfe >=1.0 # TODO: Remove once we don't depend on openfe
- openff-units
- openmm
- openmmforcefields >=0.14.1 # TODO: remove when upstream deps fix this
- pymbar <4
- openmmtools >=0.23.0
- pymbar ~=3.0
- pydantic >=1.10.17
- python
# openfe branch with protein mutation support (TEMPORARY)
- pip:
- "git+https://github.com/OpenFreeEnergy/openfe.git@protein-mutation-support"
# Dependencies for openfe branch (temporary)
- lomap2
- kartograf

# Testing (optional deps)
- espaloma_charge # To use Espaloma FF in tests
Expand Down
123 changes: 65 additions & 58 deletions feflow/protocols/nonequilibrium_cycling.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pickle
import time

from gufe import SolventComponent, ProteinComponent
from gufe.settings import Settings
from gufe.chemicalsystem import ChemicalSystem
from gufe.mapping import ComponentMapping
Expand All @@ -31,6 +32,11 @@

from ..settings import NonEquilibriumCyclingSettings
from ..utils.data import serialize, deserialize
from ..utils.misc import (
generate_omm_top_from_component,
get_residue_index_from_atom_index,
get_positions_from_component,
)

# Specific instance of logger for this module
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -175,9 +181,15 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
from openmmtools.integrators import PeriodicNonequilibriumIntegrator
from gufe.components import SmallMoleculeComponent
from openfe.protocols.openmm_rfe import _rfe_utils
from openfe.protocols.openmm_utils.system_validation import get_components
from openfe.protocols.openmm_utils.system_validation import (
get_alchemical_components,
)
from feflow.utils.hybrid_topology import HybridTopologyFactory
from feflow.utils.charge import get_alchemical_charge_difference
from feflow.utils.misc import (
get_typed_components,
register_ff_parameters_template,
)

# Check compatibility between states (same receptor and solvent)
self._check_states_compatibility(state_a, state_b)
Expand All @@ -187,13 +199,14 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
) # infer phase from systems and components

# Get receptor components from systems if found (None otherwise)
solvent_comp, receptor_comp, small_mols_a = get_components(state_a)
solvent_comp_a = get_typed_components(state_a, SolventComponent)
protein_comps_a = get_typed_components(state_a, ProteinComponent)
small_mols_a = get_typed_components(state_a, SmallMoleculeComponent)

# Get ligand/small-mol components
ligand_mapping = mapping
ligand_a = ligand_mapping.componentA
ligand_b = ligand_mapping.componentB
# Get alchemical components
alchemical_comps = get_alchemical_components(state_a, state_b)

# TODO: Do we need to change something in the settings? Does the Protein mutation protocol require specific settings?
# Get all the relevant settings
settings: NonEquilibriumCyclingSettings = protocol.settings
# Get settings for system generator
Expand All @@ -215,49 +228,28 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
thermo_settings=thermodynamic_settings,
integrator_settings=integrator_settings,
cache=ffcache,
has_solvent=solvent_comp is not None,
has_solvent=bool(solvent_comp_a),
)

# Parameterizing small molecules
self.logger.info("Parameterizing molecules")
# The following creates a dictionary with all the small molecules in the states, with the structure:
# Dict[SmallMoleculeComponent, openff.toolkit.Molecule]
# Alchemical small mols
alchemical_small_mols_a = {ligand_a: ligand_a.to_openff()}
alchemical_small_mols_b = {ligand_b: ligand_b.to_openff()}
all_alchemical_mols = alchemical_small_mols_a | alchemical_small_mols_b
# non-alchemical common small mols
common_small_mols = {}
for comp in state_a.components.values():
# TODO: Refactor if/when gufe provides the functionality https://github.com/OpenFreeEnergy/gufe/issues/251
# NOTE: This relies on gufe key for "equality", important to keep in mind
if (
isinstance(comp, SmallMoleculeComponent)
and comp not in all_alchemical_mols
):
common_small_mols[comp] = comp.to_openff()

# Assign partial charges to all small mols
all_openff_mols = list(
chain(all_alchemical_mols.values(), common_small_mols.values())
# Get small molecules from states
# TODO: Refactor if/when gufe provides the functionality https://github.com/OpenFreeEnergy/gufe/issues/251
state_a_small_mols = get_typed_components(state_a, SmallMoleculeComponent)
state_b_small_mols = get_typed_components(state_b, SmallMoleculeComponent)
all_small_mols = state_a_small_mols | state_b_small_mols

# Generate and register FF parameters in the system generator template
all_openff_mols = [comp.to_openff() for comp in all_small_mols]
register_ff_parameters_template(
system_generator, charge_settings, all_openff_mols
)
self._assign_openff_partial_charges(
charge_settings=charge_settings, off_small_mols=all_openff_mols
)

# Force the creation of parameters
# This is necessary because we need to have the FF templates
# registered ahead of solvating the system.
for off_mol in all_openff_mols:
system_generator.create_system(
off_mol.to_topology().to_openmm(), molecules=[off_mol]
)

# c. get OpenMM Modeller + a dictionary of resids for each component
state_a_modeller, comp_resids = system_creation.get_omm_modeller(
protein_comp=receptor_comp,
solvent_comp=solvent_comp,
small_mols=alchemical_small_mols_a | common_small_mols,
state_a_modeller, _ = system_creation.get_omm_modeller(
protein_comps=protein_comps_a,
solvent_comp=solvent_comp_a,
small_mols=small_mols_a,
omm_forcefield=system_generator.forcefield,
solvent_settings=solvation_settings,
)
Expand All @@ -268,37 +260,51 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
state_a_positions = to_openmm(from_openmm(state_a_modeller.getPositions()))

# e. create the stateA System
# Note: If there are no small mols ommffs requires a None
state_a_system = system_generator.create_system(
state_a_modeller.topology,
molecules=list(
chain(alchemical_small_mols_a.values(), common_small_mols.values())
molecules=(
[mol.to_openff() for mol in state_a_small_mols]
if state_a_small_mols
else None
),
)

# 2. Get stateB system
# a. get the topology
# a. Generate topology, reusing the state A topology as much as possible
# Note: We are only dealing with single alchemical components
state_b_alchem_top = generate_omm_top_from_component(
alchemical_comps["stateB"][0]
)
state_b_alchem_pos = get_positions_from_component(alchemical_comps["stateB"][0])
# We get the residue index from the mapping unique atom indices
# NOTE: We assume single residue/point/component mutation here
state_a_alchem_resindex = [
get_residue_index_from_atom_index(
state_a_topology, next(mapping.componentA_unique)
)
]
(
state_b_topology,
state_b_alchem_resids,
) = _rfe_utils.topologyhelpers.combined_topology(
state_a_topology,
ligand_b.to_openff().to_topology().to_openmm(),
exclude_resids=comp_resids[ligand_a],
state_b_alchem_top,
exclude_resids=iter(state_a_alchem_resindex),
)

state_b_system = system_generator.create_system(
state_b_topology,
molecules=list(
chain(alchemical_small_mols_b.values(), common_small_mols.values())
),
molecules=[mol.to_openff() for mol in state_b_small_mols],
)

# c. Define correspondence mappings between the two systems
# TODO: This doesn't have to be a ligand mapping. i.e. for protein mutation.
# c. Define correspondence mappings between the two systems
ligand_mappings = _rfe_utils.topologyhelpers.get_system_mappings(
mapping.componentA_to_componentB,
state_a_system,
state_a_topology,
comp_resids[ligand_a],
state_a_alchem_resindex,
state_b_system,
state_b_topology,
state_b_alchem_resids,
Expand All @@ -313,7 +319,8 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
mapping,
forcefield_settings.nonbonded_method,
alchemical_settings.explicit_charge_correction,
solvent_comp,
# TODO: I don't understand why this isn't erroring when it's vacuum leg. review
solvent_comp_a, # Solvent comp in a is expected to be the same as in b
)

if alchemical_settings.explicit_charge_correction:
Expand All @@ -329,18 +336,16 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
state_b_system,
ligand_mappings,
charge_difference,
solvent_comp,
solvent_comp_a,
)

# d. Finally get the positions
# d. Finally get the positions
state_b_positions = _rfe_utils.topologyhelpers.set_and_check_new_positions(
ligand_mappings,
state_a_topology,
state_b_topology,
old_positions=ensure_quantity(state_a_positions, "openmm"),
insert_positions=ensure_quantity(
ligand_b.to_openff().conformers[0], "openmm"
),
insert_positions=state_b_alchem_pos,
)

# TODO: handle the literals directly in the HTF object (issue #42)
Expand All @@ -349,6 +354,8 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
softcore_LJ_v2 = True
elif alchemical_settings.softcore_LJ.lower() == "beutler":
softcore_LJ_v2 = False
# TODO: We need to test HTF for protein mutation cases, probably.
# What are ways to quickly check an HTF is correct?
# Now we can create the HTF from the previous objects
hybrid_factory = HybridTopologyFactory(
state_a_system,
Expand Down
2 changes: 1 addition & 1 deletion feflow/tests/test_hybrid_topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def tip4p_benzene_to_toluene_htf(

# Create state A model & get relevant OpenMM objects
benz_model, comp_resids = system_creation.get_omm_modeller(
protein_comp=None,
protein_comps=None,
solvent_comp=SolventComponent(),
small_mols={benzene: benz_off},
omm_forcefield=tip4p_system_generator.forcefield,
Expand Down
8 changes: 4 additions & 4 deletions feflow/tests/test_protein_mutation.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,13 +545,13 @@ def test_proline_mutation_fails(
ala_to_pro_mapping : LigandAtomMapping
Mapping object representing the atom mapping from ALA to PRO.
"""
from feflow.utils.exceptions import MethodConstraintError
from feflow.utils.exceptions import MethodLimitationtError

settings = ProteinMutationProtocol.default_settings()
protocol = ProteinMutationProtocol(settings=settings)

# Expect an error when trying to create the DAG with this invalid transformation
with pytest.raises(MethodConstraintError, match="proline.*not supported"):
with pytest.raises(MethodLimitationtError, match="proline.*not supported"):
protocol.create(
stateA=ala_capped_system,
stateB=pro_capped_system,
Expand Down Expand Up @@ -580,13 +580,13 @@ def test_double_charge_fails(
lys_to_glu_mapping : LigandAtomMapping
Atom mapping defining the correspondence between atoms in the lysine and glutamate systems.
"""
from feflow.utils.exceptions import NotSupportedError
from feflow.utils.exceptions import ProtocolSupportError

settings = ProteinMutationProtocol.default_settings()
protocol = ProteinMutationProtocol(settings=settings)

# Expect an error when trying to create the DAG with this invalid transformation
with pytest.raises(NotSupportedError, match="double charge.*not supported"):
with pytest.raises(ProtocolSupportError, match="double charge.*not supported"):
protocol.create(
stateA=lys_capped_system,
stateB=glu_capped_system,
Expand Down
78 changes: 78 additions & 0 deletions feflow/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Module to test utility functions in feflow.utils
"""

from gufe.components import SmallMoleculeComponent, ProteinComponent, SolventComponent
from feflow.utils.misc import get_typed_components, register_ff_parameters_template


def test_get_typed_components_vacuum(benzene_vacuum_system):
    """Test extracting typed components from a vacuum phase chemical system.

    The vacuum system is expected to contain exactly one
    SmallMoleculeComponent and no protein or solvent components.
    """
    small_mol_comps = get_typed_components(
        benzene_vacuum_system, SmallMoleculeComponent
    )
    protein_comps = get_typed_components(benzene_vacuum_system, ProteinComponent)
    solvent_comps = get_typed_components(benzene_vacuum_system, SolventComponent)

    # The failure message names the vacuum system (the fixture under test), so
    # a failing assertion points at the right chemical system.
    assert (
        len(small_mol_comps) == 1
    ), f"Expected one (1) small molecule component in vacuum system. Found {len(small_mol_comps)}."
    assert (
        len(protein_comps) == 0
    ), "Found protein component(s) in vacuum system. Expected none."
    assert (
        len(solvent_comps) == 0
    ), "Found solvent component(s) in vacuum system. Expected none."


def test_get_typed_components_solvent(benzene_solvent_system):
    """Test extracting typed components from a solvent phase chemical system.

    The solvent system is expected to contain exactly one
    SmallMoleculeComponent, exactly one SolventComponent and no protein
    components.
    """
    small_mol_comps = get_typed_components(
        benzene_solvent_system, SmallMoleculeComponent
    )
    protein_comps = get_typed_components(benzene_solvent_system, ProteinComponent)
    solvent_comps = get_typed_components(benzene_solvent_system, SolventComponent)

    # The failure message names the solvent system (the fixture under test), so
    # a failing assertion points at the right chemical system.
    assert (
        len(small_mol_comps) == 1
    ), f"Expected one (1) small molecule component in solvent system. Found {len(small_mol_comps)}."
    assert (
        len(protein_comps) == 0
    ), "Found protein component(s) in solvent system. Expected none."
    assert (
        len(solvent_comps) == 1
    ), f"Expected one (1) solvent component in solvent system. Found {len(solvent_comps)}."


def test_register_ff_parameters_template(
    toluene_solvent_system, short_settings, tmp_path
):
    """Smoke test for ``register_ff_parameters_template``.

    Builds a system generator and charge settings, then registers force field
    templates for two small molecules created from SMILES. The test makes no
    assertions; it passes as long as none of the calls raises.
    """
    from openff.toolkit import Molecule
    from openfe.protocols.openmm_utils import system_creation
    from openmmforcefields.generators import SystemGenerator
    from feflow.settings import OpenFFPartialChargeSettings as ChargeSettings
    from openfe.protocols.openmm_utils.system_validation import get_components

    # Only the solvent component is used below; the other two are discarded.
    solvent_comp, _receptor_comp, _small_mols = get_components(toluene_solvent_system)

    ff_settings = short_settings.forcefield_settings
    thermo_settings = short_settings.thermo_settings
    integrator_settings = short_settings.integrator_settings
    has_solvent = solvent_comp is not None

    # NOTE(review): the generator built here is replaced by the plain
    # SystemGenerator on the next statement, so only its construction is
    # exercised -- confirm which of the two generators the test should use.
    system_generator = system_creation.get_system_generator(
        forcefield_settings=ff_settings,
        thermo_settings=thermo_settings,
        integrator_settings=integrator_settings,
        has_solvent=has_solvent,
        cache=tmp_path,
    )
    system_generator = SystemGenerator(small_molecule_forcefield="openff-2.1.0")

    charge_settings = ChargeSettings(
        partial_charge_method="am1bcc",
        off_toolkit_backend="ambertools",
        number_of_conformers=1,
        nagl_model=None,
    )

    # Two small test molecules, built from their SMILES strings.
    openff_mols = [Molecule.from_smiles(smiles) for smiles in ("CCO", "CCN")]
    register_ff_parameters_template(system_generator, charge_settings, openff_mols)
Loading
Loading