From fb4b088c90124804b30a7e62729914ab541e734f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Thu, 7 Nov 2024 16:04:35 -0500 Subject: [PATCH 01/11] ADding TODO/Comments for things to refactor/review. --- feflow/protocols/nonequilibrium_cycling.py | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index bc52fcb..961fa37 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -187,13 +187,20 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): ) # infer phase from systems and components # Get receptor components from systems if found (None otherwise) + # TODO: test that this work with components from protein mutations, including protein protein interactions. solvent_comp, receptor_comp, small_mols_a = get_components(state_a) # Get ligand/small-mol components + # TODO: Is it be possible to infer the alchemical component from mapping? We would only + # provide a mapping for components that are going through alchemical transformations. + # Mapping wouldn't be a LigandAtomMapping but rather a ProteinMapping of some sort. + # Right now we are assuming that the ligand is the alchemical component, but we don't have to + # hardcode that. ligand_mapping = mapping ligand_a = ligand_mapping.componentA ligand_b = ligand_mapping.componentB + # TODO: Do we need to change something in the settings? Does the Protein mutation protocol require specific settings? # Get all the relevant settings settings: NonEquilibriumCyclingSettings = protocol.settings # Get settings for system generator @@ -210,6 +217,7 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): else: ffcache = None + system_generator = system_creation.get_system_generator( forcefield_settings=forcefield_settings, thermo_settings=thermodynamic_settings, @@ -222,6 +230,7 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): self.logger.info("Parameterizing molecules") # The following creates a dictionary with all the small molecules in the states, with the structure: # Dict[SmallMoleculeComponent, openff.toolkit.Molecule] + # TODO: This should rely again on getting the alchemical components from the mapping # Alchemical small mols alchemical_small_mols_a = {ligand_a: ligand_a.to_openff()} alchemical_small_mols_b = {ligand_b: ligand_b.to_openff()} @@ -237,6 +246,7 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): ): common_small_mols[comp] = comp.to_openff() + # TODO: We should parametrize all the small mols anyway. Shouldn't change in protein mutation. # Assign partial charges to all small mols all_openff_mols = list( chain(all_alchemical_mols.values(), common_small_mols.values()) @@ -253,6 +263,9 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): off_mol.to_topology().to_openmm(), molecules=[off_mol] ) + # TODO: get_omm_modeller would need to be adapted to deal with protein mutation cases. ex. protein-protein. + # Protein comps no longer optional, but others could be. How to generalize this? + # Refactor function to handle multiple protein components, similarly to what we do with small mols. # c. get OpenMM Modeller + a dictionary of resids for each component state_a_modeller, comp_resids = system_creation.get_omm_modeller( protein_comp=receptor_comp, @@ -267,6 +280,8 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): state_a_topology = state_a_modeller.getTopology() state_a_positions = to_openmm(from_openmm(state_a_modeller.getPositions())) + # TODO: system_generator and openmmforcefields don't really know how to deal with empty lists, but they use None. + # We would need to refactor accordingly, maybe we need yet another helper function here. Refactoring omm forcefields is unlikely. # e. create the stateA System state_a_system = system_generator.create_system( state_a_modeller.topology, @@ -275,6 +290,12 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): ), ) + # TODO: Here we would need to exclude the resids of the topology that contains the alchemical component + # How do we know that? Is a map between topology objects and components needed? + # Assumptions here: + # * Components in A have the same positions of components in B EXCEPT for the alchemical residue + # * Only the alchemical component is changed, all the other components are shared. + # Specifically, we need a way to get a map from component to resids, that way we get the resids that we want to exclude easily. # 2. Get stateB system # a. get the topology ( @@ -294,6 +315,7 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): ) # c. Define correspondence mappings between the two systems + # TODO: According to docs this should already work. Double check. ligand_mappings = _rfe_utils.topologyhelpers.get_system_mappings( mapping.componentA_to_componentB, state_a_system, @@ -306,6 +328,7 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): fix_constraints=True, ) + # TODO: This should also be working as is. Maybe review docstring in openfe function. # Handle charge corrections/transformations # Get the change difference between the end states # and check if the charge correction used is appropriate @@ -316,6 +339,7 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): solvent_comp, ) + # TODO: Should also already work as is... if alchemical_settings.explicit_charge_correction: alchem_water_resids = _rfe_utils.topologyhelpers.get_alchemical_waters( state_a_topology, @@ -332,6 +356,7 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): solvent_comp, ) + # TODO: According to docs this should work as well. # d. Finally get the positions state_b_positions = _rfe_utils.topologyhelpers.set_and_check_new_positions( ligand_mappings, @@ -349,6 +374,8 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): softcore_LJ_v2 = True elif alchemical_settings.softcore_LJ.lower() == "beutler": softcore_LJ_v2 = False + # TODO: We need to test HTF for protein mutation cases, probably. + # What are ways to quickly check an HTF is correct? # Now we can create the HTF from the previous objects hybrid_factory = HybridTopologyFactory( state_a_system, From a6a1705c5772bc010bceff9089ed07239a708d92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:29:35 -0500 Subject: [PATCH 02/11] Removing unneeded todo --- feflow/protocols/nonequilibrium_cycling.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index 961fa37..e309846 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -290,12 +290,6 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): ), ) - # TODO: Here we would need to exclude the resids of the topology that contains the alchemical component - # How do we know that? Is a map between topology objects and components needed? - # Assumptions here: - # * Components in A have the same positions of components in B EXCEPT for the alchemical residue - # * Only the alchemical component is changed, all the other components are shared. - # Specifically, we need a way to get a map from component to resids, that way we get the resids that we want to exclude easily. # 2. Get stateB system # a. get the topology ( From 24ccc77ccaa4397d4913d415c91a7db07e732707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Wed, 20 Nov 2024 11:35:14 -0500 Subject: [PATCH 03/11] Fix custom exceptions in tests --- feflow/tests/test_protein_mutation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/feflow/tests/test_protein_mutation.py b/feflow/tests/test_protein_mutation.py index fcee48c..880eaab 100644 --- a/feflow/tests/test_protein_mutation.py +++ b/feflow/tests/test_protein_mutation.py @@ -545,13 +545,13 @@ def test_proline_mutation_fails( ala_to_pro_mapping : LigandAtomMapping Mapping object representing the atom mapping from ALA to PRO. """ - from feflow.utils.exceptions import MethodConstraintError + from feflow.utils.exceptions import MethodLimitationtError settings = ProteinMutationProtocol.default_settings() protocol = ProteinMutationProtocol(settings=settings) # Expect an error when trying to create the DAG with this invalid transformation - with pytest.raises(MethodConstraintError, match="proline.*not supported"): + with pytest.raises(MethodLimitationtError, match="proline.*not supported"): protocol.create( stateA=ala_capped_system, stateB=pro_capped_system, @@ -580,13 +580,13 @@ def test_double_charge_fails( lys_to_glu_mapping : LigandAtomMapping Atom mapping defining the correspondence between atoms in the lysine and glutamate systems. """ - from feflow.utils.exceptions import NotSupportedError + from feflow.utils.exceptions import ProtocolSupportError settings = ProteinMutationProtocol.default_settings() protocol = ProteinMutationProtocol(settings=settings) # Expect an error when trying to create the DAG with this invalid transformation - with pytest.raises(NotSupportedError, match="double charge.*not supported"): + with pytest.raises(ProtocolSupportError, match="double charge.*not supported"): protocol.create( stateA=lys_capped_system, stateB=glu_capped_system, From 086f2466ccb2e5b2e8b1aa80d447cf6769d98da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Thu, 5 Dec 2024 09:34:29 -0500 Subject: [PATCH 04/11] Refined dependencies --- devtools/conda-envs/test_env.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 7868626..7a115d3 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -6,11 +6,12 @@ dependencies: # Base depends - gufe >=0.9.5 - numpy - - openfe >=0.15 # TODO: Remove once we don't depend on openfe + - openfe >=1.0 # TODO: Remove once we don't depend on openfe - openff-units - openmm - openmmforcefields >=0.14.1 # TODO: remove when upstream deps fix this - - pymbar <4 + - openmmtools >=0.23.0 + - pymbar ~=3.0 - pydantic >=1.10.17 - python From e997aea081dd281407c05b98e00fd846303f6731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Tue, 21 Jan 2025 16:31:20 -0500 Subject: [PATCH 05/11] Misc utility functions and tests --- feflow/tests/test_utils.py | 59 +++++++++++++++++++++ feflow/utils/misc.py | 103 +++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 feflow/tests/test_utils.py create mode 100644 feflow/utils/misc.py diff --git a/feflow/tests/test_utils.py b/feflow/tests/test_utils.py new file mode 100644 index 0000000..cdae9b6 --- /dev/null +++ b/feflow/tests/test_utils.py @@ -0,0 +1,59 @@ +""" +Module to test utility functions in feflow.utils +""" +from gufe.components import SmallMoleculeComponent, ProteinComponent, SolventComponent +from feflow.utils.misc import get_typed_components, register_ff_parameters_template + +def test_get_typed_components_vacuum(benzene_vacuum_system): + """Test extracting typed components from a vacuum phase chemical system. + One that only has a SmallMoleculeComponent. + """ + small_mol_comps = get_typed_components(benzene_vacuum_system, SmallMoleculeComponent) + protein_comps = get_typed_components(benzene_vacuum_system, ProteinComponent) + solvent_comps = get_typed_components(benzene_vacuum_system, SolventComponent) + + assert len(small_mol_comps) == 1, f"Expected one (1) small molecule component in solvent system. Found {len(small_mol_comps)}" + assert len(protein_comps) == 0, "Found protein component(s) in vacuum system. Expected none." + assert len(solvent_comps) == 0, "Found solvent component(s) in vacuum system. Expected none." + + +def test_get_typed_components_solvent(benzene_solvent_system): + """Test extracting typed components from a solvent phase chemical system. + One that has a single SmallMoleculeComponent and a single SolventComponent. + """ + small_mol_comps = get_typed_components(benzene_solvent_system, SmallMoleculeComponent) + protein_comps = get_typed_components(benzene_solvent_system, ProteinComponent) + solvent_comps = get_typed_components(benzene_solvent_system, SolventComponent) + + assert len(small_mol_comps) == 1, f"Expected one (1) small molecule component in vacuum system. Found {len(small_mol_comps)}." + assert len(protein_comps) == 0, "Found protein component(s) in solvent system. Expected none." + assert len(solvent_comps) == 1, f"Expected one (1) solvent component in solvent system. Found {len(solvent_comps)}." + + + +def test_register_ff_parameters_template(toluene_solvent_system, short_settings, tmp_path): + from openff.toolkit import Molecule + from openfe.protocols.openmm_utils import system_creation + from openmmforcefields.generators import SystemGenerator + from feflow.settings import OpenFFPartialChargeSettings as ChargeSettings + from openfe.protocols.openmm_utils.system_validation import get_components + + solvent_comp, receptor_comp, small_mols_a = get_components(toluene_solvent_system) + + system_generator = system_creation.get_system_generator( + forcefield_settings=short_settings.forcefield_settings, + thermo_settings=short_settings.thermo_settings, + integrator_settings=short_settings.integrator_settings, + has_solvent=solvent_comp is not None, + cache=tmp_path, + ) + + system_generator = SystemGenerator(small_molecule_forcefield="openff-2.1.0") + charge_settings = ChargeSettings( + partial_charge_method="am1bcc", + off_toolkit_backend="ambertools", + number_of_conformers=1, + nagl_model=None + ) + openff_mols = [Molecule.from_smiles("CCO"), Molecule.from_smiles("CCN")] + register_ff_parameters_template(system_generator, charge_settings, openff_mols) \ No newline at end of file diff --git a/feflow/utils/misc.py b/feflow/utils/misc.py new file mode 100644 index 0000000..3699944 --- /dev/null +++ b/feflow/utils/misc.py @@ -0,0 +1,103 @@ +""" +Miscellaneous utility functions to extract data from gufe objects (and others) +""" + +from typing import Type +import gufe + + +# TODO: should this be a method for the gufe.ChemicalSystem class? +def get_typed_components(system: gufe.ChemicalSystem, comptype: Type[gufe.Component]) -> set[ + gufe.Component]: + """ + Retrieve all components of a specific type from a `gufe.ChemicalSystem`. + + This function searches the components within the provided chemical system + and returns a list of all components matching the specified type. + + Parameters + ---------- + system : gufe.ChemicalSystem + The chemical system from which to extract components. + comptype : Type[gufe.Component] + The type of component to search for, such as `ProteinComponent`, + `SmallMoleculeComponent`, or `SolventComponent`. + + Returns + ------- + set[gufe.Component] + A set of unique components matching the specified type. If no components + of the given type are found, an empty set is returned. + + """ + if not issubclass(comptype, gufe.Component): + raise TypeError(f"`comptype` must be a subclass of `gufe.Component`. Got: {comptype}") + + ret_comps = {comp for comp in system.values() + if isinstance(comp, comptype)} + + return ret_comps + + +def register_ff_parameters_template(system_generator, charge_settings, openff_mols): + """ + Register force field parameters in the system generator using provided charge settings + and OpenFF molecules. + + This utility function assigns partial charges to the specified OpenFF molecules using + the provided charge settings, then forces the creation of force field parameters by + registering the templates with the system generator. This ensures the required force field + templates are available prior to solvating the system. + + Parameters + ---------- + system_generator : openmmforcefields.generators.SystemGenerator + The system generator used to create force field parameters for the molecules. + charge_settings : feflow.settings.ChargeSettings + Settings for partial charge assignment, including the method, toolkit backend, + number of conformers to generate, and optional NAGL model. + openff_mols : list[openff.toolkit.Molecule] + List of OpenFF molecules for which force field parameters are registered. + + Notes + ----- + - Partial charges are assigned to the molecules using the OpenFF Toolkit based on the + specified `charge_settings`. + - Force field templates are registered by creating a system for each molecule with + the system generator. This is necessary to ensure templates are available before + solvating or otherwise processing the system. + + Examples + -------- + >>> from openmmforcefields.generators import SystemGenerator + >>> from openff.toolkit import Molecule + >>> from feflow.settings import OpenFFPartialChargeSettings as ChargeSettings + >>> + >>> system_generator = SystemGenerator(small_molecule_forcefield="openff-2.1.0") + >>> charge_settings = ChargeSettings( + >>> partial_charge_method="am1bcc", + >>> off_toolkit_backend="openeye", + >>> number_of_conformers=1, + >>> nagl_model=None + >>> ) + >>> openff_mols = [Molecule.from_smiles("CCO"), Molecule.from_smiles("CCN")] + >>> register_ff_parameters_template(system_generator, charge_settings, openff_mols) + """ + from feflow.utils.charge import assign_offmol_partial_charges + + # Assign partial charges to all small mols -- we use openff for that + for mol in openff_mols: + assign_offmol_partial_charges( + offmol=mol, + overwrite=False, + method=charge_settings.partial_charge_method, + toolkit_backend=charge_settings.off_toolkit_backend, + generate_n_conformers=charge_settings.number_of_conformers, + nagl_model=charge_settings.nagl_model, + ) + # Force the creation of parameters + # This is necessary because we need to have the FF templates + # registered ahead of solvating the system. + system_generator.create_system( + mol.to_topology().to_openmm(), molecules=[mol] + ) From 8fd3877043e1200e80677763c9da406b6a07d391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Wed, 29 Jan 2025 19:01:40 -0500 Subject: [PATCH 06/11] Adding miscellaneous utility functions --- feflow/utils/misc.py | 109 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/feflow/utils/misc.py b/feflow/utils/misc.py index 3699944..6ff533d 100644 --- a/feflow/utils/misc.py +++ b/feflow/utils/misc.py @@ -101,3 +101,112 @@ def register_ff_parameters_template(system_generator, charge_settings, openff_mo system_generator.create_system( mol.to_topology().to_openmm(), molecules=[mol] ) + + +# TODO: Maybe this needs to be in another module with a more telling name. Also, overkill? +def generate_omm_top_from_component(comp: gufe.SmallMoleculeComponent | gufe.ProteinComponent): + """ + Generate an OpenMM `Topology` object from a given `SmallMoleculeComponent` or + `ProteinComponent`. + + This function attempts to generate an OpenMM `Topology` object from the provided + component. It handles both components that directly support conversion to an + OpenMM topology (`to_openmm_topology`) and those that require an intermediate + conversion through OpenFF (`to_openff().to_topology().to_openmm()`). + + Parameters + ---------- + comp : gufe.SmallMoleculeComponent | gufe.ProteinComponent + The component to be converted into an OpenMM `Topology`. Supported components include + `SmallMoleculeComponent` and `ProteinComponent`. + + Returns + ------- + openmm.app.Topology + The corresponding OpenMM `Topology` object for the given component. + + Raises + ------ + AttributeError + If the component does not support the necessary conversion methods. + """ + + try: + topology = comp.to_openmm_topology() + except AttributeError: + topology = comp.to_openff().to_topology().to_openmm() + + return topology + + +def get_positions_from_component(comp: gufe.SmallMoleculeComponent | gufe.ProteinComponent): + """ + Retrieve the positions of atoms in a component as an OpenMM Quantity. + + This function tries to get the atomic positions from the component. If the component has + a method `to_openmm_positions()`, it uses that to fetch the positions. If the component + doesn't have that method (i.e., it doesn't support OpenMM directly), it falls back to + extracting the positions from the OpenFF conformers and takes the first conformer. + + Parameters + ---------- + comp : gufe.SmallMoleculeComponent | gufe.ProteinComponent + The component (small molecule or protein) for which atomic positions are required. + + Returns + ------- + openmm.Quantity + A quantity representing the atomic positions in OpenMM format. + + Raises + ------ + AttributeError + If neither `to_openmm_positions()` nor OpenFF conformers are available. + """ + # NOTE: Could potentially be done with rdkit if we want to rely solely on it, something like: + # # Retrieve the first conformer (if multiple conformers exist) + # mol = comp.to_rdkit() + # conformer = mol.GetConformer(0) + # conformer.GetPositions() + from openff.units import ensure_quantity + + try: + positions = comp.to_openmm_positions() + except AttributeError: + positions = comp.to_openff().conformers[0] + + return ensure_quantity(positions, "openmm") + + +# TODO: This is probably something that should go in openmmtools/openmm +def get_residue_index_from_atom_index(topology, atom_index): + """ + Retrieve the residue index for a given atom index in an OpenMM topology. + + This function iterates through the residues and their atoms in the topology + to locate the residue that contains the specified atom index. + + Parameters + ---------- + topology : openmm.app.Topology + The OpenMM topology object containing residues and atoms. + atom_index : int + The index of the atom whose residue ID is to be found. + + Returns + ------- + int + The index of the residue that contains the specified atom. + + Raises + ------ + ValueError + If the atom index is not found in the topology. + """ + for residue in topology.residues(): + for atom in residue.atoms(): + if atom.index == atom_index: + return residue.index + + # If the loop completes without finding the atom, raise the ValueError + raise ValueError(f"Atom index {atom_index} not found in topology.") \ No newline at end of file From ad4ef62048ff3cc3f2a457c16387aa63a5aa844e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Wed, 29 Jan 2025 19:05:22 -0500 Subject: [PATCH 07/11] Using new utility funcs from both feflow and openfe (branch) --- feflow/protocols/nonequilibrium_cycling.py | 124 ++++++++------------- feflow/tests/test_hybrid_topology.py | 2 +- 2 files changed, 47 insertions(+), 79 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index e309846..f25f176 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -9,6 +9,7 @@ import pickle import time +from gufe import SolventComponent, ProteinComponent from gufe.settings import Settings from gufe.chemicalsystem import ChemicalSystem from gufe.mapping import ComponentMapping @@ -31,6 +32,8 @@ from ..settings import NonEquilibriumCyclingSettings from ..utils.data import serialize, deserialize +from ..utils.misc import generate_omm_top_from_component, get_residue_index_from_atom_index, \ + get_positions_from_component # Specific instance of logger for this module logger = logging.getLogger(__name__) @@ -175,9 +178,10 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): from openmmtools.integrators import PeriodicNonequilibriumIntegrator from gufe.components import SmallMoleculeComponent from openfe.protocols.openmm_rfe import _rfe_utils - from openfe.protocols.openmm_utils.system_validation import get_components + from openfe.protocols.openmm_utils.system_validation import get_alchemical_components from feflow.utils.hybrid_topology import HybridTopologyFactory from feflow.utils.charge import get_alchemical_charge_difference + from feflow.utils.misc import get_typed_components, register_ff_parameters_template # Check compatibility between states (same receptor and solvent) self._check_states_compatibility(state_a, state_b) @@ -187,18 +191,12 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): ) # infer phase from systems and components # Get receptor components from systems if found (None otherwise) - # TODO: test that this work with components from protein mutations, including protein protein interactions. - solvent_comp, receptor_comp, small_mols_a = get_components(state_a) - - # Get ligand/small-mol components - # TODO: Is it be possible to infer the alchemical component from mapping? We would only - # provide a mapping for components that are going through alchemical transformations. - # Mapping wouldn't be a LigandAtomMapping but rather a ProteinMapping of some sort. - # Right now we are assuming that the ligand is the alchemical component, but we don't have to - # hardcode that. - ligand_mapping = mapping - ligand_a = ligand_mapping.componentA - ligand_b = ligand_mapping.componentB + solvent_comp_a = get_typed_components(state_a, SolventComponent) + protein_comps_a = get_typed_components(state_a, ProteinComponent) + small_mols_a = get_typed_components(state_a, SmallMoleculeComponent) + + # Get alchemical components + alchemical_comps = get_alchemical_components(state_a, state_b) # TODO: Do we need to change something in the settings? Does the Protein mutation protocol require specific settings? # Get all the relevant settings @@ -217,60 +215,31 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): else: ffcache = None - system_generator = system_creation.get_system_generator( forcefield_settings=forcefield_settings, thermo_settings=thermodynamic_settings, integrator_settings=integrator_settings, cache=ffcache, - has_solvent=solvent_comp is not None, + has_solvent=bool(solvent_comp_a), ) # Parameterizing small molecules self.logger.info("Parameterizing molecules") - # The following creates a dictionary with all the small molecules in the states, with the structure: - # Dict[SmallMoleculeComponent, openff.toolkit.Molecule] - # TODO: This should rely again on getting the alchemical components from the mapping - # Alchemical small mols - alchemical_small_mols_a = {ligand_a: ligand_a.to_openff()} - alchemical_small_mols_b = {ligand_b: ligand_b.to_openff()} - all_alchemical_mols = alchemical_small_mols_a | alchemical_small_mols_b - # non-alchemical common small mols - common_small_mols = {} - for comp in state_a.components.values(): - # TODO: Refactor if/when gufe provides the functionality https://github.com/OpenFreeEnergy/gufe/issues/251 - # NOTE: This relies on gufe key for "equality", important to keep in mind - if ( - isinstance(comp, SmallMoleculeComponent) - and comp not in all_alchemical_mols - ): - common_small_mols[comp] = comp.to_openff() - - # TODO: We should parametrize all the small mols anyway. Shouldn't change in protein mutation. - # Assign partial charges to all small mols - all_openff_mols = list( - chain(all_alchemical_mols.values(), common_small_mols.values()) - ) - self._assign_openff_partial_charges( - charge_settings=charge_settings, off_small_mols=all_openff_mols - ) + # Get small molecules from states + # TODO: Refactor if/when gufe provides the functionality https://github.com/OpenFreeEnergy/gufe/issues/251 + state_a_small_mols = get_typed_components(state_a, SmallMoleculeComponent) + state_b_small_mols = get_typed_components(state_b, SmallMoleculeComponent) + all_small_mols = state_a_small_mols | state_b_small_mols - # Force the creation of parameters - # This is necessary because we need to have the FF templates - # registered ahead of solvating the system. - for off_mol in all_openff_mols: - system_generator.create_system( - off_mol.to_topology().to_openmm(), molecules=[off_mol] - ) + # Generate and register FF parameters in the system generator template + all_openff_mols = [comp.to_openff() for comp in all_small_mols] + register_ff_parameters_template(system_generator, charge_settings, all_openff_mols) - # TODO: get_omm_modeller would need to be adapted to deal with protein mutation cases. ex. protein-protein. - # Protein comps no longer optional, but others could be. How to generalize this? - # Refactor function to handle multiple protein components, similarly to what we do with small mols. # c. get OpenMM Modeller + a dictionary of resids for each component - state_a_modeller, comp_resids = system_creation.get_omm_modeller( - protein_comp=receptor_comp, - solvent_comp=solvent_comp, - small_mols=alchemical_small_mols_a | common_small_mols, + state_a_modeller, _ = system_creation.get_omm_modeller( + protein_comps=protein_comps_a, + solvent_comp=solvent_comp_a, + small_mols=small_mols_a, omm_forcefield=system_generator.forcefield, solvent_settings=solvation_settings, ) @@ -280,41 +249,44 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): state_a_topology = state_a_modeller.getTopology() state_a_positions = to_openmm(from_openmm(state_a_modeller.getPositions())) - # TODO: system_generator and openmmforcefields don't really know how to deal with empty lists, but they use None. - # We would need to refactor accordingly, maybe we need yet another helper function here. Refactoring omm forcefields is unlikely. # e. create the stateA System + # Note: If there are no small mols ommffs requires a None state_a_system = system_generator.create_system( state_a_modeller.topology, - molecules=list( - chain(alchemical_small_mols_a.values(), common_small_mols.values()) - ), + molecules=[mol.to_openff() for mol in + state_a_small_mols] if state_a_small_mols else None, ) # 2. Get stateB system - # a. get the topology + # a. Generate topology reusing state A topology as possible + # Note: We are only dealing with single alchemical components + state_b_alchem_top = generate_omm_top_from_component(alchemical_comps["stateB"][0]) + state_b_alchem_pos = get_positions_from_component(alchemical_comps["stateB"][0]) + # We get the residue index from the mapping unique atom indices + # NOTE: We assume single residue/point/component mutation here + state_a_alchem_resindex = [get_residue_index_from_atom_index(state_a_topology, + next(mapping.componentA_unique))] ( state_b_topology, state_b_alchem_resids, ) = _rfe_utils.topologyhelpers.combined_topology( state_a_topology, - ligand_b.to_openff().to_topology().to_openmm(), - exclude_resids=comp_resids[ligand_a], + state_b_alchem_top, + exclude_resids=iter(state_a_alchem_resindex), ) state_b_system = system_generator.create_system( state_b_topology, - molecules=list( - chain(alchemical_small_mols_b.values(), common_small_mols.values()) - ), + molecules=[mol.to_openff() for mol in state_b_small_mols], ) - # c. Define correspondence mappings between the two systems - # TODO: According to docs this should already work. Double check. + # TODO: This doesn't have to be a ligand mapping. i.e. for protein mutation. + # c. Define correspondence mappings between the two systems ligand_mappings = _rfe_utils.topologyhelpers.get_system_mappings( mapping.componentA_to_componentB, state_a_system, state_a_topology, - comp_resids[ligand_a], + state_a_alchem_resindex, state_b_system, state_b_topology, state_b_alchem_resids, @@ -322,7 +294,6 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): fix_constraints=True, ) - # TODO: This should also be working as is. Maybe review docstring in openfe function. # Handle charge corrections/transformations # Get the change difference between the end states # and check if the charge correction used is appropriate @@ -330,10 +301,10 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): mapping, forcefield_settings.nonbonded_method, alchemical_settings.explicit_charge_correction, - solvent_comp, + # TODO: I don't understand why this isn't erroring when it's vacuum leg. review + solvent_comp_a, # Solvent comp in a is expected to be the same as in b ) - # TODO: Should also already work as is... if alchemical_settings.explicit_charge_correction: alchem_water_resids = _rfe_utils.topologyhelpers.get_alchemical_waters( state_a_topology, @@ -347,19 +318,16 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): state_b_system, ligand_mappings, charge_difference, - solvent_comp, + solvent_comp_a, ) - # TODO: According to docs this should work as well. - # d. Finally get the positions + # d. Finally get the positions state_b_positions = _rfe_utils.topologyhelpers.set_and_check_new_positions( ligand_mappings, state_a_topology, state_b_topology, old_positions=ensure_quantity(state_a_positions, "openmm"), - insert_positions=ensure_quantity( - ligand_b.to_openff().conformers[0], "openmm" - ), + insert_positions=state_b_alchem_pos, ) # TODO: handle the literals directly in the HTF object (issue #42) diff --git a/feflow/tests/test_hybrid_topology.py b/feflow/tests/test_hybrid_topology.py index 429f11c..fdc6d4f 100644 --- a/feflow/tests/test_hybrid_topology.py +++ b/feflow/tests/test_hybrid_topology.py @@ -247,7 +247,7 @@ def tip4p_benzene_to_toluene_htf( # Create state A model & get relevant OpenMM objects benz_model, comp_resids = system_creation.get_omm_modeller( - protein_comp=None, + protein_comps=None, solvent_comp=SolventComponent(), small_mols={benzene: benz_off}, omm_forcefield=tip4p_system_generator.forcefield, From 7b8ab831464267e48da67b632dbde75713b4886a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Jan 2025 17:34:14 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- feflow/protocols/nonequilibrium_cycling.py | 38 ++++++++++++++----- feflow/tests/test_utils.py | 43 ++++++++++++++++------ feflow/utils/misc.py | 26 +++++++------ 3 files changed, 74 insertions(+), 33 deletions(-) diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py index f25f176..52ffd55 100644 --- a/feflow/protocols/nonequilibrium_cycling.py +++ b/feflow/protocols/nonequilibrium_cycling.py @@ -32,8 +32,11 @@ from ..settings import NonEquilibriumCyclingSettings from ..utils.data import serialize, deserialize -from ..utils.misc import generate_omm_top_from_component, get_residue_index_from_atom_index, \ - get_positions_from_component +from ..utils.misc import ( + generate_omm_top_from_component, + get_residue_index_from_atom_index, + get_positions_from_component, +) # Specific instance of logger for this module logger = logging.getLogger(__name__) @@ -178,10 +181,15 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): from openmmtools.integrators import PeriodicNonequilibriumIntegrator from gufe.components import SmallMoleculeComponent from openfe.protocols.openmm_rfe import _rfe_utils - from openfe.protocols.openmm_utils.system_validation import get_alchemical_components + from openfe.protocols.openmm_utils.system_validation import ( + get_alchemical_components, + ) from feflow.utils.hybrid_topology import HybridTopologyFactory from feflow.utils.charge import get_alchemical_charge_difference - from feflow.utils.misc import get_typed_components, register_ff_parameters_template + from feflow.utils.misc import ( + get_typed_components, + register_ff_parameters_template, + ) # Check compatibility between states (same receptor and solvent) self._check_states_compatibility(state_a, state_b) @@ -233,7 +241,9 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): # Generate and register FF parameters in the system generator template all_openff_mols = [comp.to_openff() for comp in all_small_mols] - register_ff_parameters_template(system_generator, charge_settings, all_openff_mols) + register_ff_parameters_template( + system_generator, charge_settings, all_openff_mols + ) # c. get OpenMM Modeller + a dictionary of resids for each component state_a_modeller, _ = system_creation.get_omm_modeller( @@ -253,19 +263,27 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs): # Note: If there are no small mols ommffs requires a None state_a_system = system_generator.create_system( state_a_modeller.topology, - molecules=[mol.to_openff() for mol in - state_a_small_mols] if state_a_small_mols else None, + molecules=( + [mol.to_openff() for mol in state_a_small_mols] + if state_a_small_mols + else None + ), ) # 2. Get stateB system # a. Generate topology reusing state A topology as possible # Note: We are only dealing with single alchemical components - state_b_alchem_top = generate_omm_top_from_component(alchemical_comps["stateB"][0]) + state_b_alchem_top = generate_omm_top_from_component( + alchemical_comps["stateB"][0] + ) state_b_alchem_pos = get_positions_from_component(alchemical_comps["stateB"][0]) # We get the residue index from the mapping unique atom indices # NOTE: We assume single residue/point/component mutation here - state_a_alchem_resindex = [get_residue_index_from_atom_index(state_a_topology, - next(mapping.componentA_unique))] + state_a_alchem_resindex = [ + get_residue_index_from_atom_index( + state_a_topology, next(mapping.componentA_unique) + ) + ] ( state_b_topology, state_b_alchem_resids, diff --git a/feflow/tests/test_utils.py b/feflow/tests/test_utils.py index cdae9b6..0eb6120 100644 --- a/feflow/tests/test_utils.py +++ b/feflow/tests/test_utils.py @@ -1,37 +1,56 @@ """ Module to test utility functions in feflow.utils """ + from gufe.components import SmallMoleculeComponent, ProteinComponent, SolventComponent from feflow.utils.misc import get_typed_components, register_ff_parameters_template + def test_get_typed_components_vacuum(benzene_vacuum_system): """Test extracting typed components from a vacuum phase chemical system. One that only has a SmallMoleculeComponent. """ - small_mol_comps = get_typed_components(benzene_vacuum_system, SmallMoleculeComponent) + small_mol_comps = get_typed_components( + benzene_vacuum_system, SmallMoleculeComponent + ) protein_comps = get_typed_components(benzene_vacuum_system, ProteinComponent) solvent_comps = get_typed_components(benzene_vacuum_system, SolventComponent) - assert len(small_mol_comps) == 1, f"Expected one (1) small molecule component in solvent system. Found {len(small_mol_comps)}" - assert len(protein_comps) == 0, "Found protein component(s) in vacuum system. Expected none." - assert len(solvent_comps) == 0, "Found solvent component(s) in vacuum system. Expected none." + assert ( + len(small_mol_comps) == 1 + ), f"Expected one (1) small molecule component in solvent system. Found {len(small_mol_comps)}" + assert ( + len(protein_comps) == 0 + ), "Found protein component(s) in vacuum system. Expected none." + assert ( + len(solvent_comps) == 0 + ), "Found solvent component(s) in vacuum system. Expected none." def test_get_typed_components_solvent(benzene_solvent_system): """Test extracting typed components from a solvent phase chemical system. One that has a single SmallMoleculeComponent and a single SolventComponent. """ - small_mol_comps = get_typed_components(benzene_solvent_system, SmallMoleculeComponent) + small_mol_comps = get_typed_components( + benzene_solvent_system, SmallMoleculeComponent + ) protein_comps = get_typed_components(benzene_solvent_system, ProteinComponent) solvent_comps = get_typed_components(benzene_solvent_system, SolventComponent) - assert len(small_mol_comps) == 1, f"Expected one (1) small molecule component in vacuum system. Found {len(small_mol_comps)}." - assert len(protein_comps) == 0, "Found protein component(s) in solvent system. Expected none." - assert len(solvent_comps) == 1, f"Expected one (1) solvent component in solvent system. Found {len(solvent_comps)}." - + assert ( + len(small_mol_comps) == 1 + ), f"Expected one (1) small molecule component in vacuum system. Found {len(small_mol_comps)}." + assert ( + len(protein_comps) == 0 + ), "Found protein component(s) in solvent system. Expected none." + assert ( + len(solvent_comps) == 1 + ), f"Expected one (1) solvent component in solvent system. Found {len(solvent_comps)}." -def test_register_ff_parameters_template(toluene_solvent_system, short_settings, tmp_path): +def test_register_ff_parameters_template( + toluene_solvent_system, short_settings, tmp_path +): from openff.toolkit import Molecule from openfe.protocols.openmm_utils import system_creation from openmmforcefields.generators import SystemGenerator @@ -53,7 +72,7 @@ def test_register_ff_parameters_template(toluene_solvent_system, short_settings, partial_charge_method="am1bcc", off_toolkit_backend="ambertools", number_of_conformers=1, - nagl_model=None + nagl_model=None, ) openff_mols = [Molecule.from_smiles("CCO"), Molecule.from_smiles("CCN")] - register_ff_parameters_template(system_generator, charge_settings, openff_mols) \ No newline at end of file + register_ff_parameters_template(system_generator, charge_settings, openff_mols) diff --git a/feflow/utils/misc.py b/feflow/utils/misc.py index 6ff533d..ab4f97d 100644 --- a/feflow/utils/misc.py +++ b/feflow/utils/misc.py @@ -7,8 +7,9 @@ # TODO: should this be a method for the gufe.ChemicalSystem class? -def get_typed_components(system: gufe.ChemicalSystem, comptype: Type[gufe.Component]) -> set[ - gufe.Component]: +def get_typed_components( + system: gufe.ChemicalSystem, comptype: type[gufe.Component] +) -> set[gufe.Component]: """ Retrieve all components of a specific type from a `gufe.ChemicalSystem`. @@ -31,10 +32,11 @@ def get_typed_components(system: gufe.ChemicalSystem, comptype: Type[gufe.Compon """ if not issubclass(comptype, gufe.Component): - raise TypeError(f"`comptype` must be a subclass of `gufe.Component`. Got: {comptype}") + raise TypeError( + f"`comptype` must be a subclass of `gufe.Component`. Got: {comptype}" + ) - ret_comps = {comp for comp in system.values() - if isinstance(comp, comptype)} + ret_comps = {comp for comp in system.values() if isinstance(comp, comptype)} return ret_comps @@ -98,13 +100,13 @@ def register_ff_parameters_template(system_generator, charge_settings, openff_mo # Force the creation of parameters # This is necessary because we need to have the FF templates # registered ahead of solvating the system. - system_generator.create_system( - mol.to_topology().to_openmm(), molecules=[mol] - ) + system_generator.create_system(mol.to_topology().to_openmm(), molecules=[mol]) # TODO: Maybe this needs to be in another module with a more telling name. Also, overkill? -def generate_omm_top_from_component(comp: gufe.SmallMoleculeComponent | gufe.ProteinComponent): +def generate_omm_top_from_component( + comp: gufe.SmallMoleculeComponent | gufe.ProteinComponent, +): """ Generate an OpenMM `Topology` object from a given `SmallMoleculeComponent` or `ProteinComponent`. @@ -139,7 +141,9 @@ def generate_omm_top_from_component(comp: gufe.SmallMoleculeComponent | gufe.Pro return topology -def get_positions_from_component(comp: gufe.SmallMoleculeComponent | gufe.ProteinComponent): +def get_positions_from_component( + comp: gufe.SmallMoleculeComponent | gufe.ProteinComponent, +): """ Retrieve the positions of atoms in a component as an OpenMM Quantity. @@ -209,4 +213,4 @@ def get_residue_index_from_atom_index(topology, atom_index): return residue.index # If the loop completes without finding the atom, raise the ValueError - raise ValueError(f"Atom index {atom_index} not found in topology.") \ No newline at end of file + raise ValueError(f"Atom index {atom_index} not found in topology.") From 08aedfe147230d6171af5a9fc75333a99880c74d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Thu, 30 Jan 2025 13:34:36 -0500 Subject: [PATCH 09/11] Using protein mutation support openfe branch for now --- devtools/conda-envs/test_env.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 7a115d3..2d754f1 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -14,6 +14,9 @@ dependencies: - pymbar ~=3.0 - pydantic >=1.10.17 - python + # openfe branch with protein mutation support (TEMPORARY) + - pip: + - "git+https://github.com/OpenFreeEnergy/openfe.git@protein-mutation-support" # Testing (optional deps) - espaloma_charge # To us Espaloma FF in tests From 9ad227a777b02da97f55be34decd62093359a470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Thu, 30 Jan 2025 13:42:30 -0500 Subject: [PATCH 10/11] remove conda-forge openfe dependency for now --- devtools/conda-envs/test_env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 2d754f1..e540b10 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -6,7 +6,7 @@ dependencies: # Base depends - gufe >=0.9.5 - numpy - - openfe >=1.0 # TODO: Remove once we don't depend on openfe +# - openfe >=1.0 # TODO: Remove once we don't depend on openfe - openff-units - openmm - openmmforcefields >=0.14.1 # TODO: remove when upstream deps fix this From 5134755ef1177fe4bbffe2d48c36c7b0e9f5bf3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Pulido?= <2949729+ijpulidos@users.noreply.github.com> Date: Thu, 30 Jan 2025 15:57:49 -0500 Subject: [PATCH 11/11] Adding temporary dependencies --- devtools/conda-envs/test_env.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index e540b10..7b28a1d 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -17,6 +17,9 @@ dependencies: # openfe branch with protein mutation support (TEMPORARY) - pip: - "git+https://github.com/OpenFreeEnergy/openfe.git@protein-mutation-support" + # Dependencies for openfe branch (temporary) + - lomap2 + - kartograf # Testing (optional deps) - espaloma_charge # To us Espaloma FF in tests