OpenFreeEnergy · ijpulidos · Nov 7, 2024 · Nov 20, 2024 · Nov 20, 2024 · Dec 5, 2024
diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml
@@ -6,13 +6,20 @@ dependencies:
     # Base depends
   - gufe >=0.9.5
   - numpy
-  - openfe >=0.15  # TODO: Remove once we don't depend on openfe
+#  - openfe >=1.0  # TODO: Remove once we don't depend on openfe
   - openff-units
   - openmm
   - openmmforcefields >=0.14.1  # TODO: remove when upstream deps fix this
-  - pymbar <4
+  - openmmtools >=0.23.0
+  - pymbar ~=3.0
   - pydantic >=1.10.17
   - python
+    # openfe branch with protein mutation support (TEMPORARY)
+  - pip:
+      - "git+https://github.com/OpenFreeEnergy/openfe.git@protein-mutation-support"
+    # Dependencies for openfe branch (temporary)
+  - lomap2
+  - kartograf
 
     # Testing (optional deps)
   - espaloma_charge  # To us Espaloma FF in tests

diff --git a/feflow/protocols/nonequilibrium_cycling.py b/feflow/protocols/nonequilibrium_cycling.py
@@ -9,6 +9,7 @@
 import pickle
 import time
 
+from gufe import SolventComponent, ProteinComponent
 from gufe.settings import Settings
 from gufe.chemicalsystem import ChemicalSystem
 from gufe.mapping import ComponentMapping
@@ -31,6 +32,11 @@
 
 from ..settings import NonEquilibriumCyclingSettings
 from ..utils.data import serialize, deserialize
+from ..utils.misc import (
+    generate_omm_top_from_component,
+    get_residue_index_from_atom_index,
+    get_positions_from_component,
+)
 
 # Specific instance of logger for this module
 logger = logging.getLogger(__name__)
@@ -175,9 +181,15 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
         from openmmtools.integrators import PeriodicNonequilibriumIntegrator
         from gufe.components import SmallMoleculeComponent
         from openfe.protocols.openmm_rfe import _rfe_utils
-        from openfe.protocols.openmm_utils.system_validation import get_components
+        from openfe.protocols.openmm_utils.system_validation import (
+            get_alchemical_components,
+        )
         from feflow.utils.hybrid_topology import HybridTopologyFactory
         from feflow.utils.charge import get_alchemical_charge_difference
+        from feflow.utils.misc import (
+            get_typed_components,
+            register_ff_parameters_template,
+        )
 
         # Check compatibility between states (same receptor and solvent)
         self._check_states_compatibility(state_a, state_b)
@@ -187,13 +199,14 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
         )  # infer phase from systems and components
 
         # Get receptor components from systems if found (None otherwise)
-        solvent_comp, receptor_comp, small_mols_a = get_components(state_a)
+        solvent_comp_a = get_typed_components(state_a, SolventComponent)
+        protein_comps_a = get_typed_components(state_a, ProteinComponent)
+        small_mols_a = get_typed_components(state_a, SmallMoleculeComponent)
 
-        # Get ligand/small-mol components
-        ligand_mapping = mapping
-        ligand_a = ligand_mapping.componentA
-        ligand_b = ligand_mapping.componentB
+        # Get alchemical components
+        alchemical_comps = get_alchemical_components(state_a, state_b)
 
+        # TODO: Do we need to change something in the settings? Does the Protein mutation protocol require specific settings?
         # Get all the relevant settings
         settings: NonEquilibriumCyclingSettings = protocol.settings
         # Get settings for system generator
@@ -215,49 +228,28 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
             thermo_settings=thermodynamic_settings,
             integrator_settings=integrator_settings,
             cache=ffcache,
-            has_solvent=solvent_comp is not None,
+            has_solvent=bool(solvent_comp_a),
         )
 
         # Parameterizing small molecules
         self.logger.info("Parameterizing molecules")
-        # The following creates a dictionary with all the small molecules in the states, with the structure:
-        #    Dict[SmallMoleculeComponent, openff.toolkit.Molecule]
-        # Alchemical small mols
-        alchemical_small_mols_a = {ligand_a: ligand_a.to_openff()}
-        alchemical_small_mols_b = {ligand_b: ligand_b.to_openff()}
-        all_alchemical_mols = alchemical_small_mols_a | alchemical_small_mols_b
-        # non-alchemical common small mols
-        common_small_mols = {}
-        for comp in state_a.components.values():
-            # TODO: Refactor if/when gufe provides the functionality https://github.com/OpenFreeEnergy/gufe/issues/251
-            # NOTE: This relies on gufe key for "equality", important to keep in mind
-            if (
-                isinstance(comp, SmallMoleculeComponent)
-                and comp not in all_alchemical_mols
-            ):
-                common_small_mols[comp] = comp.to_openff()
-
-        # Assign partial charges to all small mols
-        all_openff_mols = list(
-            chain(all_alchemical_mols.values(), common_small_mols.values())
+        # Get small molecules from states
+        # TODO: Refactor if/when gufe provides the functionality https://github.com/OpenFreeEnergy/gufe/issues/251
+        state_a_small_mols = get_typed_components(state_a, SmallMoleculeComponent)
+        state_b_small_mols = get_typed_components(state_b, SmallMoleculeComponent)
+        all_small_mols = state_a_small_mols | state_b_small_mols
+
+        # Generate and register FF parameters in the system generator template
+        all_openff_mols = [comp.to_openff() for comp in all_small_mols]
+        register_ff_parameters_template(
+            system_generator, charge_settings, all_openff_mols
         )
-        self._assign_openff_partial_charges(
-            charge_settings=charge_settings, off_small_mols=all_openff_mols
-        )
-
-        # Force the creation of parameters
-        # This is necessary because we need to have the FF templates
-        # registered ahead of solvating the system.
-        for off_mol in all_openff_mols:
-            system_generator.create_system(
-                off_mol.to_topology().to_openmm(), molecules=[off_mol]
-            )
 
         # c. get OpenMM Modeller + a dictionary of resids for each component
-        state_a_modeller, comp_resids = system_creation.get_omm_modeller(
-            protein_comp=receptor_comp,
-            solvent_comp=solvent_comp,
-            small_mols=alchemical_small_mols_a | common_small_mols,
+        state_a_modeller, _ = system_creation.get_omm_modeller(
+            protein_comps=protein_comps_a,
+            solvent_comp=solvent_comp_a,
+            small_mols=small_mols_a,
             omm_forcefield=system_generator.forcefield,
             solvent_settings=solvation_settings,
         )
@@ -268,37 +260,51 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
         state_a_positions = to_openmm(from_openmm(state_a_modeller.getPositions()))
 
         # e. create the stateA System
+        # Note: if there are no small molecules, openmmforcefields requires molecules=None
         state_a_system = system_generator.create_system(
             state_a_modeller.topology,
-            molecules=list(
-                chain(alchemical_small_mols_a.values(), common_small_mols.values())
+            molecules=(
+                [mol.to_openff() for mol in state_a_small_mols]
+                if state_a_small_mols
+                else None
             ),
         )
 
         # 2. Get stateB system
-        # a. get the topology
+        # a. Generate topology, reusing state A topology as much as possible
+        # Note: We are only dealing with single alchemical components
+        state_b_alchem_top = generate_omm_top_from_component(
+            alchemical_comps["stateB"][0]
+        )
+        state_b_alchem_pos = get_positions_from_component(alchemical_comps["stateB"][0])
+        # We get the residue index from the mapping unique atom indices
+        # NOTE: We assume single residue/point/component mutation here
+        state_a_alchem_resindex = [
+            get_residue_index_from_atom_index(
+                state_a_topology, next(mapping.componentA_unique)
+            )
+        ]
         (
             state_b_topology,
             state_b_alchem_resids,
         ) = _rfe_utils.topologyhelpers.combined_topology(
             state_a_topology,
-            ligand_b.to_openff().to_topology().to_openmm(),
-            exclude_resids=comp_resids[ligand_a],
+            state_b_alchem_top,
+            exclude_resids=iter(state_a_alchem_resindex),
         )
 
         state_b_system = system_generator.create_system(
             state_b_topology,
-            molecules=list(
-                chain(alchemical_small_mols_b.values(), common_small_mols.values())
-            ),
+            molecules=[mol.to_openff() for mol in state_b_small_mols],
         )
 
-        #  c. Define correspondence mappings between the two systems
+        # TODO: This doesn't have to be a ligand mapping. i.e. for protein mutation.
+        # c. Define correspondence mappings between the two systems
         ligand_mappings = _rfe_utils.topologyhelpers.get_system_mappings(
             mapping.componentA_to_componentB,
             state_a_system,
             state_a_topology,
-            comp_resids[ligand_a],
+            state_a_alchem_resindex,
             state_b_system,
             state_b_topology,
             state_b_alchem_resids,
@@ -313,7 +319,8 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
             mapping,
             forcefield_settings.nonbonded_method,
             alchemical_settings.explicit_charge_correction,
-            solvent_comp,
+            # TODO: I don't understand why this isn't erroring when it's vacuum leg. review
+            solvent_comp_a,  # Solvent comp in a is expected to be the same as in b
         )
 
         if alchemical_settings.explicit_charge_correction:
@@ -329,18 +336,16 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
                 state_b_system,
                 ligand_mappings,
                 charge_difference,
-                solvent_comp,
+                solvent_comp_a,
             )
 
-        #  d. Finally get the positions
+        # d. Finally get the positions
         state_b_positions = _rfe_utils.topologyhelpers.set_and_check_new_positions(
             ligand_mappings,
             state_a_topology,
             state_b_topology,
             old_positions=ensure_quantity(state_a_positions, "openmm"),
-            insert_positions=ensure_quantity(
-                ligand_b.to_openff().conformers[0], "openmm"
-            ),
+            insert_positions=state_b_alchem_pos,
         )
 
         # TODO: handle the literals directly in the HTF object (issue #42)
@@ -349,6 +354,8 @@ def _execute(self, ctx, *, protocol, state_a, state_b, mapping, **inputs):
             softcore_LJ_v2 = True
         elif alchemical_settings.softcore_LJ.lower() == "beutler":
             softcore_LJ_v2 = False
+        # TODO: We need to test HTF for protein mutation cases, probably.
+        #  What are ways to quickly check an HTF is correct?
         # Now we can create the HTF from the previous objects
         hybrid_factory = HybridTopologyFactory(
             state_a_system,

diff --git a/feflow/tests/test_hybrid_topology.py b/feflow/tests/test_hybrid_topology.py
@@ -247,7 +247,7 @@ def tip4p_benzene_to_toluene_htf(
 
         # Create state A model & get relevant OpenMM objects
         benz_model, comp_resids = system_creation.get_omm_modeller(
-            protein_comp=None,
+            protein_comps=None,
             solvent_comp=SolventComponent(),
             small_mols={benzene: benz_off},
             omm_forcefield=tip4p_system_generator.forcefield,

diff --git a/feflow/tests/test_protein_mutation.py b/feflow/tests/test_protein_mutation.py
@@ -545,13 +545,13 @@ def test_proline_mutation_fails(
         ala_to_pro_mapping : LigandAtomMapping
             Mapping object representing the atom mapping from ALA to PRO.
         """
-        from feflow.utils.exceptions import MethodConstraintError
+        from feflow.utils.exceptions import MethodLimitationtError
 
         settings = ProteinMutationProtocol.default_settings()
         protocol = ProteinMutationProtocol(settings=settings)
 
         # Expect an error when trying to create the DAG with this invalid transformation
-        with pytest.raises(MethodConstraintError, match="proline.*not supported"):
+        with pytest.raises(MethodLimitationtError, match="proline.*not supported"):
             protocol.create(
                 stateA=ala_capped_system,
                 stateB=pro_capped_system,
@@ -580,13 +580,13 @@ def test_double_charge_fails(
         lys_to_glu_mapping : LigandAtomMapping
             Atom mapping defining the correspondence between atoms in the lysine and glutamate systems.
         """
-        from feflow.utils.exceptions import NotSupportedError
+        from feflow.utils.exceptions import ProtocolSupportError
 
         settings = ProteinMutationProtocol.default_settings()
         protocol = ProteinMutationProtocol(settings=settings)
 
         # Expect an error when trying to create the DAG with this invalid transformation
-        with pytest.raises(NotSupportedError, match="double charge.*not supported"):
+        with pytest.raises(ProtocolSupportError, match="double charge.*not supported"):
             protocol.create(
                 stateA=lys_capped_system,
                 stateB=glu_capped_system,

diff --git a/feflow/tests/test_utils.py b/feflow/tests/test_utils.py
@@ -0,0 +1,78 @@
+"""
+Module to test utility functions in feflow.utils
+"""
+
+from gufe.components import SmallMoleculeComponent, ProteinComponent, SolventComponent
+from feflow.utils.misc import get_typed_components, register_ff_parameters_template
+
+
+def test_get_typed_components_vacuum(benzene_vacuum_system):
+    """Check get_typed_components on a vacuum phase chemical system.
+    Expects exactly one SmallMoleculeComponent and no protein or solvent components.
+    """
+    small_mol_comps = get_typed_components(
+        benzene_vacuum_system, SmallMoleculeComponent
+    )
+    protein_comps = get_typed_components(benzene_vacuum_system, ProteinComponent)
+    solvent_comps = get_typed_components(benzene_vacuum_system, SolventComponent)
+
+    assert (
+        len(small_mol_comps) == 1
+    ), f"Expected one (1) small molecule component in vacuum system. Found {len(small_mol_comps)}."
+    assert (
+        len(protein_comps) == 0
+    ), "Found protein component(s) in vacuum system. Expected none."
+    assert (
+        len(solvent_comps) == 0
+    ), "Found solvent component(s) in vacuum system. Expected none."
+
+
+def test_get_typed_components_solvent(benzene_solvent_system):
+    """Check get_typed_components on a solvent phase chemical system.
+    Expects one SmallMoleculeComponent, one SolventComponent and no protein components.
+    """
+    small_mol_comps = get_typed_components(
+        benzene_solvent_system, SmallMoleculeComponent
+    )
+    protein_comps = get_typed_components(benzene_solvent_system, ProteinComponent)
+    solvent_comps = get_typed_components(benzene_solvent_system, SolventComponent)
+
+    assert (
+        len(small_mol_comps) == 1
+    ), f"Expected one (1) small molecule component in solvent system. Found {len(small_mol_comps)}."
+    assert (
+        len(protein_comps) == 0
+    ), "Found protein component(s) in solvent system. Expected none."
+    assert (
+        len(solvent_comps) == 1
+    ), f"Expected one (1) solvent component in solvent system. Found {len(solvent_comps)}."
+
+
+def test_register_ff_parameters_template(
+    toluene_solvent_system, short_settings, tmp_path
+):
+    from openff.toolkit import Molecule
+    from openfe.protocols.openmm_utils import system_creation
+    from openmmforcefields.generators import SystemGenerator
+    from feflow.settings import OpenFFPartialChargeSettings as ChargeSettings
+    from openfe.protocols.openmm_utils.system_validation import get_components
+    # Smoke test: there are no assertions, so it passes as long as nothing below raises.
+    solvent_comp, receptor_comp, small_mols_a = get_components(toluene_solvent_system)
+    # Build a system generator from the short test settings, using tmp_path as the cache.
+    system_generator = system_creation.get_system_generator(
+        forcefield_settings=short_settings.forcefield_settings,
+        thermo_settings=short_settings.thermo_settings,
+        integrator_settings=short_settings.integrator_settings,
+        has_solvent=solvent_comp is not None,
+        cache=tmp_path,
+    )
+    # NOTE(review): the next line rebinds system_generator, discarding the one built above - confirm intended.
+    system_generator = SystemGenerator(small_molecule_forcefield="openff-2.1.0")
+    charge_settings = ChargeSettings(
+        partial_charge_method="am1bcc",
+        off_toolkit_backend="ambertools",
+        number_of_conformers=1,
+        nagl_model=None,
+    )
+    openff_mols = [Molecule.from_smiles("CCO"), Molecule.from_smiles("CCN")]
+    register_ff_parameters_template(system_generator, charge_settings, openff_mols)