From 2b4dc4ae0ae2bbedb54fafc26f0a09ef06ce0b27 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 14 Dec 2024 15:36:21 +0100 Subject: [PATCH 01/47] Fix readme. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 0aeb80a0..23dbef60 100644 --- a/README.rst +++ b/README.rst @@ -87,7 +87,7 @@ appropriate information for mapping angles to models. How To Cite =========== -If you use ``stk`` please cite +If you use ``cgexplore`` please cite https://github.com/andrewtarzia/CGExplore From 3ddabfea3c6eb1408e05f43f997b692b4b5fb79d Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 15 Dec 2024 17:35:03 +0100 Subject: [PATCH 02/47] Fix readme. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bed808a0..046f1cb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ ] requires-python = ">=3.11" dynamic = ["version"] -readme = "README.md" +readme = "README.rst" [project.optional-dependencies] dev = [ From 6800803231a3e6bf84c031a5a26deaeca0c33b1d Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 15 Dec 2024 17:47:07 +0100 Subject: [PATCH 03/47] Fix readme. --- README.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.rst b/README.rst index 23dbef60..48779d36 100644 --- a/README.rst +++ b/README.rst @@ -8,7 +8,7 @@ Overview ======== -:mod:`cgexplore` or ``cgx`` is a general toolkit built on +``cgexplore`` or ``cgx`` is a general toolkit built on `stk `_ for constructing, optimising and exploring molecular coarse-grained models. @@ -20,7 +20,7 @@ optimising and exploring molecular coarse-grained models. Installation ============ -:mod:`cgexplore` can be installed with pip: +``cgexplore`` can be installed with pip: .. code-block:: bash @@ -33,10 +33,10 @@ With dependancies `openmm `_ and `openmmtools `_ @@ -44,7 +44,7 @@ instructions to download and installed at Developer Setup --------------- -To develop with :mod:`cgexplore`, you can clone the repo and use +To develop with ``cgexplore``, you can clone the repo and use `just `_ to setup the dev environment: .. code-block:: bash @@ -57,7 +57,7 @@ Usage **To reproduce data in DOI: `10.1039/D3SC03991A `_**: -Download the source code from `first_paper_example - presubmission` +Download the source code from ``first_paper_example - presubmission`` release from ``Releases``.I do not guarantee that running the example code on the current version will work. However, with each pull request a test is run as a GitHub Action connected to this @@ -68,20 +68,20 @@ convention has changed and force field xml files should provide the appropriate information for mapping angles to models. -* The directory `cgexplore` contains the actual source code for the package. -* The directory `first_paper_example` contains the code for `10.1039/D3SC03991A `_. - * `generate_XX.py` generates cage structures for different topology sets - * `env_set.py` sets a specific environment for file outputs - * `plot_XX.py` produces images and figures, and performs analysis +* The directory ``cgexplore`` contains the actual source code for the package. +* The directory ``first_paper_example`` contains the code for `10.1039/D3SC03991A `_. + * ``generate_XX.py`` generates cage structures for different topology sets + * ``env_set.py`` sets a specific environment for file outputs + * ``plot_XX.py`` produces images and figures, and performs analysis .. important:: **Warning**: If you have a CUDA-capable GPU and attempt to use CUDA in the - first example, you may get `NaN` errors due to the torsion restriction for + first example, you may get ``NaN`` errors due to the torsion restriction for angles at 180 degrees, which cause problematic forces. This will be handled in future versions of the code. And logically, I would suggest removing the - torsion restriction for those angles. The `platform` can be handled through - this argument in `build_building_blocks` and `build_populations`, which I - currently set to `None`, meaning `OpenMM` will decide for itself. + torsion restriction for those angles. The ``platform`` can be handled through + this argument in ``build_building_blocks`` and ``build_populations``, which I + currently set to ``None``, meaning ``OpenMM`` will decide for itself. How To Cite @@ -98,7 +98,7 @@ and Publications using CGExplore ============================ -* Using stk for constructing larger numbers of coarse-grained models: `Systematic exploration of accessible topologies of cage molecules via minimalistic models`__ +* Using stk for constructing larger numbers of coarse-grained models: `Systematic exploration of accessible topologies of cage molecules via minimalistic models `_. Acknowledgements From 58fffc6fdbdf22b089ba4f0db72c58e8e30b4ba5 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Mon, 16 Dec 2024 14:20:30 +0100 Subject: [PATCH 04/47] Refactoring from tscram. --- src/cgexplore/molecular.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cgexplore/molecular.py b/src/cgexplore/molecular.py index c90f5627..e6f299af 100644 --- a/src/cgexplore/molecular.py +++ b/src/cgexplore/molecular.py @@ -13,7 +13,9 @@ FourC1Arm, LinearPrecursor, Precursor, + SixBead, SquarePrecursor, + StericSixBead, ThreeC0Arm, ThreeC1Arm, ThreeC2Arm, @@ -43,8 +45,10 @@ "LinearPrecursor", "Precursor", "PrecursorGenerator", + "SixBead", "SpindryConformer", "SquarePrecursor", + "StericSixBead", "ThreeC0Arm", "ThreeC1Arm", "ThreeC2Arm", From c4828ef8fc26439eb933175845da4cb77af30482 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Mon, 16 Dec 2024 14:20:58 +0100 Subject: [PATCH 05/47] Comment on unused code. Change method name. --- src/cgexplore/_internal/scram/enumeration.py | 162 ++++--------------- 1 file changed, 32 insertions(+), 130 deletions(-) diff --git a/src/cgexplore/_internal/scram/enumeration.py b/src/cgexplore/_internal/scram/enumeration.py index f5fcc007..321f8fd9 100644 --- a/src/cgexplore/_internal/scram/enumeration.py +++ b/src/cgexplore/_internal/scram/enumeration.py @@ -29,7 +29,12 @@ class TopologyIterator: - """Iterate over topology graphs.""" + """Iterate over topology graphs. + + This is an old version of this code, which I do not recommend using over + the `IHomolepticTopologyIterator`. + + """ def __init__( self, @@ -609,134 +614,6 @@ def get_mashed_topology( return None -class HomolepticTopologyIterator(TopologyIterator): - """Iterate over topology graphs.""" - - def __init__( # noqa: PLR0915 - self, - tetra_bb: stk.BuildingBlock, - ditopic_bb: stk.BuildingBlock, - multiplier: int, - stoichiometry: tuple[int, int], - ) -> None: - """Initialize.""" - if stoichiometry == (2, 1): - if multiplier == 1: - self._building_blocks = { - tetra_bb: (0,), - ditopic_bb: (1, 2), - } - self._underlying_topology = UnalignedM1L2 - self._scale_multiplier = 2 - self._num_scrambles = 10 - self._num_mashes = 2 - self._beta = 10 - - if multiplier == 2: # noqa: PLR2004 - self._building_blocks = { - tetra_bb: (0, 1), - ditopic_bb: (2, 3, 4, 5), - } - self._underlying_topology = stk.cage.M2L4Lantern - self._scale_multiplier = 2 - self._num_scrambles = 40 - self._num_mashes = 1 - self._beta = 10 - - if multiplier == 3: # noqa: PLR2004 - self._building_blocks = { - tetra_bb: (0, 1, 2), - ditopic_bb: (3, 4, 5, 6, 7, 8), - } - self._underlying_topology = stk.cage.M3L6 - self._scale_multiplier = 2 - self._num_scrambles = 100 - self._num_mashes = 1 - self._beta = 10 - - if multiplier == 4: # noqa: PLR2004 - self._building_blocks = { - tetra_bb: (0, 1, 2, 3), - ditopic_bb: (4, 5, 6, 7, 8, 9, 10, 11), - } - self._underlying_topology = CGM4L8 - self._scale_multiplier = 2 - self._num_scrambles = 100 - self._num_mashes = 1 - self._beta = 10 - - if multiplier == 6: # noqa: PLR2004 - self._building_blocks = { - tetra_bb: range(6), - ditopic_bb: range(6, 18), - } - self._underlying_topology = stk.cage.M6L12Cube - self._scale_multiplier = 5 - self._num_scrambles = 500 - self._num_mashes = 1 - self._beta = 10 - - if multiplier == 8: # noqa: PLR2004 - self._building_blocks = { - tetra_bb: range(8), - ditopic_bb: range(8, 24), - } - self._underlying_topology = stk.cage.EightPlusSixteen - self._scale_multiplier = 5 - self._num_scrambles = 500 - self._num_mashes = 1 - self._beta = 1 - - if multiplier == 10: # noqa: PLR2004 - self._building_blocks = { - tetra_bb: range(10), - ditopic_bb: range(10, 30), - } - self._underlying_topology = stk.cage.TenPlusTwenty - self._scale_multiplier = 5 - self._num_scrambles = 500 - self._num_mashes = 1 - self._beta = 1 - - if multiplier == 12: # noqa: PLR2004 - self._building_blocks = { - tetra_bb: range(12), - ditopic_bb: range(12, 36), - } - self._underlying_topology = CGM12L24 - self._scale_multiplier = 5 - self._num_scrambles = 500 - self._num_mashes = 1 - self._beta = 1 - - self._init_vertex_prototypes = deepcopy( - self._underlying_topology._vertex_prototypes # noqa: SLF001 - ) - self._init_edge_prototypes = deepcopy( - self._underlying_topology._edge_prototypes # noqa: SLF001 - ) - self._vertices = tuple( - stk.cage.UnaligningVertex( - id=i.get_id(), - position=i.get_position(), - aligner_edge=i.get_aligner_edge(), - use_neighbor_placement=i.use_neighbor_placement, - ) - for i in self._underlying_topology._vertex_prototypes # noqa: SLF001 - ) - self._edges = tuple( - stk.Edge( - id=i.get_id(), - vertex1=self._vertices[i.get_vertex1_id()], - vertex2=self._vertices[i.get_vertex2_id()], - ) - for i in self._underlying_topology._edge_prototypes # noqa: SLF001 - ) - - self._skip_initial = True - self._define_underlying() - - @dataclass class IHomolepticTopologyIterator: """Iterate over topology graphs.""" @@ -961,7 +838,32 @@ def _define_all_graphs(self) -> None: with self.graphs_path.open("w") as f: json.dump(to_save, f) - def get_graphs(self) -> abc.Generator[TopologyCode]: + def count_graphs(self) -> int: + """Count completely connected graphs in iteration.""" + if not self.graphs_path.exists(): + self._define_all_graphs() + + with self.graphs_path.open("r") as f: + all_graphs = json.load(f) + + logging.info( + "there are %s graphs, %s", len(all_graphs), self.graph_type + ) + count = 0 + for combination in all_graphs: + topology_code = TopologyCode( + vertex_map=combination, + as_string=vmap_to_str(combination), + ) + + num_components = rx.number_connected_components( + topology_code.get_graph() + ) + if num_components == 1: + count += 1 + return count + + def yield_graphs(self) -> abc.Generator[TopologyCode]: """Get constructed molecules from iteration. Yields only completely connected graphs. From 542845fd74b6aea51709947c439ed1379cf471be Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Mon, 16 Dec 2024 14:22:43 +0100 Subject: [PATCH 06/47] Add bb config control. --- src/cgexplore/_internal/scram/construction.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/cgexplore/_internal/scram/construction.py b/src/cgexplore/_internal/scram/construction.py index 8ba86c8b..9ad7da6c 100644 --- a/src/cgexplore/_internal/scram/construction.py +++ b/src/cgexplore/_internal/scram/construction.py @@ -23,6 +23,7 @@ yield_shifted_models, ) +from .building_block_enum import BuildingBlockConfiguration from .enumeration import IHomolepticTopologyIterator, TopologyIterator logging.basicConfig( @@ -454,14 +455,20 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 def try_except_construction( iterator: TopologyIterator | IHomolepticTopologyIterator, topology_code: TopologyCode, + building_block_configuration: BuildingBlockConfiguration | None = None, vertex_positions: dict[int, np.ndarray] | None = None, ) -> stk.ConstructedMolecule: """Try construction with alignment, then without.""" + if building_block_configuration is None: + bbs = iterator.building_blocks + else: + bbs = building_block_configuration.get_building_block_dictionary() + try: # Try with aligning vertices. constructed_molecule = stk.ConstructedMolecule( CustomTopology( - building_blocks=iterator.building_blocks, + building_blocks=bbs, vertex_prototypes=iterator.get_vertex_prototypes( unaligning=False ), @@ -479,7 +486,7 @@ def try_except_construction( # Try with unaligning. constructed_molecule = stk.ConstructedMolecule( CustomTopology( - building_blocks=iterator.building_blocks, + building_blocks=bbs, vertex_prototypes=iterator.get_vertex_prototypes( unaligning=True ), From 22cc2322aab8ced864fcfc5ab459ab636c43d848 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Mon, 16 Dec 2024 14:22:52 +0100 Subject: [PATCH 07/47] Refactoring from tscram. --- .../molecular/molecule_construction.py | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/src/cgexplore/_internal/molecular/molecule_construction.py b/src/cgexplore/_internal/molecular/molecule_construction.py index 4d07915e..3fc89ae4 100644 --- a/src/cgexplore/_internal/molecular/molecule_construction.py +++ b/src/cgexplore/_internal/molecular/molecule_construction.py @@ -592,3 +592,99 @@ def __init__( ] ), ) + + +class SixBead(Precursor): + """A Precursor.""" + + def __init__(self, bead: CgBead, abead1: CgBead, abead2: CgBead) -> None: + """Initialize a precursor.""" + self._bead = bead + self._abead1 = abead1 + self._abead2 = abead2 + self._name = f"6C2{bead.bead_type}{abead1.bead_type}{abead2.bead_type}" + self._bead_set = { + bead.bead_type: bead, + abead1.bead_type: abead1, + abead2.bead_type: abead2, + } + + new_fgs = stk.SmartsFunctionalGroupFactory( + smarts=f"[{abead2.element_string}X1][{abead1.element_string}]", + bonders=(0,), + deleters=(), + placers=(0, 1), + ) + self._building_block = stk.BuildingBlock( + smiles=( + f"[{abead2.element_string}][{abead1.element_string}]" + f"[{bead.element_string}][{bead.element_string}]" + f"[{abead1.element_string}][{abead2.element_string}]" + ), + functional_groups=new_fgs, + position_matrix=np.array( + [ + [-6, 3, 0.2], + [-4, 2, 0], + [-2, 0.1, 0], + [2, 0, 0], + [4, 2, 0], + [6, 3, 0.2], + ] + ), + ) + + +class StericSixBead(Precursor): + """A Precursor.""" + + def __init__( + self, + bead: CgBead, + abead1: CgBead, + abead2: CgBead, + sbead: CgBead, + ) -> None: + """Initialize a precursor.""" + self._bead = bead + self._abead1 = abead1 + self._abead2 = abead2 + self._sbead = sbead + self._name = ( + f"6S2{bead.bead_type}{abead1.bead_type}{abead2.bead_type}" + f"{sbead.bead_type}" + ) + self._bead_set = { + bead.bead_type: bead, + abead1.bead_type: abead1, + abead2.bead_type: abead2, + sbead.bead_type: sbead, + } + + new_fgs = stk.SmartsFunctionalGroupFactory( + smarts=f"[{abead2.element_string}X1][{abead1.element_string}]", + bonders=(0,), + deleters=(), + placers=(0, 1), + ) + self._building_block = stk.BuildingBlock( + smiles=( + f"[{abead2.element_string}][{abead1.element_string}]" + f"[{bead.element_string}][{bead.element_string}]" + f"([{sbead.element_string}])[{bead.element_string}]" + f"[{abead1.element_string}][{abead2.element_string}]" + ), + functional_groups=new_fgs, + position_matrix=np.array( + [ + [-6, 3, 0.2], + [-4, 2, 0], + [-2, 0.1, 0], + [0, 0.1, 0], + [0, 1, 0], + [2, 0, 0], + [4, 2, 0], + [6, 3, 0.2], + ] + ), + ) From 76a67190630164ddf9632f2980863a1723d83f14 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Mon, 16 Dec 2024 14:23:06 +0100 Subject: [PATCH 08/47] Update module. --- src/cgexplore/scram.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cgexplore/scram.py b/src/cgexplore/scram.py index 3c62a511..f418d70c 100644 --- a/src/cgexplore/scram.py +++ b/src/cgexplore/scram.py @@ -1,6 +1,8 @@ """scram package.""" from cgexplore._internal.scram.building_block_enum import ( + BuildingBlockConfiguration, + get_custom_bb_configurations, get_potential_bb_dicts, ) from cgexplore._internal.scram.construction import ( @@ -9,7 +11,6 @@ try_except_construction, ) from cgexplore._internal.scram.enumeration import ( - HomolepticTopologyIterator, IHomolepticTopologyIterator, TopologyIterator, ) @@ -17,11 +18,12 @@ from cgexplore._internal.scram.utilities import points_on_sphere, vmap_to_str __all__ = [ + "BuildingBlockConfiguration", "Constructed", - "HomolepticTopologyIterator", "IHomolepticTopologyIterator", "TopologyCode", "TopologyIterator", + "get_custom_bb_configurations", "get_potential_bb_dicts", "graph_optimise_cage", "optimise_cage", From fae1a95d5a41594a76b6856f37b1e8702514a209 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Mon, 16 Dec 2024 14:23:28 +0100 Subject: [PATCH 09/47] Add warnings to old algorithm. --- .../_internal/scram/building_block_enum.py | 28 +++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/cgexplore/_internal/scram/building_block_enum.py b/src/cgexplore/_internal/scram/building_block_enum.py index a342c24d..d57a50fb 100644 --- a/src/cgexplore/_internal/scram/building_block_enum.py +++ b/src/cgexplore/_internal/scram/building_block_enum.py @@ -83,10 +83,22 @@ def length_4_heteroleptic_bb_dicts(tstr: str) -> dict[int, int]: def get_potential_bb_dicts( tstr: str, ratio: tuple[int, int], - bb_type: str, + study_type: Literal["ditopic", "tritopic", "tetratopic"], ) -> abc.Sequence[dict[int, abc.Sequence[int]]]: - """Get potential building block dictionaries.""" - match bb_type: + """Get potential building block dictionaries from known topology graphs. + + Parameters: + tstr: + A key to known topology graphs and their building dictionary. + + study_type: + `ditopic`, `tetratopic`, `tritopic` explore 1:1:1 heteroleptic + systems with distinct 2-,4-,3-functional group building blocks, + respectively. If you are using this in conjuction with graph + screening, use `get_custom_bb_configurations`. + + """ + match study_type: case "ditopic": possibilities, count_to_add = length_2_heteroleptic_bb_dicts(tstr) current_counter = max( @@ -132,9 +144,15 @@ def get_potential_bb_dicts( possible_dicts.append((len(possible_dicts), new_possibility)) - msg = "bring rmsd checker in here" + msg = ( + "bring rmsd checker in here: use symmetry corrected RMSD on " + "single-bead repr of tstr" + ) logging.info(msg) - msg = "use symmetry corrected RMSD on single-bead repr of tstr" + + return tuple(possible_dicts) + + logging.info(msg) return tuple(possible_dicts) From ebaef89e4f42a6bb796aa5210d1ed2284d635fa7 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Mon, 16 Dec 2024 14:23:45 +0100 Subject: [PATCH 10/47] Write new, custom topology building block permutation function. --- .../_internal/scram/building_block_enum.py | 171 +++++++++++++++++- 1 file changed, 169 insertions(+), 2 deletions(-) diff --git a/src/cgexplore/_internal/scram/building_block_enum.py b/src/cgexplore/_internal/scram/building_block_enum.py index d57a50fb..f44c5350 100644 --- a/src/cgexplore/_internal/scram/building_block_enum.py +++ b/src/cgexplore/_internal/scram/building_block_enum.py @@ -2,9 +2,14 @@ import itertools as it import logging -from collections import Counter, abc +from collections import Counter, abc, defaultdict from copy import deepcopy -from typing import assert_never +from dataclasses import dataclass +from typing import Literal, assert_never + +import stk + +from cgexplore._internal.scram.enumeration import IHomolepticTopologyIterator logging.basicConfig( level=logging.INFO, @@ -153,6 +158,168 @@ def get_potential_bb_dicts( return tuple(possible_dicts) +@dataclass +class BuildingBlockConfiguration: + """Naming convention for building block configurations.""" + + idx: int + building_block_idx_map: dict[stk.BuildingBlock, int] + building_block_idx_dict: dict[int, abc.Sequence[int]] + + def get_building_block_dictionary( + self, + ) -> dict[stk.BuildingBlock, abc.Sequence[int]]: + idx_map = {idx: bb for bb, idx in self.building_block_idx_map.items()} + return { + idx_map[idx]: tuple(vertices) + for idx, vertices in self.building_block_idx_dict.items() + } + + def get_hashable_bbidx_dict( + self, + ) -> abc.Sequence[tuple[int, abc.Sequence[int]]]: + """Get a hashable representation of the building block dictionary.""" + return tuple(sorted(self.building_block_idx_dict.items())) + + def __str__(self) -> str: + """Return a string representation of the OMMTrajectory.""" + return ( + f"{self.__class__.__name__}(idx={self.idx}, " + f"building_block_idx_dict={self.building_block_idx_dict})" + ) + + def __repr__(self) -> str: + """Return a string representation of the OMMTrajectory.""" + return str(self) + + +def get_custom_bb_configurations( # noqa: C901 + iterator: IHomolepticTopologyIterator, +) -> abc.Sequence[dict[int, abc.Sequence[int]]]: + """Get potential building block dictionaries.""" + # Get building blocks with the same functional group count - these are + # swappable. + building_blocks_by_fg = { + i: i.get_num_functional_groups() for i in iterator.building_blocks + } + + count_of_fg_types = defaultdict(int) + fg_counts_by_building_block = defaultdict(int) + + for bb, count in iterator.building_block_counts.items(): + fg_counts_by_building_block[bb.get_num_functional_groups()] += count + count_of_fg_types[bb.get_num_functional_groups()] += 1 + + modifiable_types = tuple( + fg_count for fg_count, count in count_of_fg_types.items() if count > 1 + ) + if len(modifiable_types) != 1: + msg = ( + f"modifiable_types is len {len(modifiable_types)}. If 0" + ", then you have no need to screen building block configurations." + " If greater than 2, then this code cannot handle this yet. Sorry!" + ) + raise RuntimeError(msg) + + # Get the associated vertex ids. + modifiable_vertices = { + fg_count: iterator.vertex_types_by_fg[fg_count] + for fg_count in iterator.vertex_types_by_fg + # ASSUMES 1 modifiable FG. + if fg_count == modifiable_types[0] + } + + unmodifiable_vertices = { + fg_count: iterator.vertex_types_by_fg[fg_count] + for fg_count in iterator.vertex_types_by_fg + # ASSUMES 1 modifiable FG. + if fg_count != modifiable_types[0] + } + + # Count of functional groups: number of vertices that need adding. + count_to_add = { + i: fg_counts_by_building_block[i] for i in modifiable_types + } + + if len(count_to_add) != 1: + msg = ( + f"count to add is len {len(count_to_add)}. If greater than 1, " + "then this code cannot handle this yet. Sorry!" + ) + raise RuntimeError(msg) + + bb_map = {bb: idx for idx, bb in enumerate(building_blocks_by_fg)} + + empty_bb_dict = {} + for bb, fg_count in building_blocks_by_fg.items(): + if fg_count in modifiable_types: + empty_bb_dict[bb_map[bb]] = [] + else: + empty_bb_dict[bb_map[bb]] = tuple( + i for i in unmodifiable_vertices[fg_count] + ) + + saved = set() + saved_bb_dicts = set() + possible_dicts = [] + + # ASSUMES 1 modifiable FG. + modifiable = tuple( + bb_idx + for bb_idx, vertices in empty_bb_dict.items() + if len(vertices) == 0 + ) + + # Get combinations of building blocks with the right count. + for combo in it.product( + modifiable, # ASSUMES 1 modifiable FG. + repeat=count_to_add[modifiable_types[0]], + ): + subset_bb_counts = { + bb_map[bb]: count + for bb, count in iterator.building_block_counts.items() + if bb_map[bb] in modifiable + } + + if Counter(combo) != subset_bb_counts: + continue + + if combo in saved: + continue + saved.add(combo) + + # Then assign to vertices with all permutations. + # ASSUMES 1 modifiable FG. + for vertex_id_permutation in it.permutations( + modifiable_vertices[modifiable_types[0]] + ): + new_possibility = deepcopy(empty_bb_dict) + + for bb_idx, vertex_id in zip( + combo, vertex_id_permutation, strict=True + ): + new_possibility[bb_idx].append(vertex_id) + + bbconfig = BuildingBlockConfiguration( + idx=len(possible_dicts), + building_block_idx_map=bb_map, + building_block_idx_dict={ + i: tuple(j) for i, j in new_possibility.items() + }, + ) + + if bbconfig.get_hashable_bbidx_dict() in saved_bb_dicts: + continue + saved_bb_dicts.add(bbconfig.get_hashable_bbidx_dict()) + + # Check for deduplication. + + possible_dicts.append(bbconfig) + + msg = ( + "bring rmsd checker in here: use symmetry corrected RMSD on " + "single-bead repr of tstr" + ) logging.info(msg) return tuple(possible_dicts) From 86295971a8ae32387c150eeabbcc30f507fd41c8 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Tue, 17 Dec 2024 14:25:17 +0100 Subject: [PATCH 11/47] Make accessible function for aligning ditopic bbs. --- .../_internal/atomistic/utilities.py | 42 +++++++++++++++++++ src/cgexplore/atomistic.py | 2 + 2 files changed, 44 insertions(+) diff --git a/src/cgexplore/_internal/atomistic/utilities.py b/src/cgexplore/_internal/atomistic/utilities.py index 19a4507e..a18b0890 100644 --- a/src/cgexplore/_internal/atomistic/utilities.py +++ b/src/cgexplore/_internal/atomistic/utilities.py @@ -3,6 +3,7 @@ import logging import pathlib +import bbprep import numpy as np import stk import stko @@ -125,3 +126,44 @@ def cgx_optimisation_sequence( gulp2_mol = cage.with_structure_from_file(gulp2_output) return cage.with_structure_from_file(gulp2_output) + + +def get_ditopic_aligned_bb( + path: pathlib.Path, + optl_path: pathlib.Path, +) -> stk.BuildingBlock: + """Get building block for the target ligand and prepare for cage model.""" + if not path.exists(): + temp = stk.BuildingBlock.init_from_file( + path=optl_path, + functional_groups=( + stko.functional_groups.ThreeSiteFactory("[#6]~[#7X2]~[#6]"), + ), + ) + # Handle if not ditopic. + if temp.get_num_functional_groups() != 2: # noqa: PLR2004 + temp = bbprep.FurthestFGs().modify( + building_block=temp, + desired_functional_groups=2, + ) + + generator = bbprep.generators.ETKDG(num_confs=100) + ensemble = generator.generate_conformers(temp) + process = bbprep.DitopicFitter(ensemble=ensemble) + min_molecule = process.get_minimum() + min_molecule.molecule.write(path) + + molecule = stk.BuildingBlock.init_from_file( + path=path, + functional_groups=( + stko.functional_groups.ThreeSiteFactory("[#6]~[#7X2]~[#6]"), + ), + ) + # Handle if not ditopic. + if molecule.get_num_functional_groups() != 2: # noqa: PLR2004 + molecule = bbprep.FurthestFGs().modify( + building_block=molecule, + desired_functional_groups=2, + ) + + return molecule diff --git a/src/cgexplore/atomistic.py b/src/cgexplore/atomistic.py index 19df21b3..411c462b 100644 --- a/src/cgexplore/atomistic.py +++ b/src/cgexplore/atomistic.py @@ -7,11 +7,13 @@ from cgexplore._internal.atomistic.utilities import ( cgx_optimisation_sequence, extract_ensemble, + get_ditopic_aligned_bb, ) __all__ = [ "Crest", "cgx_optimisation_sequence", "extract_ensemble", + "get_ditopic_aligned_bb", "run_conformer_analysis", ] From f487f44e71172b84878422551999b8416e91979e Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Tue, 17 Dec 2024 14:25:35 +0100 Subject: [PATCH 12/47] Bug fix for ditopic bbs. --- src/cgexplore/_internal/atomistic/crest_process.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cgexplore/_internal/atomistic/crest_process.py b/src/cgexplore/_internal/atomistic/crest_process.py index 2eb78a7b..06e1459f 100644 --- a/src/cgexplore/_internal/atomistic/crest_process.py +++ b/src/cgexplore/_internal/atomistic/crest_process.py @@ -8,6 +8,7 @@ import uuid from collections import abc +import bbprep import stk import stko from rdkit import RDLogger @@ -212,6 +213,13 @@ def run_conformer_analysis( # noqa: PLR0913 functional_groups=functional_group_factories, ) + # Handle if not ditopic. + if molecule.get_num_functional_groups() != 2: # noqa: PLR2004 + molecule = bbprep.FurthestFGs().modify( + building_block=molecule, + desired_functional_groups=2, + ) + if not opt_file.exists(): # Run calculation. optimiser = Crest( From 21beaa4c5162de3338a4afeaeb44a41fb935370a Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Tue, 17 Dec 2024 14:26:30 +0100 Subject: [PATCH 13/47] Fix name. --- src/cgexplore/_internal/atomistic/crest_process.py | 4 ++-- src/cgexplore/_internal/atomistic/utilities.py | 5 ++++- src/cgexplore/atomistic.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/cgexplore/_internal/atomistic/crest_process.py b/src/cgexplore/_internal/atomistic/crest_process.py index 06e1459f..79e61fda 100644 --- a/src/cgexplore/_internal/atomistic/crest_process.py +++ b/src/cgexplore/_internal/atomistic/crest_process.py @@ -13,7 +13,7 @@ import stko from rdkit import RDLogger -from .utilities import extract_ensemble +from .utilities import extract_ditopic_ensemble logging.basicConfig( level=logging.INFO, @@ -248,4 +248,4 @@ def run_conformer_analysis( # noqa: PLR0913 opt_molecule = optimiser.optimize(molecule) opt_molecule.write(opt_file) - return extract_ensemble(molecule, crest_run) + return extract_ditopic_ensemble(molecule, crest_run) diff --git a/src/cgexplore/_internal/atomistic/utilities.py b/src/cgexplore/_internal/atomistic/utilities.py index a18b0890..0c7c39fa 100644 --- a/src/cgexplore/_internal/atomistic/utilities.py +++ b/src/cgexplore/_internal/atomistic/utilities.py @@ -14,7 +14,10 @@ ) -def extract_ensemble(molecule: stk.Molecule, crest_run: pathlib.Path) -> dict: +def extract_ditopic_ensemble( + molecule: stk.Molecule, + crest_run: pathlib.Path, +) -> dict: """Extract and save an ensemble from a crest run.""" ensemble_dir = crest_run / "ensemble" num_atoms = molecule.get_num_atoms() diff --git a/src/cgexplore/atomistic.py b/src/cgexplore/atomistic.py index 411c462b..85d6a0ca 100644 --- a/src/cgexplore/atomistic.py +++ b/src/cgexplore/atomistic.py @@ -6,14 +6,14 @@ ) from cgexplore._internal.atomistic.utilities import ( cgx_optimisation_sequence, - extract_ensemble, + extract_ditopic_ensemble, get_ditopic_aligned_bb, ) __all__ = [ "Crest", "cgx_optimisation_sequence", - "extract_ensemble", + "extract_ditopic_ensemble", "get_ditopic_aligned_bb", "run_conformer_analysis", ] From da3f95be8ba9c57c21a1b7ce1f28197aa2d08337 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Tue, 17 Dec 2024 14:26:38 +0100 Subject: [PATCH 14/47] Minor. --- src/cgexplore/_internal/scram/enumeration.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cgexplore/_internal/scram/enumeration.py b/src/cgexplore/_internal/scram/enumeration.py index 321f8fd9..660efbcd 100644 --- a/src/cgexplore/_internal/scram/enumeration.py +++ b/src/cgexplore/_internal/scram/enumeration.py @@ -846,9 +846,6 @@ def count_graphs(self) -> int: with self.graphs_path.open("r") as f: all_graphs = json.load(f) - logging.info( - "there are %s graphs, %s", len(all_graphs), self.graph_type - ) count = 0 for combination in all_graphs: topology_code = TopologyCode( From 5900299ce4eb1b4f70ddb026d1ff424aa8d04508 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Tue, 17 Dec 2024 14:29:22 +0100 Subject: [PATCH 15/47] Update deps. --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 046f1cb9..0cdfc10a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "atomlite", "chemiscope>0.7.3", "rustworkx", + "bbprepared", ] requires-python = ">=3.11" dynamic = ["version"] @@ -150,5 +151,6 @@ module = [ "spindry.*", "vabene.*", "chemiscope.*", + "bbprep.*", ] ignore_missing_imports = true From 47e3ded7a6ea3ed2f24302004bd99221e8f77df6 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Tue, 17 Dec 2024 14:29:28 +0100 Subject: [PATCH 16/47] Update docs. --- ...ore.atomistic.extract_ditopic_ensemble.rst | 6 ++++ .../cgexplore.atomistic.extract_ensemble.rst | 6 ---- ...plore.atomistic.get_ditopic_aligned_bb.rst | 6 ++++ .../_autosummary/cgexplore.atomistic.rst | 3 +- ...or.rst => cgexplore.molecular.SixBead.rst} | 8 ++--- .../cgexplore.molecular.StericSixBead.rst | 25 +++++++++++++ .../_autosummary/cgexplore.molecular.rst | 2 ++ ...plore.scram.BuildingBlockConfiguration.rst | 35 +++++++++++++++++++ ...lore.scram.IHomolepticTopologyIterator.rst | 3 +- ...ore.scram.get_custom_bb_configurations.rst | 6 ++++ docs/source/_autosummary/cgexplore.scram.rst | 3 +- 11 files changed, 90 insertions(+), 13 deletions(-) create mode 100644 docs/source/_autosummary/cgexplore.atomistic.extract_ditopic_ensemble.rst delete mode 100644 docs/source/_autosummary/cgexplore.atomistic.extract_ensemble.rst create mode 100644 docs/source/_autosummary/cgexplore.atomistic.get_ditopic_aligned_bb.rst rename docs/source/_autosummary/{cgexplore.scram.HomolepticTopologyIterator.rst => cgexplore.molecular.SixBead.rst} (52%) create mode 100644 docs/source/_autosummary/cgexplore.molecular.StericSixBead.rst create mode 100644 docs/source/_autosummary/cgexplore.scram.BuildingBlockConfiguration.rst create mode 100644 docs/source/_autosummary/cgexplore.scram.get_custom_bb_configurations.rst diff --git a/docs/source/_autosummary/cgexplore.atomistic.extract_ditopic_ensemble.rst b/docs/source/_autosummary/cgexplore.atomistic.extract_ditopic_ensemble.rst new file mode 100644 index 00000000..fea45b22 --- /dev/null +++ b/docs/source/_autosummary/cgexplore.atomistic.extract_ditopic_ensemble.rst @@ -0,0 +1,6 @@ +cgexplore.atomistic.extract\_ditopic\_ensemble +============================================== + +.. currentmodule:: cgexplore.atomistic + +.. autofunction:: extract_ditopic_ensemble \ No newline at end of file diff --git a/docs/source/_autosummary/cgexplore.atomistic.extract_ensemble.rst b/docs/source/_autosummary/cgexplore.atomistic.extract_ensemble.rst deleted file mode 100644 index 785fb021..00000000 --- a/docs/source/_autosummary/cgexplore.atomistic.extract_ensemble.rst +++ /dev/null @@ -1,6 +0,0 @@ -cgexplore.atomistic.extract\_ensemble -===================================== - -.. currentmodule:: cgexplore.atomistic - -.. autofunction:: extract_ensemble \ No newline at end of file diff --git a/docs/source/_autosummary/cgexplore.atomistic.get_ditopic_aligned_bb.rst b/docs/source/_autosummary/cgexplore.atomistic.get_ditopic_aligned_bb.rst new file mode 100644 index 00000000..b56dde13 --- /dev/null +++ b/docs/source/_autosummary/cgexplore.atomistic.get_ditopic_aligned_bb.rst @@ -0,0 +1,6 @@ +cgexplore.atomistic.get\_ditopic\_aligned\_bb +============================================= + +.. currentmodule:: cgexplore.atomistic + +.. autofunction:: get_ditopic_aligned_bb \ No newline at end of file diff --git a/docs/source/_autosummary/cgexplore.atomistic.rst b/docs/source/_autosummary/cgexplore.atomistic.rst index d0404660..6824be9a 100644 --- a/docs/source/_autosummary/cgexplore.atomistic.rst +++ b/docs/source/_autosummary/cgexplore.atomistic.rst @@ -30,7 +30,8 @@ cgexplore.atomistic :nosignatures: cgx_optimisation_sequence - extract_ensemble + extract_ditopic_ensemble + get_ditopic_aligned_bb run_conformer_analysis diff --git a/docs/source/_autosummary/cgexplore.scram.HomolepticTopologyIterator.rst b/docs/source/_autosummary/cgexplore.molecular.SixBead.rst similarity index 52% rename from docs/source/_autosummary/cgexplore.scram.HomolepticTopologyIterator.rst rename to docs/source/_autosummary/cgexplore.molecular.SixBead.rst index ad358696..c8daecbd 100644 --- a/docs/source/_autosummary/cgexplore.scram.HomolepticTopologyIterator.rst +++ b/docs/source/_autosummary/cgexplore.molecular.SixBead.rst @@ -1,9 +1,9 @@ -cgexplore.scram.HomolepticTopologyIterator -========================================== +cgexplore.molecular.SixBead +=========================== -.. currentmodule:: cgexplore.scram +.. currentmodule:: cgexplore.molecular -.. autoclass:: HomolepticTopologyIterator +.. autoclass:: SixBead :members: :inherited-members: :undoc-members: diff --git a/docs/source/_autosummary/cgexplore.molecular.StericSixBead.rst b/docs/source/_autosummary/cgexplore.molecular.StericSixBead.rst new file mode 100644 index 00000000..b637e9bc --- /dev/null +++ b/docs/source/_autosummary/cgexplore.molecular.StericSixBead.rst @@ -0,0 +1,25 @@ +cgexplore.molecular.StericSixBead +================================= + +.. currentmodule:: cgexplore.molecular + +.. autoclass:: StericSixBead + :members: + :inherited-members: + :undoc-members: + :show-inheritance: + + + + + .. rubric:: Methods + + .. autosummary:: + :nosignatures: + + + + + + + \ No newline at end of file diff --git a/docs/source/_autosummary/cgexplore.molecular.rst b/docs/source/_autosummary/cgexplore.molecular.rst index 3cba8ec1..6b570687 100644 --- a/docs/source/_autosummary/cgexplore.molecular.rst +++ b/docs/source/_autosummary/cgexplore.molecular.rst @@ -26,8 +26,10 @@ cgexplore.molecular LinearPrecursor Precursor PrecursorGenerator + SixBead SpindryConformer SquarePrecursor + StericSixBead ThreeC0Arm ThreeC1Arm ThreeC2Arm diff --git a/docs/source/_autosummary/cgexplore.scram.BuildingBlockConfiguration.rst b/docs/source/_autosummary/cgexplore.scram.BuildingBlockConfiguration.rst new file mode 100644 index 00000000..0ffce84a --- /dev/null +++ b/docs/source/_autosummary/cgexplore.scram.BuildingBlockConfiguration.rst @@ -0,0 +1,35 @@ +cgexplore.scram.BuildingBlockConfiguration +========================================== + +.. currentmodule:: cgexplore.scram + +.. autoclass:: BuildingBlockConfiguration + :members: + :inherited-members: + :undoc-members: + :show-inheritance: + + + + + .. rubric:: Methods + + .. autosummary:: + :nosignatures: + + ~BuildingBlockConfiguration.get_building_block_dictionary + ~BuildingBlockConfiguration.get_hashable_bbidx_dict + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~BuildingBlockConfiguration.idx + ~BuildingBlockConfiguration.building_block_idx_map + ~BuildingBlockConfiguration.building_block_idx_dict + + \ No newline at end of file diff --git a/docs/source/_autosummary/cgexplore.scram.IHomolepticTopologyIterator.rst b/docs/source/_autosummary/cgexplore.scram.IHomolepticTopologyIterator.rst index 4565a4fc..fd1d1f1c 100644 --- a/docs/source/_autosummary/cgexplore.scram.IHomolepticTopologyIterator.rst +++ b/docs/source/_autosummary/cgexplore.scram.IHomolepticTopologyIterator.rst @@ -17,9 +17,10 @@ cgexplore.scram.IHomolepticTopologyIterator .. autosummary:: :nosignatures: - ~IHomolepticTopologyIterator.get_graphs + ~IHomolepticTopologyIterator.count_graphs ~IHomolepticTopologyIterator.get_num_building_blocks ~IHomolepticTopologyIterator.get_vertex_prototypes + ~IHomolepticTopologyIterator.yield_graphs diff --git a/docs/source/_autosummary/cgexplore.scram.get_custom_bb_configurations.rst b/docs/source/_autosummary/cgexplore.scram.get_custom_bb_configurations.rst new file mode 100644 index 00000000..e692a895 --- /dev/null +++ b/docs/source/_autosummary/cgexplore.scram.get_custom_bb_configurations.rst @@ -0,0 +1,6 @@ +cgexplore.scram.get\_custom\_bb\_configurations +=============================================== + +.. currentmodule:: cgexplore.scram + +.. autofunction:: get_custom_bb_configurations \ No newline at end of file diff --git a/docs/source/_autosummary/cgexplore.scram.rst b/docs/source/_autosummary/cgexplore.scram.rst index 6751bc3c..8d430e16 100644 --- a/docs/source/_autosummary/cgexplore.scram.rst +++ b/docs/source/_autosummary/cgexplore.scram.rst @@ -16,8 +16,8 @@ cgexplore.scram :template: class.rst :nosignatures: + BuildingBlockConfiguration Constructed - HomolepticTopologyIterator IHomolepticTopologyIterator TopologyCode TopologyIterator @@ -33,6 +33,7 @@ cgexplore.scram :toctree: :nosignatures: + get_custom_bb_configurations get_potential_bb_dicts graph_optimise_cage optimise_cage From 893e6800576c5c9db8c8cfa274379902b8472ba7 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Wed, 18 Dec 2024 12:08:14 +0100 Subject: [PATCH 17/47] Refactoring. --- first_paper_example/analysis.py | 25 +++-------------------- first_paper_example/plot_maps.py | 11 +++++----- first_paper_example/plot_phase_spaces.py | 9 +++++--- first_paper_example/save_to_chemiscope.py | 4 ++-- optimisation_example/optimsation_test.py | 18 ++++------------ 5 files changed, 21 insertions(+), 46 deletions(-) diff --git a/first_paper_example/analysis.py b/first_paper_example/analysis.py index a80441f6..4ebb2b50 100644 --- a/first_paper_example/analysis.py +++ b/first_paper_example/analysis.py @@ -429,25 +429,6 @@ def xc_map(tstr: str) -> int: }[tstr] -def stoich_map(tstr: str) -> int: - """Stoichiometry maps to the number of building blocks.""" - return { - "2P3": 5, - "4P6": 10, - "4P62": 10, - "6P9": 15, - "8P12": 20, - "2P4": 6, - "3P6": 9, - "4P8": 12, - "4P82": 12, - "6P12": 18, - "8P16": 24, - "12P24": 36, - "6P8": 14, - }[tstr] - - def cltypetopo_to_colormap() -> dict[str, dict]: """Convert label.""" return { @@ -670,9 +651,9 @@ def data_to_array(json_files, output_dir: pathlib.Path) -> pd.DataFrame: if row["optimised"]: row["strain_energy"] = res_dict["fin_energy_kjmol"] - row["energy_per_bb"] = res_dict["fin_energy_kjmol"] / stoich_map( - t_str - ) + row["energy_per_bb"] = res_dict[ + "fin_energy_kjmol" + ] / cgx.topologies.stoich_map(t_str) for force_title in res_dict["fin_energy_decomp"]: if force_title in ( "CMMotionRemover_kJ/mol", diff --git a/first_paper_example/plot_maps.py b/first_paper_example/plot_maps.py index 9341d6d7..558d0783 100644 --- a/first_paper_example/plot_maps.py +++ b/first_paper_example/plot_maps.py @@ -22,13 +22,12 @@ data_to_array, eb_str, isomer_energy, - stoich_map, topology_labels, xc_map, ) from env_set import calculations, figures, outputdata -from cgexplore.utilities import draw_pie +import cgexplore as cgx logging.basicConfig( level=logging.INFO, @@ -413,7 +412,7 @@ def selfsort_map(all_data: pd.DataFrame, figure_output: pathlib.Path) -> None: ] ) - draw_pie( + cgx.utilities.draw_pie( colours=colours, xpos=xvalue, ypos=yvalue, @@ -481,7 +480,9 @@ def kinetic_selfsort_map( if len(mixed_energies) == 0: colours = ["white"] else: - stoichiometries = {i: stoich_map(i) for i in mixed_energies} + stoichiometries = { + i: cgx.topologies.stoich_map(i) for i in mixed_energies + } min_stoichiometry = min(stoichiometries.values()) kinetic_energies = { i: mixed_energies[i] @@ -503,7 +504,7 @@ def kinetic_selfsort_map( ] ] - draw_pie( + cgx.utilities.draw_pie( colours=colours, xpos=xvalue, ypos=yvalue, diff --git a/first_paper_example/plot_phase_spaces.py b/first_paper_example/plot_phase_spaces.py index 9ef02ce2..0c36bfca 100644 --- a/first_paper_example/plot_phase_spaces.py +++ b/first_paper_example/plot_phase_spaces.py @@ -16,12 +16,13 @@ isomer_energy, pore_str, rg_str, - stoich_map, ) from env_set import calculations, figures, outputdata from matplotlib.lines import Line2D from scipy.spatial import ConvexHull +import cgexplore as cgx + logging.basicConfig( level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s", @@ -48,7 +49,9 @@ def phase_space_2(all_data: pd.DataFrame, figure_output: pathlib.Path) -> None: } if len(stable_energies) == 0: continue - stoichiometries = {i: stoich_map(i) for i in stable_energies} + stoichiometries = { + i: cgx.topologies.stoich_map(i) for i in stable_energies + } min_stoichiometry = min(stoichiometries.values()) kinetic_energies = { i: stable_energies[i] @@ -118,7 +121,7 @@ def no_conversion(value: float) -> float: "ylbl": pore_str(), "x": "topology", "xlbl": "num. building blocks", - "xmapfun": stoich_map, + "xmapfun": cgx.topologies.stoich_map, "xlim": (None, None), "c": "cmap", }, diff --git a/first_paper_example/save_to_chemiscope.py b/first_paper_example/save_to_chemiscope.py index adc8b611..32d9d59b 100644 --- a/first_paper_example/save_to_chemiscope.py +++ b/first_paper_example/save_to_chemiscope.py @@ -6,7 +6,7 @@ import logging from collections import abc -from analysis import stoich_map, topology_labels +from analysis import topology_labels from env_set import outputdata import cgexplore as cgx @@ -22,7 +22,7 @@ def divider(tstr: str) -> abc.Callable: def divide_by_stoich(value: float) -> float: """Divide energy by stoichiometry.""" - return value / stoich_map(tstr) + return value / cgx.topologies.stoich_map(tstr) return divide_by_stoich diff --git a/optimisation_example/optimsation_test.py b/optimisation_example/optimsation_test.py index 84eacd33..7d00a2e7 100644 --- a/optimisation_example/optimsation_test.py +++ b/optimisation_example/optimsation_test.py @@ -17,17 +17,6 @@ def isomer_energy() -> float: return 0.3 -def stoich_map(tstr: str) -> int: - """Stoichiometry maps to the number of building blocks.""" - return { - "2P3": 5, - "4P6": 10, - "4P62": 10, - "6P9": 15, - "8P12": 20, - }[tstr] - - def colours() -> dict[str, str]: """Colours map to topologies.""" return { @@ -94,7 +83,8 @@ def analyse_cage( raise res_dict = { "strain_energy": fin_energy, - "energy_per_bb": fin_energy / stoich_map(topology_str), + "energy_per_bb": fin_energy + / cgx.topologies.stoich_map(topology_str), } database.add_properties(key=name, property_dict=res_dict) @@ -426,14 +416,14 @@ def fitness_function( ) other_topologies = {} - current_stoich = stoich_map(tstr) + current_stoich = cgx.topologies.stoich_map(tstr) for other_chromosome in differ_by_topology: other_name = ( f"{other_chromosome.prefix}_{other_chromosome.get_string()}" ) other_tstr, _ = other_chromosome.get_topology_information() # Only recalculate smaller or equivalent cages. - if stoich_map(other_tstr) <= current_stoich: + if cgx.topologies.stoich_map(other_tstr) <= current_stoich: if not database.has_molecule(other_name): # Run calculation. structure_function( From f25e592dddeb4c9dd831407220dbdb8f2a2a015f Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Wed, 18 Dec 2024 12:10:51 +0100 Subject: [PATCH 18/47] Update docs. --- src/cgexplore/_internal/topologies/graphs.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/cgexplore/_internal/topologies/graphs.py b/src/cgexplore/_internal/topologies/graphs.py index 9b4b0ec7..5881cefd 100644 --- a/src/cgexplore/_internal/topologies/graphs.py +++ b/src/cgexplore/_internal/topologies/graphs.py @@ -7,7 +7,17 @@ def stoich_map(tstr: str) -> int: - """Stoichiometry maps to the number of building blocks.""" + """The total number of building blocks in a topology graph. + + Available graphs: + `2P3`, `4P6`, `4P62`, `6P9`, `8P12`, `2P4`, `3P6`, `4P8`, `4P82`, + `6P12`, `8P16`, `6P122`, `8P162`, `12P24`, `6P8` + + Parameters: + tstr: + The string for the topology. + + """ return { "2P3": 5, "4P6": 10, From 533832ec9aac9a3a8cc6ea471362656cacf16406 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Wed, 18 Dec 2024 17:13:45 +0100 Subject: [PATCH 19/47] Improve wasted overlaps in bb dict generation. --- .../_internal/scram/building_block_enum.py | 83 +++++++++---------- 1 file changed, 40 insertions(+), 43 deletions(-) diff --git a/src/cgexplore/_internal/scram/building_block_enum.py b/src/cgexplore/_internal/scram/building_block_enum.py index f44c5350..4eb62055 100644 --- a/src/cgexplore/_internal/scram/building_block_enum.py +++ b/src/cgexplore/_internal/scram/building_block_enum.py @@ -259,62 +259,59 @@ def get_custom_bb_configurations( # noqa: C901 i for i in unmodifiable_vertices[fg_count] ) - saved = set() - saved_bb_dicts = set() - possible_dicts = [] - # ASSUMES 1 modifiable FG. - modifiable = tuple( + modifiable_bb_idx = tuple( bb_idx for bb_idx, vertices in empty_bb_dict.items() if len(vertices) == 0 ) + modifiable_bb_idx_counted = [] + for bb, count in iterator.building_block_counts.items(): + idx = bb_map[bb] + if idx not in modifiable_bb_idx: + continue + modifiable_bb_idx_counted.extend([idx] * count) - # Get combinations of building blocks with the right count. - for combo in it.product( - modifiable, # ASSUMES 1 modifiable FG. - repeat=count_to_add[modifiable_types[0]], - ): - subset_bb_counts = { - bb_map[bb]: count - for bb, count in iterator.building_block_counts.items() - if bb_map[bb] in modifiable - } + # Iterate over the placement of the bb indices. + vertex_map = { + v_idx: idx + for idx, v_idx in enumerate(modifiable_vertices[modifiable_types[0]]) + } + iteration = it.product( + # ASSUMES 1 modifiable FG. + *(modifiable_bb_idx for i in modifiable_vertices[modifiable_types[0]]) + ) - if Counter(combo) != subset_bb_counts: - continue + saved_bb_dicts = set() + possible_dicts = [] - if combo in saved: + for config in iteration: + if sorted(config) != modifiable_bb_idx_counted: continue - saved.add(combo) - # Then assign to vertices with all permutations. - # ASSUMES 1 modifiable FG. - for vertex_id_permutation in it.permutations( - modifiable_vertices[modifiable_types[0]] - ): - new_possibility = deepcopy(empty_bb_dict) - - for bb_idx, vertex_id in zip( - combo, vertex_id_permutation, strict=True - ): - new_possibility[bb_idx].append(vertex_id) - - bbconfig = BuildingBlockConfiguration( - idx=len(possible_dicts), - building_block_idx_map=bb_map, - building_block_idx_dict={ - i: tuple(j) for i, j in new_possibility.items() - }, - ) + bb_config_dict = { + vertex_id: config[vertex_map[vertex_id]] + for vertex_id in modifiable_vertices[modifiable_types[0]] + } - if bbconfig.get_hashable_bbidx_dict() in saved_bb_dicts: - continue - saved_bb_dicts.add(bbconfig.get_hashable_bbidx_dict()) + new_possibility = deepcopy(empty_bb_dict) + for vertex_id, bb_idx in bb_config_dict.items(): + new_possibility[bb_idx].append(vertex_id) - # Check for deduplication. + bbconfig = BuildingBlockConfiguration( + idx=len(possible_dicts), + building_block_idx_map=bb_map, + building_block_idx_dict={ + i: tuple(j) for i, j in new_possibility.items() + }, + ) + + if bbconfig.get_hashable_bbidx_dict() in saved_bb_dicts: + continue + # Check for deduplication. + saved_bb_dicts.add(bbconfig.get_hashable_bbidx_dict()) - possible_dicts.append(bbconfig) + possible_dicts.append(bbconfig) msg = ( "bring rmsd checker in here: use symmetry corrected RMSD on " From 7cf0010f45197e3e04588906924d233f2491b9f6 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Wed, 18 Dec 2024 17:13:58 +0100 Subject: [PATCH 20/47] Minor. --- src/cgexplore/_internal/scram/enumeration.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cgexplore/_internal/scram/enumeration.py b/src/cgexplore/_internal/scram/enumeration.py index 660efbcd..82370f05 100644 --- a/src/cgexplore/_internal/scram/enumeration.py +++ b/src/cgexplore/_internal/scram/enumeration.py @@ -871,10 +871,6 @@ def yield_graphs(self) -> abc.Generator[TopologyCode]: with self.graphs_path.open("r") as f: all_graphs = json.load(f) - logging.info( - "there are %s graphs, %s", len(all_graphs), self.graph_type - ) - for combination in all_graphs: topology_code = TopologyCode( vertex_map=combination, From bb46fb2a9f3a37824d69a9ffb8178d3659b01437 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Thu, 19 Dec 2024 22:37:55 +0100 Subject: [PATCH 21/47] Add new graph using new algorithm for 3-types. --- src/cgexplore/_internal/scram/enumeration.py | 126 ++++++++++++++++-- .../known_graphs/rx_4-4FG_6-2FG_4-1FG.json | 1 + 2 files changed, 117 insertions(+), 10 deletions(-) create mode 100644 src/cgexplore/_internal/scram/known_graphs/rx_4-4FG_6-2FG_4-1FG.json diff --git a/src/cgexplore/_internal/scram/enumeration.py b/src/cgexplore/_internal/scram/enumeration.py index 82370f05..8d4db267 100644 --- a/src/cgexplore/_internal/scram/enumeration.py +++ b/src/cgexplore/_internal/scram/enumeration.py @@ -622,8 +622,9 @@ class IHomolepticTopologyIterator: graph_type: str graph_set: Literal["rx", "nx", "rx_nodoubles"] = "rx" scale_multiplier = 5 + max_samples: int | None = None - def __post_init__(self) -> None: + def __post_init__(self) -> None: # noqa: PLR0915, PLR0912, C901 """Initialize.""" match self.graph_set: case "rx": @@ -632,7 +633,8 @@ def __post_init__(self) -> None: / "known_graphs" / f"rx_{self.graph_type}.json" ) - self.max_samples = int(1e4) + if self.max_samples is None: + self.max_samples = int(1e4) case "rx_nodoubles": self.graphs_path = ( @@ -640,7 +642,8 @@ def __post_init__(self) -> None: / "known_graphs" / f"rxnd_{self.graph_type}.json" ) - self.max_samples = int(1e5) + if self.max_samples is None: + self.max_samples = int(1e5) case "nx": self.graphs_path = ( @@ -704,7 +707,16 @@ def __post_init__(self) -> None: ) building_block_dict[building_block].append(vertex_id) vertex_counts[vertex_id] = num_functional_groups - if num_functional_groups == 2: # noqa: PLR2004 + if num_functional_groups == 1: + vertex_prototypes.append( + stk.cage.UnaligningVertex( + id=vertex_id, + position=position, + use_neighbor_placement=False, + ) + ) + + elif num_functional_groups == 2: # noqa: PLR2004 vertex_prototypes.append( stk.cage.AngledVertex( id=vertex_id, @@ -756,15 +768,10 @@ def get_vertex_prototypes(self, unaligning: bool) -> list[stk.Vertex]: return self.unaligned_vertex_prototypes return self.vertex_prototypes - def _define_all_graphs(self) -> None: + def _two_type_algorithm(self) -> None: combinations_tested = set() run_topology_codes = [] - num_types = len(self.vertex_types_by_fg.keys()) - if num_types != 2: # noqa: PLR2004 - msg = "not implemented for other types yet" - raise RuntimeError(msg) - type1, type2 = sorted(self.vertex_types_by_fg.keys(), reverse=True) itera1 = [ @@ -838,6 +845,103 @@ def _define_all_graphs(self) -> None: with self.graphs_path.open("w") as f: json.dump(to_save, f) + def _three_type_algorithm(self) -> None: + combinations_tested = set() + run_topology_codes = [] + + type1, type2, type3 = sorted( + self.vertex_types_by_fg.keys(), reverse=True + ) + + itera1 = [ + i + for i in self.reactable_vertex_ids + if i in self.vertex_types_by_fg[type1] + ] + + rng = np.random.default_rng(seed=100) + options1 = [ + i + for i in self.reactable_vertex_ids + if i in self.vertex_types_by_fg[type2] + ] + options2 = [ + i + for i in self.reactable_vertex_ids + if i in self.vertex_types_by_fg[type3] + ] + + to_save = [] + for _ in range(self.max_samples): + # Merging options1 and options2 because they both bind to itera. + mixed_options = options1 + options2 + rng.shuffle(mixed_options) + + # Build an edge selection. + combination = [ + tuple(sorted((i, j))) + for i, j in zip(itera1, mixed_options, strict=True) + ] + + # Need to check for nonsensical ones here. + # Check the number of egdes per vertex is correct. + counter = Counter([i for j in combination for i in j]) + if counter != self.vertex_counts: + continue + + # If are any self-reactions. + if any(abs(i - j) == 0 for i, j in combination): + continue + + topology_code = TopologyCode( + vertex_map=combination, + as_string=vmap_to_str(combination), + ) + + if ( + self.graph_set == "rx_nodoubles" + and topology_code.contains_doubles() + ): + continue + + # Check for string done. + if topology_code.as_string in combinations_tested: + continue + + combinations_tested.add(topology_code.as_string) + + # Convert TopologyCode to a graph. + current_graph = topology_code.get_graph() + + # Check that graph for isomorphism with others graphs. + passed_iso = True + for tc in run_topology_codes: + test_graph = tc.get_graph() + + if rx.is_isomorphic(current_graph, test_graph): + passed_iso = False + break + + if not passed_iso: + continue + + run_topology_codes.append(topology_code) + to_save.append(combination) + logging.info("found one at %s", _) + + with self.graphs_path.open("w") as f: + json.dump(to_save, f) + + def _define_all_graphs(self) -> None: + num_types = len(self.vertex_types_by_fg.keys()) + if num_types == 2: # noqa: PLR2004 + self._two_type_algorithm() + elif num_types == 3: # noqa: PLR2004 + self._three_type_algorithm() + else: + msg = "not implemented for other types yet" + raise RuntimeError(msg) + def count_graphs(self) -> int: """Count completely connected graphs in iteration.""" if not self.graphs_path.exists(): @@ -856,8 +960,10 @@ def count_graphs(self) -> int: num_components = rx.number_connected_components( topology_code.get_graph() ) + if num_components == 1: count += 1 + return count def yield_graphs(self) -> abc.Generator[TopologyCode]: diff --git a/src/cgexplore/_internal/scram/known_graphs/rx_4-4FG_6-2FG_4-1FG.json b/src/cgexplore/_internal/scram/known_graphs/rx_4-4FG_6-2FG_4-1FG.json new file mode 100644 index 00000000..9e933ad9 --- /dev/null +++ b/src/cgexplore/_internal/scram/known_graphs/rx_4-4FG_6-2FG_4-1FG.json @@ -0,0 +1 @@ +[[[0, 5], [0, 10], [0, 13], [0, 7], [1, 6], [1, 5], [1, 9], [1, 4], [2, 8], [2, 12], [2, 4], [2, 7], [3, 11], [3, 6], [3, 9], [3, 8]], [[0, 11], [0, 7], [0, 6], [0, 9], [1, 13], [1, 9], [1, 7], [1, 5], [2, 8], [2, 10], [2, 12], [2, 4], [3, 6], [3, 8], [3, 5], [3, 4]], [[0, 9], [0, 6], [0, 5], [0, 5], [1, 4], [1, 9], [1, 11], [1, 10], [2, 8], [2, 7], [2, 12], [2, 6], [3, 7], [3, 13], [3, 8], [3, 4]], [[0, 11], [0, 9], [0, 10], [0, 7], [1, 8], [1, 13], [1, 6], [1, 6], [2, 9], [2, 4], [2, 7], [2, 5], [3, 5], [3, 4], [3, 8], [3, 12]], [[0, 4], [0, 5], [0, 10], [0, 4], [1, 11], [1, 6], [1, 13], [1, 5], [2, 9], [2, 7], [2, 8], [2, 7], [3, 6], [3, 12], [3, 9], [3, 8]], [[0, 5], [0, 7], [0, 6], [0, 12], [1, 9], [1, 9], [1, 7], [1, 13], [2, 11], [2, 10], [2, 4], [2, 5], [3, 4], [3, 8], [3, 6], [3, 8]], [[0, 9], [0, 5], [0, 4], [0, 7], [1, 4], [1, 5], [1, 8], [1, 9], [2, 6], [2, 12], [2, 10], [2, 11], [3, 8], [3, 13], [3, 6], [3, 7]], [[0, 5], [0, 12], [0, 8], [0, 6], [1, 6], [1, 4], [1, 9], [1, 7], [2, 13], [2, 10], [2, 11], [2, 9], [3, 4], [3, 5], [3, 7], [3, 8]], [[0, 13], [0, 12], [0, 8], [0, 4], [1, 8], [1, 7], [1, 9], [1, 6], [2, 5], [2, 7], [2, 6], [2, 5], [3, 4], [3, 10], [3, 9], [3, 11]], [[0, 9], [0, 5], [0, 6], [0, 8], [1, 4], [1, 7], [1, 9], [1, 12], [2, 7], [2, 13], [2, 4], [2, 11], [3, 8], [3, 10], [3, 6], [3, 5]], [[0, 8], [0, 7], [0, 12], [0, 9], [1, 5], [1, 8], [1, 6], [1, 6], [2, 9], [2, 4], [2, 11], [2, 10], [3, 13], [3, 4], [3, 7], [3, 5]], [[0, 12], [0, 7], [0, 7], [0, 8], [1, 8], [1, 5], [1, 5], [1, 6], [2, 4], [2, 13], [2, 10], [2, 11], [3, 6], [3, 9], [3, 9], [3, 4]], [[0, 4], [0, 6], [0, 10], [0, 7], [1, 12], [1, 5], [1, 7], [1, 9], [2, 13], [2, 6], [2, 4], [2, 8], [3, 11], [3, 9], [3, 8], [3, 5]], [[0, 7], [0, 8], [0, 9], [0, 4], [1, 9], [1, 10], [1, 13], [1, 6], [2, 6], [2, 11], [2, 7], [2, 4], [3, 5], [3, 5], [3, 8], [3, 12]], [[0, 5], [0, 4], [0, 9], [0, 11], [1, 5], [1, 4], [1, 12], [1, 8], [2, 6], [2, 13], [2, 6], [2, 9], [3, 7], [3, 7], [3, 8], [3, 10]], [[0, 8], [0, 5], [0, 13], [0, 4], [1, 8], [1, 9], [1, 6], [1, 10], [2, 7], [2, 9], [2, 4], [2, 11], [3, 6], [3, 7], [3, 5], [3, 12]], [[0, 13], [0, 9], [0, 4], [0, 4], [1, 8], [1, 7], [1, 10], [1, 5], [2, 6], [2, 6], [2, 11], [2, 9], [3, 8], [3, 7], [3, 12], [3, 5]], [[0, 4], [0, 7], [0, 9], [0, 9], [1, 12], [1, 10], [1, 11], [1, 8], [2, 7], [2, 5], [2, 6], [2, 13], [3, 4], [3, 6], [3, 5], [3, 8]], [[0, 11], [0, 8], [0, 6], [0, 12], [1, 6], [1, 7], [1, 4], [1, 4], [2, 8], [2, 5], [2, 7], [2, 9], [3, 10], [3, 13], [3, 9], [3, 5]], [[0, 5], [0, 8], [0, 5], [0, 8], [1, 9], [1, 6], [1, 4], [1, 7], [2, 7], [2, 10], [2, 12], [2, 9], [3, 6], [3, 13], [3, 11], [3, 4]], [[0, 7], [0, 7], [0, 5], [0, 8], [1, 6], [1, 9], [1, 11], [1, 12], [2, 5], [2, 8], [2, 9], [2, 6], [3, 4], [3, 13], [3, 10], [3, 4]], [[0, 6], [0, 12], [0, 10], [0, 5], [1, 9], [1, 8], [1, 8], [1, 7], [2, 11], [2, 5], [2, 9], [2, 6], [3, 7], [3, 4], [3, 13], [3, 4]], [[0, 8], [0, 7], [0, 6], [0, 7], [1, 13], [1, 12], [1, 5], [1, 4], [2, 11], [2, 8], [2, 10], [2, 4], [3, 6], [3, 5], [3, 9], [3, 9]], [[0, 12], [0, 10], [0, 11], [0, 9], [1, 7], [1, 4], [1, 5], [1, 13], [2, 8], [2, 5], [2, 7], [2, 4], [3, 6], [3, 6], [3, 8], [3, 9]], [[0, 4], [0, 5], [0, 5], [0, 12], [1, 6], [1, 6], [1, 7], [1, 9], [2, 8], [2, 4], [2, 10], [2, 8], [3, 9], [3, 13], [3, 7], [3, 11]], [[0, 12], [0, 6], [0, 7], [0, 8], [1, 9], [1, 5], [1, 5], [1, 9], [2, 11], [2, 13], [2, 6], [2, 7], [3, 8], [3, 10], [3, 4], [3, 4]], [[0, 9], [0, 6], [0, 6], [0, 4], [1, 8], [1, 5], [1, 8], [1, 4], [2, 10], [2, 7], [2, 13], [2, 7], [3, 12], [3, 11], [3, 5], [3, 9]], [[0, 9], [0, 13], [0, 5], [0, 7], [1, 9], [1, 4], [1, 10], [1, 6], [2, 11], [2, 8], [2, 8], [2, 5], [3, 4], [3, 12], [3, 6], [3, 7]], [[0, 10], [0, 5], [0, 8], [0, 4], [1, 11], [1, 6], [1, 13], [1, 9], [2, 6], [2, 12], [2, 9], [2, 4], [3, 8], [3, 5], [3, 7], [3, 7]], [[0, 12], [0, 8], [0, 7], [0, 7], [1, 5], [1, 11], [1, 5], [1, 4], [2, 8], [2, 6], [2, 4], [2, 13], [3, 9], [3, 6], [3, 9], [3, 10]], [[0, 9], [0, 12], [0, 8], [0, 7], [1, 4], [1, 10], [1, 6], [1, 6], [2, 11], [2, 13], [2, 5], [2, 5], [3, 4], [3, 8], [3, 7], [3, 9]], [[0, 9], [0, 4], [0, 6], [0, 8], [1, 8], [1, 7], [1, 4], [1, 9], [2, 12], [2, 10], [2, 5], [2, 5], [3, 13], [3, 11], [3, 7], [3, 6]], [[0, 8], [0, 7], [0, 10], [0, 7], [1, 5], [1, 9], [1, 9], [1, 8], [2, 4], [2, 13], [2, 5], [2, 11], [3, 4], [3, 6], [3, 12], [3, 6]], [[0, 4], [0, 10], [0, 11], [0, 8], [1, 6], [1, 8], [1, 5], [1, 6], [2, 13], [2, 9], [2, 12], [2, 5], [3, 9], [3, 4], [3, 7], [3, 7]], [[0, 11], [0, 9], [0, 4], [0, 10], [1, 6], [1, 9], [1, 6], [1, 4], [2, 7], [2, 8], [2, 5], [2, 12], [3, 13], [3, 7], [3, 8], [3, 5]], [[0, 5], [0, 9], [0, 13], [0, 6], [1, 7], [1, 8], [1, 10], [1, 4], [2, 6], [2, 12], [2, 4], [2, 11], [3, 7], [3, 9], [3, 5], [3, 8]], [[0, 4], [0, 4], [0, 5], [0, 5], [1, 6], [1, 11], [1, 6], [1, 8], [2, 9], [2, 9], [2, 8], [2, 13], [3, 10], [3, 12], [3, 7], [3, 7]], [[0, 7], [0, 9], [0, 6], [0, 11], [1, 12], [1, 5], [1, 7], [1, 10], [2, 9], [2, 5], [2, 13], [2, 6], [3, 8], [3, 8], [3, 4], [3, 4]], [[0, 8], [0, 7], [0, 10], [0, 9], [1, 5], [1, 12], [1, 5], [1, 11], [2, 8], [2, 13], [2, 9], [2, 6], [3, 6], [3, 7], [3, 4], [3, 4]], [[0, 4], [0, 12], [0, 8], [0, 10], [1, 6], [1, 9], [1, 5], [1, 11], [2, 6], [2, 4], [2, 5], [2, 9], [3, 13], [3, 7], [3, 7], [3, 8]], [[0, 5], [0, 6], [0, 4], [0, 8], [1, 11], [1, 12], [1, 7], [1, 9], [2, 7], [2, 5], [2, 6], [2, 8], [3, 10], [3, 13], [3, 9], [3, 4]], [[0, 5], [0, 4], [0, 7], [0, 8], [1, 6], [1, 10], [1, 6], [1, 11], [2, 8], [2, 12], [2, 4], [2, 9], [3, 5], [3, 7], [3, 13], [3, 9]], [[0, 9], [0, 6], [0, 5], [0, 7], [1, 8], [1, 4], [1, 6], [1, 5], [2, 11], [2, 7], [2, 13], [2, 8], [3, 9], [3, 12], [3, 4], [3, 10]], [[0, 4], [0, 9], [0, 9], [0, 6], [1, 4], [1, 6], [1, 5], [1, 5], [2, 11], [2, 8], [2, 10], [2, 13], [3, 8], [3, 7], [3, 12], [3, 7]], [[0, 7], [0, 10], [0, 6], [0, 6], [1, 11], [1, 4], [1, 13], [1, 4], [2, 9], [2, 8], [2, 7], [2, 8], [3, 12], [3, 5], [3, 9], [3, 5]], [[0, 10], [0, 7], [0, 11], [0, 12], [1, 6], [1, 5], [1, 6], [1, 8], [2, 7], [2, 9], [2, 4], [2, 4], [3, 5], [3, 13], [3, 9], [3, 8]], [[0, 8], [0, 7], [0, 5], [0, 10], [1, 9], [1, 7], [1, 9], [1, 8], [2, 4], [2, 13], [2, 5], [2, 4], [3, 12], [3, 6], [3, 11], [3, 6]], [[0, 6], [0, 5], [0, 6], [0, 8], [1, 11], [1, 4], [1, 7], [1, 9], [2, 5], [2, 8], [2, 4], [2, 7], [3, 10], [3, 9], [3, 12], [3, 13]], [[0, 9], [0, 7], [0, 7], [0, 4], [1, 4], [1, 11], [1, 10], [1, 9], [2, 6], [2, 5], [2, 8], [2, 6], [3, 12], [3, 13], [3, 8], [3, 5]], [[0, 8], [0, 12], [0, 4], [0, 8], [1, 5], [1, 11], [1, 5], [1, 9], [2, 4], [2, 6], [2, 9], [2, 7], [3, 6], [3, 13], [3, 10], [3, 7]], [[0, 11], [0, 9], [0, 9], [0, 4], [1, 12], [1, 7], [1, 7], [1, 4], [2, 5], [2, 6], [2, 5], [2, 13], [3, 10], [3, 8], [3, 8], [3, 6]], [[0, 13], [0, 8], [0, 11], [0, 5], [1, 12], [1, 4], [1, 10], [1, 7], [2, 5], [2, 6], [2, 8], [2, 9], [3, 6], [3, 7], [3, 9], [3, 4]], [[0, 7], [0, 7], [0, 4], [0, 5], [1, 4], [1, 8], [1, 5], [1, 8], [2, 6], [2, 6], [2, 13], [2, 11], [3, 9], [3, 12], [3, 9], [3, 10]], [[0, 7], [0, 7], [0, 6], [0, 9], [1, 4], [1, 12], [1, 8], [1, 11], [2, 9], [2, 5], [2, 5], [2, 6], [3, 10], [3, 8], [3, 4], [3, 13]], [[0, 6], [0, 4], [0, 5], [0, 9], [1, 11], [1, 8], [1, 7], [1, 7], [2, 8], [2, 5], [2, 6], [2, 9], [3, 4], [3, 10], [3, 13], [3, 12]], [[0, 7], [0, 8], [0, 6], [0, 7], [1, 4], [1, 9], [1, 4], [1, 9], [2, 8], [2, 13], [2, 5], [2, 10], [3, 11], [3, 6], [3, 12], [3, 5]], [[0, 13], [0, 7], [0, 12], [0, 5], [1, 9], [1, 6], [1, 9], [1, 6], [2, 8], [2, 8], [2, 5], [2, 7], [3, 4], [3, 11], [3, 4], [3, 10]], [[0, 10], [0, 4], [0, 5], [0, 4], [1, 6], [1, 6], [1, 7], [1, 7], [2, 8], [2, 9], [2, 9], [2, 12], [3, 5], [3, 13], [3, 11], [3, 8]], [[0, 4], [0, 8], [0, 5], [0, 8], [1, 7], [1, 6], [1, 5], [1, 12], [2, 6], [2, 13], [2, 10], [2, 11], [3, 7], [3, 4], [3, 9], [3, 9]], [[0, 4], [0, 6], [0, 8], [0, 5], [1, 10], [1, 12], [1, 13], [1, 4], [2, 5], [2, 7], [2, 11], [2, 7], [3, 9], [3, 6], [3, 8], [3, 9]], [[0, 7], [0, 13], [0, 12], [0, 11], [1, 9], [1, 9], [1, 8], [1, 8], [2, 6], [2, 10], [2, 6], [2, 4], [3, 7], [3, 5], [3, 5], [3, 4]], [[0, 5], [0, 4], [0, 12], [0, 8], [1, 10], [1, 8], [1, 5], [1, 4], [2, 6], [2, 9], [2, 13], [2, 7], [3, 6], [3, 7], [3, 11], [3, 9]], [[0, 4], [0, 4], [0, 5], [0, 5], [1, 7], [1, 9], [1, 8], [1, 7], [2, 10], [2, 12], [2, 6], [2, 11], [3, 6], [3, 13], [3, 8], [3, 9]], [[0, 10], [0, 13], [0, 11], [0, 12], [1, 9], [1, 5], [1, 6], [1, 9], [2, 8], [2, 6], [2, 4], [2, 7], [3, 7], [3, 8], [3, 5], [3, 4]], [[0, 10], [0, 11], [0, 12], [0, 13], [1, 5], [1, 7], [1, 4], [1, 8], [2, 5], [2, 8], [2, 6], [2, 6], [3, 9], [3, 4], [3, 9], [3, 7]], [[0, 7], [0, 8], [0, 6], [0, 7], [1, 12], [1, 10], [1, 11], [1, 13], [2, 5], [2, 6], [2, 9], [2, 9], [3, 8], [3, 4], [3, 5], [3, 4]], [[0, 5], [0, 8], [0, 7], [0, 4], [1, 9], [1, 6], [1, 6], [1, 12], [2, 10], [2, 9], [2, 13], [2, 11], [3, 5], [3, 7], [3, 8], [3, 4]], [[0, 9], [0, 6], [0, 8], [0, 4], [1, 7], [1, 5], [1, 6], [1, 4], [2, 8], [2, 7], [2, 9], [2, 5], [3, 10], [3, 11], [3, 13], [3, 12]], [[0, 7], [0, 10], [0, 6], [0, 13], [1, 9], [1, 5], [1, 8], [1, 4], [2, 6], [2, 7], [2, 12], [2, 11], [3, 9], [3, 5], [3, 8], [3, 4]], [[0, 9], [0, 4], [0, 4], [0, 9], [1, 8], [1, 13], [1, 11], [1, 12], [2, 7], [2, 6], [2, 10], [2, 5], [3, 7], [3, 6], [3, 5], [3, 8]], [[0, 13], [0, 9], [0, 8], [0, 4], [1, 4], [1, 9], [1, 8], [1, 12], [2, 7], [2, 7], [2, 6], [2, 6], [3, 11], [3, 10], [3, 5], [3, 5]], [[0, 11], [0, 13], [0, 6], [0, 5], [1, 4], [1, 8], [1, 4], [1, 8], [2, 10], [2, 12], [2, 6], [2, 5], [3, 7], [3, 7], [3, 9], [3, 9]], [[0, 9], [0, 7], [0, 5], [0, 8], [1, 13], [1, 6], [1, 6], [1, 10], [2, 11], [2, 4], [2, 12], [2, 4], [3, 8], [3, 9], [3, 7], [3, 5]], [[0, 9], [0, 9], [0, 7], [0, 7], [1, 5], [1, 6], [1, 5], [1, 6], [2, 4], [2, 11], [2, 4], [2, 13], [3, 12], [3, 8], [3, 8], [3, 10]], [[0, 9], [0, 7], [0, 7], [0, 9], [1, 4], [1, 6], [1, 10], [1, 4], [2, 12], [2, 6], [2, 11], [2, 13], [3, 5], [3, 8], [3, 5], [3, 8]], [[0, 8], [0, 9], [0, 7], [0, 7], [1, 10], [1, 12], [1, 13], [1, 11], [2, 4], [2, 6], [2, 6], [2, 4], [3, 5], [3, 8], [3, 9], [3, 5]], [[0, 6], [0, 4], [0, 5], [0, 9], [1, 7], [1, 7], [1, 8], [1, 8], [2, 9], [2, 5], [2, 6], [2, 4], [3, 11], [3, 12], [3, 13], [3, 10]]] \ No newline at end of file From aee33d38b9b2cefb8bc63cec157c79b0c00634d8 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Thu, 19 Dec 2024 22:38:22 +0100 Subject: [PATCH 22/47] Add a step to optimiser, which seems to improve results. --- src/cgexplore/_internal/scram/construction.py | 74 +++++++++++-------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/src/cgexplore/_internal/scram/construction.py b/src/cgexplore/_internal/scram/construction.py index 9ad7da6c..f5f46460 100644 --- a/src/cgexplore/_internal/scram/construction.py +++ b/src/cgexplore/_internal/scram/construction.py @@ -149,30 +149,31 @@ def graph_optimise_cage( # noqa: PLR0913 nx.kamada_kawai_layout(stko_graph.get_graph(), dim=3), ) ): - try: - # We allow these to independantly failed because the nx graphs can - # be ridiculous. - pos_mat = np.array([nx_positions[i] for i in nx_positions]) - if pos_mat.shape[1] != 3: # noqa: PLR2004 - msg = "built a non 3D graph" - raise RuntimeError(msg) - - test_molecule = conformer.molecule.with_position_matrix( - pos_mat * 10 - ) - conformer = run_optimisation( - assigned_system=forcefield.assign_terms( - test_molecule, name, output_dir - ), - name=name, - file_suffix="nopt", - output_dir=output_dir, - platform=platform, - ) - - ensemble.add_conformer(conformer=conformer, source=f"nx{i}") - except OpenMMException: - logging.info("failed graph opt of %s", name) + # We allow these to independantly failed because the nx graphs can + # be ridiculous. + for j, scaler in enumerate((5, 10, 15)): + try: + pos_mat = np.array([nx_positions[i] for i in nx_positions]) + if pos_mat.shape[1] != 3: # noqa: PLR2004 + msg = "built a non 3D graph" + raise RuntimeError(msg) + + test_molecule = conformer.molecule.with_position_matrix( + pos_mat * scaler + ) + conformer = run_optimisation( + assigned_system=forcefield.assign_terms( + test_molecule, name, output_dir + ), + name=name, + file_suffix="nopt", + output_dir=output_dir, + platform=platform, + ) + + ensemble.add_conformer(conformer=conformer, source=f"nx{i}{j}") + except OpenMMException: + logging.info("failed graph opt of %s", name) # Try with graph positions. rng = np.random.default_rng(seed=100) @@ -328,13 +329,24 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 # Add neighbours to systematic scan. if "scan" in name: - si, sj = name.split("_")[1].split("-") - - potential_names = [ - f"scan_{int(si)-1}-{int(sj)-1}", - f"scan_{int(si)-1}-{int(sj)}", - f"scan_{int(si)}-{int(sj)-1}", - ] + if "ufo" in name: + _, multiplier, sisj = name.split("_") + si, sj = sisj.split("-") + + potential_names = [ + f"ufoscan_{multiplier}_{int(si)-1}-{int(sj)-1}", + f"ufoscan_{multiplier}_{int(si)-1}-{int(sj)}", + f"ufoscan_{multiplier}_{int(si)}-{int(sj)-1}", + ] + + else: + si, sj = name.split("_")[1].split("-") + + potential_names = [ + f"scan_{int(si)-1}-{int(sj)-1}", + f"scan_{int(si)-1}-{int(sj)}", + f"scan_{int(si)}-{int(sj)-1}", + ] elif "ts_" in name: _, tstr, si, sj, _at = name.split("_") From aad02570a3002d07d744f0c082c0351bdc15af71 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 29 Dec 2024 23:02:40 +0100 Subject: [PATCH 23/47] New graph set. --- .../_internal/scram/known_graphs/rx_2-4FG_3-2FG_2-1FG.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 src/cgexplore/_internal/scram/known_graphs/rx_2-4FG_3-2FG_2-1FG.json diff --git a/src/cgexplore/_internal/scram/known_graphs/rx_2-4FG_3-2FG_2-1FG.json b/src/cgexplore/_internal/scram/known_graphs/rx_2-4FG_3-2FG_2-1FG.json new file mode 100644 index 00000000..6472c87c --- /dev/null +++ b/src/cgexplore/_internal/scram/known_graphs/rx_2-4FG_3-2FG_2-1FG.json @@ -0,0 +1 @@ +[[[0, 3], [0, 2], [0, 6], [0, 5], [1, 4], [1, 4], [1, 3], [1, 2]], [[0, 3], [0, 4], [0, 4], [0, 5], [1, 3], [1, 2], [1, 2], [1, 6]], [[0, 4], [0, 3], [0, 5], [0, 2], [1, 6], [1, 3], [1, 2], [1, 4]], [[0, 4], [0, 4], [0, 3], [0, 3], [1, 5], [1, 6], [1, 2], [1, 2]]] \ No newline at end of file From 6461cb42d9ded13cdd468d15139d904fe16d9419 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 29 Dec 2024 23:03:01 +0100 Subject: [PATCH 24/47] Minor. --- src/cgexplore/_internal/scram/construction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cgexplore/_internal/scram/construction.py b/src/cgexplore/_internal/scram/construction.py index f5f46460..61e6b78c 100644 --- a/src/cgexplore/_internal/scram/construction.py +++ b/src/cgexplore/_internal/scram/construction.py @@ -79,12 +79,12 @@ def graph_optimise_cage( # noqa: PLR0913 return ensemble.get_lowest_e_conformer() assigned_system = forcefield.assign_terms(molecule, name, output_dir) - if (output_dir / f"{name}_ensemblewip.xyz").exists(): - (output_dir / f"{name}_ensemblewip.xyz").unlink() + if (output_dir / f"{name}_ensemble.xyz").exists(): + (output_dir / f"{name}_ensemble.xyz").unlink() ensemble = Ensemble( base_molecule=molecule, base_mol_path=output_dir / f"{name}_base.mol", - conformer_xyz=output_dir / f"{name}_ensemblewip.xyz", + conformer_xyz=output_dir / f"{name}_ensemble.xyz", data_json=output_dir / f"{name}_ensemble.json", overwrite=True, ) From 01382951d2b242580361625199473eb5ed5b4c08 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 29 Dec 2024 23:03:15 +0100 Subject: [PATCH 25/47] Allow custom number of components. --- src/cgexplore/_internal/scram/enumeration.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cgexplore/_internal/scram/enumeration.py b/src/cgexplore/_internal/scram/enumeration.py index 8d4db267..e05c1c69 100644 --- a/src/cgexplore/_internal/scram/enumeration.py +++ b/src/cgexplore/_internal/scram/enumeration.py @@ -622,6 +622,7 @@ class IHomolepticTopologyIterator: graph_type: str graph_set: Literal["rx", "nx", "rx_nodoubles"] = "rx" scale_multiplier = 5 + allowed_num_components: int = 1 max_samples: int | None = None def __post_init__(self) -> None: # noqa: PLR0915, PLR0912, C901 @@ -961,7 +962,7 @@ def count_graphs(self) -> int: topology_code.get_graph() ) - if num_components == 1: + if num_components == self.allowed_num_components: count += 1 return count @@ -986,5 +987,5 @@ def yield_graphs(self) -> abc.Generator[TopologyCode]: num_components = rx.number_connected_components( topology_code.get_graph() ) - if num_components == 1: + if num_components == self.allowed_num_components: yield topology_code From ddb5935b5aea716c1cbfc957a3e6580a1c69fa27 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 4 Jan 2025 20:37:35 +0100 Subject: [PATCH 26/47] Add forcefield dict. --- optimisation_example/optimsation_test.py | 55 +---------------- .../_internal/forcefields/forcefield.py | 59 +++++++++++++++++++ .../_internal/forcefields/utilities.py | 2 - 3 files changed, 62 insertions(+), 54 deletions(-) diff --git a/optimisation_example/optimsation_test.py b/optimisation_example/optimsation_test.py index 7d00a2e7..012a729d 100644 --- a/optimisation_example/optimsation_test.py +++ b/optimisation_example/optimsation_test.py @@ -112,60 +112,11 @@ def analyse_cage( ) if "forcefield_dict" not in properties: - # This is matched to the existing analysis code. I recommend - # generalising in the future. - ff_targets = forcefield.get_targets() - k_dict = {} - v_dict = {} - - for bt in ff_targets["bonds"]: - cp = (bt.type1, bt.type2) - k_dict["_".join(cp)] = bt.bond_k.value_in_unit( - openmm.unit.kilojoule - / openmm.unit.mole - / openmm.unit.nanometer**2 - ) - v_dict["_".join(cp)] = bt.bond_r.value_in_unit( - openmm.unit.angstrom - ) - - for at in ff_targets["angles"]: - cp = (at.type1, at.type2, at.type3) - try: - k_dict["_".join(cp)] = at.angle_k.value_in_unit( - openmm.unit.kilojoule - / openmm.unit.mole - / openmm.unit.radian**2 - ) - v_dict["_".join(cp)] = at.angle.value_in_unit( - openmm.unit.degrees - ) - except TypeError: - # Handle different angle types. - k_dict["_".join(cp)] = at.angle_k.value_in_unit( - openmm.unit.kilojoule / openmm.unit.mole - ) - v_dict["_".join(cp)] = at.angle.value_in_unit( - openmm.unit.degrees - ) - - for at in ff_targets["torsions"]: - cp = at.search_string - k_dict["_".join(cp)] = at.torsion_k.value_in_unit( - openmm.unit.kilojoules_per_mole - ) - v_dict["_".join(cp)] = at.phi0.value_in_unit(openmm.unit.degrees) - - forcefield_dict = { - "ff_id": forcefield.get_identifier(), - "ff_prefix": forcefield.get_prefix(), - "v_dict": v_dict, - "k_dict": k_dict, - } - database.add_properties( key=name, - property_dict={"forcefield_dict": forcefield_dict}, + property_dict={ + "forcefield_dict": forcefield.get_forcefield_dictionary() + }, ) diff --git a/src/cgexplore/_internal/forcefields/forcefield.py b/src/cgexplore/_internal/forcefields/forcefield.py index 392edbd7..ea2dde11 100644 --- a/src/cgexplore/_internal/forcefields/forcefield.py +++ b/src/cgexplore/_internal/forcefields/forcefield.py @@ -735,6 +735,65 @@ def __repr__(self) -> str: """Return a string representation of the Ensemble.""" return str(self) + def get_forcefield_dictionary(self) -> dict[str, str | dict]: + """Get the underlying forcefield dict.""" + # This is matched to the existing analysis code. I recommend + # generalising in the future. + ff_targets = self.get_targets() + k_dict = {} + v_dict = {} + + for bt in ff_targets["bonds"]: + cp = (bt.type1, bt.type2) + k_dict["_".join(cp)] = bt.bond_k.value_in_unit( + openmm.unit.kilojoule + / openmm.unit.mole + / openmm.unit.nanometer**2 + ) + v_dict["_".join(cp)] = bt.bond_r.value_in_unit( + openmm.unit.angstrom + ) + + for at in ff_targets["angles"]: + cp = (at.type1, at.type2, at.type3) + try: + k_dict["_".join(cp)] = at.angle_k.value_in_unit( + openmm.unit.kilojoule + / openmm.unit.mole + / openmm.unit.radian**2 + ) + v_dict["_".join(cp)] = at.angle.value_in_unit( + openmm.unit.degrees + ) + except TypeError: + # Handle different angle types. + k_dict["_".join(cp)] = at.angle_k.value_in_unit( + openmm.unit.kilojoule / openmm.unit.mole + ) + v_dict["_".join(cp)] = (at.n, at.b) + + for at in ff_targets["torsions"]: + cp = at.search_string + k_dict["_".join(cp)] = at.torsion_k.value_in_unit( + openmm.unit.kilojoules_per_mole + ) + v_dict["_".join(cp)] = at.phi0.value_in_unit(openmm.unit.degrees) + + for at in ff_targets["nonbondeds"]: + v_dict[at.bead_class] = at.sigma.value_in_unit( + openmm.unit.angstrom + ) + k_dict[at.bead_class] = at.epsilon.value_in_unit( + openmm.unit.kilojoules_per_mole + ) + + return { + "ff_id": self.get_identifier(), + "ff_prefix": self.get_prefix(), + "k_dict": k_dict, + "v_dict": v_dict, + } + class MartiniForceField(ForceField): """Class defining a Martini Forcefield.""" diff --git a/src/cgexplore/_internal/forcefields/utilities.py b/src/cgexplore/_internal/forcefields/utilities.py index b805e595..01935152 100644 --- a/src/cgexplore/_internal/forcefields/utilities.py +++ b/src/cgexplore/_internal/forcefields/utilities.py @@ -1,5 +1,3 @@ -# Distributed under the terms of the MIT License. - """Utilities module.""" from openmm import openmm From 60afa01751d3a645c8b4b73496a92e5837d4400f Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 4 Jan 2025 20:37:47 +0100 Subject: [PATCH 27/47] Update steric six bead. --- .../_internal/molecular/molecule_construction.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cgexplore/_internal/molecular/molecule_construction.py b/src/cgexplore/_internal/molecular/molecule_construction.py index 3fc89ae4..befd7bf7 100644 --- a/src/cgexplore/_internal/molecular/molecule_construction.py +++ b/src/cgexplore/_internal/molecular/molecule_construction.py @@ -643,22 +643,25 @@ def __init__( bead: CgBead, abead1: CgBead, abead2: CgBead, + ibead: CgBead, sbead: CgBead, ) -> None: """Initialize a precursor.""" self._bead = bead self._abead1 = abead1 self._abead2 = abead2 + self._ibead = ibead self._sbead = sbead self._name = ( f"6S2{bead.bead_type}{abead1.bead_type}{abead2.bead_type}" - f"{sbead.bead_type}" + f"{ibead.bead_type}{sbead.bead_type}" ) self._bead_set = { bead.bead_type: bead, abead1.bead_type: abead1, abead2.bead_type: abead2, sbead.bead_type: sbead, + ibead.bead_type: ibead, } new_fgs = stk.SmartsFunctionalGroupFactory( @@ -670,8 +673,8 @@ def __init__( self._building_block = stk.BuildingBlock( smiles=( f"[{abead2.element_string}][{abead1.element_string}]" - f"[{bead.element_string}][{bead.element_string}]" - f"([{sbead.element_string}])[{bead.element_string}]" + f"[{bead.element_string}][{ibead.element_string}]" + f"[{bead.element_string}]" f"[{abead1.element_string}][{abead2.element_string}]" ), functional_groups=new_fgs, @@ -681,7 +684,6 @@ def __init__( [-4, 2, 0], [-2, 0.1, 0], [0, 0.1, 0], - [0, 1, 0], [2, 0, 0], [4, 2, 0], [6, 3, 0.2], From 079021d0d3ef617e475f80f63ac558cc40cc2a92 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 4 Jan 2025 20:38:02 +0100 Subject: [PATCH 28/47] Add potential name. --- src/cgexplore/_internal/scram/construction.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cgexplore/_internal/scram/construction.py b/src/cgexplore/_internal/scram/construction.py index 61e6b78c..e73ffe83 100644 --- a/src/cgexplore/_internal/scram/construction.py +++ b/src/cgexplore/_internal/scram/construction.py @@ -357,6 +357,7 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 f"ts_{tstr}_{int(si)-1}_{int(sj)-1}_{i}", f"ts_{tstr}_{int(si)-1}_{int(sj)}_{i}", f"ts_{tstr}_{int(si)}_{int(sj)-1}_{i}", + f"ts_{tstr}_{int(si)}_{int(sj)}_{i}", ] ) else: @@ -366,6 +367,7 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 potential_file = output_dir / f"{potential_name}_final.mol" if not potential_file.exists(): continue + test_molecule = temp_molecule.with_structure_from_file(potential_file) conformer = run_optimisation( assigned_system=AssignedSystem( From 9e42b7c3c6f83b54191872704b9d340a56f064f2 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 5 Jan 2025 18:40:46 +0100 Subject: [PATCH 29/47] Update comment. --- first_paper_example/analysis.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/first_paper_example/analysis.py b/first_paper_example/analysis.py index 4ebb2b50..65752a6e 100644 --- a/first_paper_example/analysis.py +++ b/first_paper_example/analysis.py @@ -122,8 +122,10 @@ def analyse_cage( lig_shape_measures = liga_shape.calculate(l_shape_mol) l_shape_mol.write(shape_molfile2) - # Always want to extract target torions if present. g_measure = cgx.analysis.GeomMeasure( + # Always want to extract target torions if present, in toff, they + # are no in the forcefield, so cannot use + # `GeomMeasure.from_forcefield`. target_torsions=( cgx.terms.TargetTorsion( search_string=("b1", "a1", "c1", "a1", "b1"), From ebe5750cf377259890401fd3acf913d4800becf1 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 5 Jan 2025 18:41:08 +0100 Subject: [PATCH 30/47] Refactor energy per bb. --- optimisation_example/optimsation_test.py | 38 ++----------------- .../_internal/utilities/utilities.py | 31 +++++++++++++++ src/cgexplore/utilities.py | 2 + 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/optimisation_example/optimsation_test.py b/optimisation_example/optimsation_test.py index 012a729d..5bd43604 100644 --- a/optimisation_example/optimsation_test.py +++ b/optimisation_example/optimsation_test.py @@ -50,41 +50,11 @@ def analyse_cage( ) if "energy_per_bb" not in properties: - energy_decomp = {} - for component in properties["energy_decomposition"]: - component_tup = properties["energy_decomposition"][component] - if component == "total energy": - energy_decomp[f"{component}_{component_tup[1]}"] = float( - component_tup[0] - ) - else: - just_name = component.split("'")[1] - key = f"{just_name}_{component_tup[1]}" - value = float(component_tup[0]) - if key in energy_decomp: - energy_decomp[key] += value - else: - energy_decomp[key] = value - fin_energy = energy_decomp["total energy_kJ/mol"] - try: - assert ( # noqa: S101 - sum( - energy_decomp[i] - for i in energy_decomp - if "total energy" not in i - ) - == fin_energy - ) - except AssertionError as ex: - ex.add_note( - "energy decompisition does not sum to total energy for" - f" {name}: {energy_decomp}" - ) - raise res_dict = { - "strain_energy": fin_energy, - "energy_per_bb": fin_energy - / cgx.topologies.stoich_map(topology_str), + "energy_per_bb": cgx.utilities.get_energy_per_bb( + energy_decomposition=properties["energy_decomposition"], + number_building_blocks=cgx.topologies.stoich_map(topology_str), + ), } database.add_properties(key=name, property_dict=res_dict) diff --git a/src/cgexplore/_internal/utilities/utilities.py b/src/cgexplore/_internal/utilities/utilities.py index 0096f459..13ec28c8 100644 --- a/src/cgexplore/_internal/utilities/utilities.py +++ b/src/cgexplore/_internal/utilities/utilities.py @@ -107,3 +107,34 @@ def extract_property( msg = f"{path} is too deep ({len(path)})." raise RuntimeError(msg) return value + + +def get_energy_per_bb( + energy_decomposition: dict[str, tuple[float, str]], + number_building_blocks: int, +) -> float: + """Get the energy per building blocks used in most papers.""" + energy_decomp = {} + for component, component_tup in energy_decomposition.items(): + if component == "total energy": + energy_decomp[f"{component}_{component_tup[1]}"] = float( + component_tup[0] + ) + else: + just_name = component.split("'")[1] + key = f"{just_name}_{component_tup[1]}" + value = float(component_tup[0]) + if key in energy_decomp: + energy_decomp[key] += value + else: + energy_decomp[key] = value + + fin_energy = energy_decomp["total energy_kJ/mol"] + if ( + sum(energy_decomp[i] for i in energy_decomp if "total energy" not in i) + != fin_energy + ): + msg = "energy decompisition does not sum to total energy" + raise RuntimeError(msg) + + return fin_energy / number_building_blocks diff --git a/src/cgexplore/utilities.py b/src/cgexplore/utilities.py index 6efff682..02609a1d 100644 --- a/src/cgexplore/utilities.py +++ b/src/cgexplore/utilities.py @@ -27,6 +27,7 @@ convert_pyramid_angle, draw_pie, extract_property, + get_energy_per_bb, ) from cgexplore._internal.utilities.visualisation import ( Pymol, @@ -45,6 +46,7 @@ "convert_pyramid_angle", "draw_pie", "extract_property", + "get_energy_per_bb", "get_supramolecule", "get_unforced_supramolecule", "optimise_ligand", From d0b836a7e297085da32698b3c6473bd93ce8f39c Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 5 Jan 2025 18:41:25 +0100 Subject: [PATCH 31/47] Add steric precursor. --- .../molecular/molecule_construction.py | 60 ++++++++++++++++++- src/cgexplore/molecular.py | 2 + 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/src/cgexplore/_internal/molecular/molecule_construction.py b/src/cgexplore/_internal/molecular/molecule_construction.py index befd7bf7..691827ce 100644 --- a/src/cgexplore/_internal/molecular/molecule_construction.py +++ b/src/cgexplore/_internal/molecular/molecule_construction.py @@ -638,6 +638,61 @@ def __init__(self, bead: CgBead, abead1: CgBead, abead2: CgBead) -> None: class StericSixBead(Precursor): """A Precursor.""" + def __init__( + self, + bead: CgBead, + abead1: CgBead, + abead2: CgBead, + ibead: CgBead, + ) -> None: + """Initialize a precursor.""" + self._bead = bead + self._abead1 = abead1 + self._abead2 = abead2 + self._ibead = ibead + + self._name = ( + f"6S2{bead.bead_type}{abead1.bead_type}{abead2.bead_type}" + f"{ibead.bead_type}" + ) + self._bead_set = { + bead.bead_type: bead, + abead1.bead_type: abead1, + abead2.bead_type: abead2, + ibead.bead_type: ibead, + } + + new_fgs = stk.SmartsFunctionalGroupFactory( + smarts=f"[{abead2.element_string}X1][{abead1.element_string}]", + bonders=(0,), + deleters=(), + placers=(0, 1), + ) + self._building_block = stk.BuildingBlock( + smiles=( + f"[{abead2.element_string}][{abead1.element_string}]" + f"[{bead.element_string}][{ibead.element_string}]" + f"[{bead.element_string}]" + f"[{abead1.element_string}][{abead2.element_string}]" + ), + functional_groups=new_fgs, + position_matrix=np.array( + [ + [-6, 3, 0.2], + [-4, 2, 0], + [-2, 0.1, 0], + [0, 0.1, 0], + [2, 0, 0], + [4, 2, 0], + [6, 3, 0.2], + ] + ), + ) + + +class StericSevenBead(Precursor): + """A Precursor.""" + def __init__( self, bead: CgBead, @@ -653,7 +708,7 @@ def __init__( self._ibead = ibead self._sbead = sbead self._name = ( - f"6S2{bead.bead_type}{abead1.bead_type}{abead2.bead_type}" + f"7S2{bead.bead_type}{abead1.bead_type}{abead2.bead_type}" f"{ibead.bead_type}{sbead.bead_type}" ) self._bead_set = { @@ -674,7 +729,7 @@ def __init__( smiles=( f"[{abead2.element_string}][{abead1.element_string}]" f"[{bead.element_string}][{ibead.element_string}]" - f"[{bead.element_string}]" + f"([{sbead.element_string}])[{bead.element_string}]" f"[{abead1.element_string}][{abead2.element_string}]" ), functional_groups=new_fgs, @@ -684,6 +739,7 @@ def __init__( [-4, 2, 0], [-2, 0.1, 0], [0, 0.1, 0], + [0, 1, 0], [2, 0, 0], [4, 2, 0], [6, 3, 0.2], diff --git a/src/cgexplore/molecular.py b/src/cgexplore/molecular.py index e6f299af..d1ce5105 100644 --- a/src/cgexplore/molecular.py +++ b/src/cgexplore/molecular.py @@ -15,6 +15,7 @@ Precursor, SixBead, SquarePrecursor, + StericSevenBead, StericSixBead, ThreeC0Arm, ThreeC1Arm, @@ -48,6 +49,7 @@ "SixBead", "SpindryConformer", "SquarePrecursor", + "StericSevenBead", "StericSixBead", "ThreeC0Arm", "ThreeC1Arm", From d4d340923df5ba74b205bb6f2e49533978b329f7 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 5 Jan 2025 18:41:46 +0100 Subject: [PATCH 32/47] Update the neighbour process. --- src/cgexplore/_internal/scram/construction.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/cgexplore/_internal/scram/construction.py b/src/cgexplore/_internal/scram/construction.py index e73ffe83..aa3bc017 100644 --- a/src/cgexplore/_internal/scram/construction.py +++ b/src/cgexplore/_internal/scram/construction.py @@ -351,7 +351,7 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 _, tstr, si, sj, _at = name.split("_") potential_names = [] - for i in range(6): + for i in range(20): potential_names.extend( [ f"ts_{tstr}_{int(si)-1}_{int(sj)-1}_{i}", @@ -368,21 +368,18 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 if not potential_file.exists(): continue - test_molecule = temp_molecule.with_structure_from_file(potential_file) + test_molecule = stk.BuildingBlock.init_from_file(potential_file) + conformer = run_optimisation( - assigned_system=AssignedSystem( - molecule=test_molecule, - forcefield_terms=assigned_system.forcefield_terms, - system_xml=assigned_system.system_xml, - topology_xml=assigned_system.topology_xml, - bead_set=assigned_system.bead_set, - vdw_bond_cutoff=assigned_system.vdw_bond_cutoff, + assigned_system=forcefield.assign_terms( + test_molecule, name, output_dir ), name=name, file_suffix="ns", output_dir=output_dir, platform=platform, ) + ensemble.add_conformer(conformer=conformer, source="ns") num_steps = 20000 From 20809e5c64124856eea343e0158b182c492add10 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sun, 5 Jan 2025 18:42:00 +0100 Subject: [PATCH 33/47] Add initialisation from ff. --- src/cgexplore/_internal/analysis/geom.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/cgexplore/_internal/analysis/geom.py b/src/cgexplore/_internal/analysis/geom.py index 398c5413..54316b62 100644 --- a/src/cgexplore/_internal/analysis/geom.py +++ b/src/cgexplore/_internal/analysis/geom.py @@ -2,12 +2,14 @@ """Module for geometry analysis.""" +import typing from collections import abc, defaultdict import stk import stko from rdkit.Chem import AllChem +from cgexplore._internal.forcefields.forcefield import ForceField from cgexplore._internal.terms.torsions import TargetTorsion from cgexplore._internal.terms.utilities import find_torsions @@ -228,3 +230,13 @@ def calculate_max_diameter(self, molecule: stk.Molecule) -> float: """ return self._stko_analyser.get_max_diameter(molecule) + + @classmethod + def from_forcefield( + cls, + forcefield: ForceField, + ) -> typing.Self: + """Get the values in terms of forcefield terms.""" + ff_targets = forcefield.get_targets() + + return cls(target_torsions=ff_targets["torsions"]) From 2e1817cb1eb7ed74767df2d21ed118245b4dc596 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Fri, 17 Jan 2025 20:46:38 +0100 Subject: [PATCH 34/47] Update miniconda action version. --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7d261ce5..581efe51 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,7 +46,7 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true @@ -77,7 +77,7 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true From 88023421c93613634a940ac2f3122c6d76ea8d66 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Fri, 17 Jan 2025 20:54:13 +0100 Subject: [PATCH 35/47] Ruff fixes. --- .github/workflows/tests.yml | 5 +++-- first_paper_example/plot_cages.py | 3 +-- first_paper_example/plot_distributions.py | 17 +++++------------ first_paper_example/plot_maps.py | 2 +- first_paper_example/plot_shape_analysis.py | 15 +++------------ src/cgexplore/_internal/analysis/shape.py | 7 +++---- .../_internal/forcefields/assigned_system.py | 10 +++++----- .../_internal/forcefields/forcefield.py | 16 ++++++++-------- src/cgexplore/_internal/molecular/ensembles.py | 3 +-- .../_internal/optimisation/openmm_optimizer.py | 5 ++--- src/cgexplore/_internal/scram/construction.py | 18 +++++++++--------- .../_internal/systems_optimisation/inputs.py | 2 +- src/cgexplore/_internal/terms/torsions.py | 8 ++++---- .../_internal/utilities/visualisation.py | 2 +- 14 files changed, 47 insertions(+), 66 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 581efe51..a126608e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -76,13 +76,14 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: - auto-update-conda: true + mamba-version: "*" miniforge-variant: Mambaforge channels: conda-forge,defaults + channel-priority: true python-version: 3.11 activate-environment: buildtest use-mamba: true diff --git a/first_paper_example/plot_cages.py b/first_paper_example/plot_cages.py index d1f72554..703b3cd1 100644 --- a/first_paper_example/plot_cages.py +++ b/first_paper_example/plot_cages.py @@ -829,8 +829,7 @@ def webapp_csv( logging.info("running webapp_csv") github_base_url = ( - "https://github.com/andrewtarzia/cgmodels/blob/main/" - "cg_model_jul2023/" + "https://github.com/andrewtarzia/cgmodels/blob/main/cg_model_jul2023/" ) github_selfsort_url = github_base_url + "self_sort_outcomes/" diff --git a/first_paper_example/plot_distributions.py b/first_paper_example/plot_distributions.py index 953c1f99..feb4dd47 100644 --- a/first_paper_example/plot_distributions.py +++ b/first_paper_example/plot_distributions.py @@ -218,20 +218,17 @@ def geom_distributions( ax.tick_params(axis="both", which="major", labelsize=16) if column is None: ax.set_title( - f"{convert_tors(tors,num=False)} ", + f"{convert_tors(tors, num=False)} ", fontsize=16, ) ax.set_ylabel(cdict["xlabel"], fontsize=16) else: ax.set_title( - ( - f'{cdict["xlabel"]}: ' - f"{convert_tors(tors,num=False)} " - ), + (f"{cdict['xlabel']}: {convert_tors(tors, num=False)} "), fontsize=16, ) ax.set_ylabel( - f'observed - target [{cdict["units"]}]', + f"observed - target [{cdict['units']}]", fontsize=16, ) ax.set_xticks([tcpos[i] for i in tcpos]) @@ -501,9 +498,7 @@ def plot_mixed_unstable( c=color_map[(tor, cltitle)], lw=3, linestyle="-", - label=( - f"{cltitle[0]}C: " f"{convert_tors(tor, num=False)}, unstable" - ), + label=(f"{cltitle[0]}C: {convert_tors(tor, num=False)}, unstable"), ) ax.plot( xs, @@ -511,9 +506,7 @@ def plot_mixed_unstable( c=color_map[(tor, cltitle)], lw=3, linestyle="--", - label=( - f"{cltitle[0]}C: " f"{convert_tors(tor, num=False)}, mixed" - ), + label=(f"{cltitle[0]}C: {convert_tors(tor, num=False)}, mixed"), ) ax.tick_params(axis="both", which="major", labelsize=16) diff --git a/first_paper_example/plot_maps.py b/first_paper_example/plot_maps.py index 558d0783..b38beff5 100644 --- a/first_paper_example/plot_maps.py +++ b/first_paper_example/plot_maps.py @@ -191,7 +191,7 @@ def bite_angle_relationship( c=cmap[tors][0], alpha=1.0, linestyle="--", - label=f"{convert_tors(tors,num=False)}", + label=f"{convert_tors(tors, num=False)}", marker=cmap[tors][1], ) diff --git a/first_paper_example/plot_shape_analysis.py b/first_paper_example/plot_shape_analysis.py index 8df53fb8..96f7de1f 100644 --- a/first_paper_example/plot_shape_analysis.py +++ b/first_paper_example/plot_shape_analysis.py @@ -280,10 +280,7 @@ def shape_topology( ), ) ax.set_title( - ( - f"{convert_topo(tstr)}: " - f"{convert_tors(tor, num=False)}" - ), + (f"{convert_topo(tstr)}: {convert_tors(tor, num=False)}"), fontsize=16, ) ax.set_xlabel(target_shape, fontsize=16) @@ -374,10 +371,7 @@ def shape_topology_main( ), ) ax.set_title( - ( - f"{convert_topo(tstr)}: " - f"{convert_tors(tor, num=False)}" - ), + (f"{convert_topo(tstr)}: {convert_tors(tor, num=False)}"), fontsize=16, ) ax.set_xlabel(target_shape, fontsize=16) @@ -495,10 +489,7 @@ def shape_input_relationships( ) ax.set_title( - ( - f"{convert_topo(tstr)}: " - f"{convert_tors(tor, num=False)}" - ), + (f"{convert_topo(tstr)}: {convert_tors(tor, num=False)}"), fontsize=16, ) ax.tick_params(axis="both", which="major", labelsize=16) diff --git a/src/cgexplore/_internal/analysis/shape.py b/src/cgexplore/_internal/analysis/shape.py index 907e82b0..32b8c21a 100644 --- a/src/cgexplore/_internal/analysis/shape.py +++ b/src/cgexplore/_internal/analysis/shape.py @@ -566,7 +566,7 @@ def reference_shape_dict(self) -> dict[str, dict]: "vertices": "9", "label": "JTC-9", "shape": ( - "Triangular cupola (J3) = trivacant cuboctahedron " "C3v" + "Triangular cupola (J3) = trivacant cuboctahedron C3v" ), }, "JCCU-9": { @@ -732,8 +732,7 @@ def reference_shape_dict(self) -> dict[str, dict]: "vertices": "11", "label": "JCPPR-11", "shape": ( - "Capped pent. Prism (Elongated pentagonal pyramid " - "J9) C5v" + "Capped pent. Prism (Elongated pentagonal pyramid J9) C5v" ), }, "JCPAPR-11": { @@ -804,7 +803,7 @@ def reference_shape_dict(self) -> dict[str, dict]: "vertices": "12", "label": "ACOC-12", "shape": ( - "Anticuboctahedron (Triangular orthobicupola J27) " "D3h" + "Anticuboctahedron (Triangular orthobicupola J27) D3h" ), }, "IC-12": { diff --git a/src/cgexplore/_internal/forcefields/assigned_system.py b/src/cgexplore/_internal/forcefields/assigned_system.py index f5ec4299..01310307 100644 --- a/src/cgexplore/_internal/forcefields/assigned_system.py +++ b/src/cgexplore/_internal/forcefields/assigned_system.py @@ -397,8 +397,8 @@ def _get_bonds_string(self) -> str: / openmm.unit.nanometer**2 ) string += ( - f" {assigned_force.atom_ids[0]+1} " - f"{assigned_force.atom_ids[1]+1} " + f" {assigned_force.atom_ids[0] + 1} " + f"{assigned_force.atom_ids[1] + 1} " f"{assigned_force.funct} " f"{length} " f"{k}\n" @@ -422,9 +422,9 @@ def _get_angles_string(self) -> str: / openmm.unit.radian**2 ) string += ( - f" {assigned_force.atom_ids[0]+1} " - f"{assigned_force.atom_ids[1]+1} " - f"{assigned_force.atom_ids[2]+1} " + f" {assigned_force.atom_ids[0] + 1} " + f"{assigned_force.atom_ids[1] + 1} " + f"{assigned_force.atom_ids[2] + 1} " f"{assigned_force.funct} " f"{angle} " f"{k}\n" diff --git a/src/cgexplore/_internal/forcefields/forcefield.py b/src/cgexplore/_internal/forcefields/forcefield.py index ea2dde11..4251bfa2 100644 --- a/src/cgexplore/_internal/forcefields/forcefield.py +++ b/src/cgexplore/_internal/forcefields/forcefield.py @@ -187,7 +187,7 @@ def _assign_bond_terms(self, molecule: stk.Molecule) -> tuple: Bond( atoms=atoms, atom_names=tuple( - f"{i.__class__.__name__}{i.get_id()+1}" + f"{i.__class__.__name__}{i.get_id() + 1}" for i in atoms ), atom_ids=tuple(i.get_id() for i in atoms), @@ -267,12 +267,12 @@ def _assign_angle_terms( # noqa: C901, PLR0912, PLR0915 central_bead = cgbeads[1] central_atom = list(found_angle.atoms)[1] central_name = ( - f"{atom_estrings[1]}{central_atom.get_id()+1}" + f"{atom_estrings[1]}{central_atom.get_id() + 1}" ) actual_angle = Angle( atoms=found_angle.atoms, atom_names=tuple( - f"{i.__class__.__name__}" f"{i.get_id()+1}" + f"{i.__class__.__name__}{i.get_id() + 1}" for i in found_angle.atoms ), atom_ids=found_angle.atom_ids, @@ -304,13 +304,13 @@ def _assign_angle_terms( # noqa: C901, PLR0912, PLR0915 central_bead = cgbeads[1] central_atom = list(found_angle.atoms)[1] central_name = ( - f"{atom_estrings[1]}{central_atom.get_id()+1}" + f"{atom_estrings[1]}{central_atom.get_id() + 1}" ) angle_terms.append( CosineAngle( atoms=found_angle.atoms, atom_names=tuple( - f"{i.__class__.__name__}" f"{i.get_id()+1}" + f"{i.__class__.__name__}{i.get_id() + 1}" for i in found_angle.atoms ), atom_ids=found_angle.atom_ids, @@ -342,12 +342,12 @@ def _assign_angle_terms( # noqa: C901, PLR0912, PLR0915 central_bead = cgbeads[1] central_atom = list(found_angle.atoms)[1] central_name = ( - f"{atom_estrings[1]}{central_atom.get_id()+1}" + f"{atom_estrings[1]}{central_atom.get_id() + 1}" ) actual_angle = Angle( atoms=found_angle.atoms, atom_names=tuple( - f"{i.__class__.__name__}" f"{i.get_id()+1}" + f"{i.__class__.__name__}{i.get_id() + 1}" for i in found_angle.atoms ), atom_ids=found_angle.atom_ids, @@ -505,7 +505,7 @@ def _assign_torsion_terms( Torsion( atom_names=tuple( f"{found_torsion.atoms[i].__class__.__name__}" - f"{found_torsion.atoms[i].get_id()+1}" + f"{found_torsion.atoms[i].get_id() + 1}" for i in target_torsion.measured_atom_ids ), atom_ids=tuple( diff --git a/src/cgexplore/_internal/molecular/ensembles.py b/src/cgexplore/_internal/molecular/ensembles.py index 7fefa8de..50b95f53 100644 --- a/src/cgexplore/_internal/molecular/ensembles.py +++ b/src/cgexplore/_internal/molecular/ensembles.py @@ -198,8 +198,7 @@ def keystoint(x: dict) -> dict: def __str__(self) -> str: """Return a string representation of the Ensemble.""" return ( - f"{self.__class__.__name__}(" - f"num_confs={self.get_num_conformers()})" + f"{self.__class__.__name__}(num_confs={self.get_num_conformers()})" ) def __repr__(self) -> str: diff --git a/src/cgexplore/_internal/optimisation/openmm_optimizer.py b/src/cgexplore/_internal/optimisation/openmm_optimizer.py index c5f97613..ce6b0ca9 100644 --- a/src/cgexplore/_internal/optimisation/openmm_optimizer.py +++ b/src/cgexplore/_internal/optimisation/openmm_optimizer.py @@ -222,8 +222,7 @@ def _add_atom_constraints( distance=current_distance / 10, ) self._output_string += ( - f"{constraint[0]} {constraint[1]} " - f"{current_distance / 10} nm\n" + f"{constraint[0]} {constraint[1]} {current_distance / 10} nm\n" ) self._output_string += "\n" @@ -594,7 +593,7 @@ def _run_molecular_dynamics( end = time.time() speed = self._num_steps / (end - start) self._output_string += ( - f"done in {end-start} s ({round(speed, 2)} steps/s)\n\n" + f"done in {end - start} s ({round(speed, 2)} steps/s)\n\n" ) return simulation diff --git a/src/cgexplore/_internal/scram/construction.py b/src/cgexplore/_internal/scram/construction.py index aa3bc017..65f5d92c 100644 --- a/src/cgexplore/_internal/scram/construction.py +++ b/src/cgexplore/_internal/scram/construction.py @@ -334,18 +334,18 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 si, sj = sisj.split("-") potential_names = [ - f"ufoscan_{multiplier}_{int(si)-1}-{int(sj)-1}", - f"ufoscan_{multiplier}_{int(si)-1}-{int(sj)}", - f"ufoscan_{multiplier}_{int(si)}-{int(sj)-1}", + f"ufoscan_{multiplier}_{int(si) - 1}-{int(sj) - 1}", + f"ufoscan_{multiplier}_{int(si) - 1}-{int(sj)}", + f"ufoscan_{multiplier}_{int(si)}-{int(sj) - 1}", ] else: si, sj = name.split("_")[1].split("-") potential_names = [ - f"scan_{int(si)-1}-{int(sj)-1}", - f"scan_{int(si)-1}-{int(sj)}", - f"scan_{int(si)}-{int(sj)-1}", + f"scan_{int(si) - 1}-{int(sj) - 1}", + f"scan_{int(si) - 1}-{int(sj)}", + f"scan_{int(si)}-{int(sj) - 1}", ] elif "ts_" in name: _, tstr, si, sj, _at = name.split("_") @@ -354,9 +354,9 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 for i in range(20): potential_names.extend( [ - f"ts_{tstr}_{int(si)-1}_{int(sj)-1}_{i}", - f"ts_{tstr}_{int(si)-1}_{int(sj)}_{i}", - f"ts_{tstr}_{int(si)}_{int(sj)-1}_{i}", + f"ts_{tstr}_{int(si) - 1}_{int(sj) - 1}_{i}", + f"ts_{tstr}_{int(si) - 1}_{int(sj)}_{i}", + f"ts_{tstr}_{int(si)}_{int(sj) - 1}_{i}", f"ts_{tstr}_{int(si)}_{int(sj)}_{i}", ] ) diff --git a/src/cgexplore/_internal/systems_optimisation/inputs.py b/src/cgexplore/_internal/systems_optimisation/inputs.py index 9ce9d974..9173e62b 100644 --- a/src/cgexplore/_internal/systems_optimisation/inputs.py +++ b/src/cgexplore/_internal/systems_optimisation/inputs.py @@ -655,7 +655,7 @@ def __str__(self) -> str: """Return a string representation of the ChromosomeGenerator.""" _num_chromosomes = self.get_num_chromosomes() _num_genes = len(self.chromosome_map) - return f"{self.__class__.__name__}(" f"num_genes={_num_genes})" + return f"{self.__class__.__name__}(num_genes={_num_genes})" def __repr__(self) -> str: """Return a string representation of the Chromosome.""" diff --git a/src/cgexplore/_internal/terms/torsions.py b/src/cgexplore/_internal/terms/torsions.py index 54546e27..e8a44ba5 100644 --- a/src/cgexplore/_internal/terms/torsions.py +++ b/src/cgexplore/_internal/terms/torsions.py @@ -114,8 +114,8 @@ def human_readable(self) -> str: """Return human-readable definition of this target term.""" return ( f"{self.__class__.__name__}(" - f'{"".join(self.search_string)}, ' - f'{"".join(self.search_estring)}, ' + f"{''.join(self.search_string)}, " + f"{''.join(self.search_estring)}, " f"{self.measured_atom_ids!s}, " f"{self.phi0.in_units_of(openmm.unit.degrees)}, " f"{self.torsion_k.in_units_of(openmm.unit.kilojoules_per_mole)}, " @@ -207,8 +207,8 @@ def human_readable(self) -> str: """Return human-readable definition of this target term.""" return ( f"{self.__class__.__name__}(" - f'{"".join(self.search_string)}, ' - f'{"".join(self.search_estring)}, ' + f"{''.join(self.search_string)}, " + f"{''.join(self.search_estring)}, " f"{self.measured_atom_ids!s}, " f"{self.funct}," f"{self.phi0.in_units_of(openmm.unit.degrees)}, " diff --git a/src/cgexplore/_internal/utilities/visualisation.py b/src/cgexplore/_internal/utilities/visualisation.py index 08dca8ff..d71437a1 100644 --- a/src/cgexplore/_internal/utilities/visualisation.py +++ b/src/cgexplore/_internal/utilities/visualisation.py @@ -56,7 +56,7 @@ def _get_zoom_string(self, structure_files: list, zoom_scale: int) -> str: path=str(fi), ).get_maximum_diameter() max_max_diam = max((max_diam, max_max_diam)) - return f"zoom center, {max_max_diam/zoom_scale}" + return f"zoom center, {max_max_diam / zoom_scale}" def _write_pymol_script( self, From 86115163eea47818c01396bf9b4401baf881a76d Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Fri, 17 Jan 2025 20:55:25 +0100 Subject: [PATCH 36/47] Try fix workflow. --- .github/workflows/tests.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a126608e..57f525f8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,7 +37,6 @@ jobs: - run: "pip install -e '.[dev]'" - run: ruff format --check . pytest: - # https://ericmjl.github.io/blog/2021/12/30/better-conda-environments-on-github-actions/ runs-on: ubuntu-22.04 defaults: @@ -45,16 +44,13 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: - auto-update-conda: true - miniforge-variant: Mambaforge - channels: conda-forge,defaults + miniforge-version: latest python-version: 3.11 activate-environment: pytest - use-mamba: true - name: Build environment run: | From db525caa1ec8914764a82caf1cd3beac080694f8 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Fri, 17 Jan 2025 21:14:55 +0100 Subject: [PATCH 37/47] Try fix workflow. --- .github/workflows/tests.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 57f525f8..f49e98bd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -64,7 +64,6 @@ jobs: run: pytest build-test: - # https://ericmjl.github.io/blog/2021/12/30/better-conda-environments-on-github-actions/ runs-on: ubuntu-22.04 defaults: @@ -76,13 +75,9 @@ jobs: - uses: conda-incubator/setup-miniconda@v3 with: - mamba-version: "*" - miniforge-variant: Mambaforge - channels: conda-forge,defaults - channel-priority: true + miniforge-version: latest python-version: 3.11 activate-environment: buildtest - use-mamba: true - name: Build environment run: | From f80788aec9ef9fa43c085329b1f5620c74eae67b Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Fri, 17 Jan 2025 21:24:50 +0100 Subject: [PATCH 38/47] Add mypy.ini for rdkit issue. --- mypy.ini | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 mypy.ini diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..ea213a5b --- /dev/null +++ b/mypy.ini @@ -0,0 +1,5 @@ +[mypy] + +[mypy-rdkit.*] +follow_imports = skip +follow_imports_for_stubs = True From c2d126a71adfb8b4729d1128643f01f3ae0cabf8 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 00:25:08 +0100 Subject: [PATCH 39/47] Add mypy.ini and use it properly, some mypy fixes, WIP. --- mypy.ini | 58 +++++++ pyproject.toml | 33 ---- .../_internal/atomistic/crest_process.py | 8 +- .../_internal/atomistic/utilities.py | 29 ++-- .../_internal/forcefields/forcefield.py | 8 +- src/cgexplore/_internal/molecular/beads.py | 4 +- .../_internal/scram/building_block_enum.py | 32 ++-- src/cgexplore/_internal/scram/enumeration.py | 1 + .../_internal/scram/topology_code.py | 8 +- .../_internal/scripts/get_energies.py | 4 +- .../_internal/topologies/custom_topology.py | 8 +- src/cgexplore/_internal/topologies/graphs.py | 154 ++++++++++-------- 12 files changed, 204 insertions(+), 143 deletions(-) diff --git a/mypy.ini b/mypy.ini index ea213a5b..1dadb1d8 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,63 @@ [mypy] +show_error_codes = true +implicit_optional = false +warn_no_return = true +strict_optional = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +warn_unreachable = true +disallow_any_generics = false [mypy-rdkit.*] follow_imports = skip follow_imports_for_stubs = True + +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-matplotlib.*] +ignore_missing_imports = True + +[mypy-pandas.*] +ignore_missing_imports = True + +[mypy-stk.*] +ignore_missing_imports = True + +[mypy-stko.*] +ignore_missing_imports = True + +[mypy-mchammer.*] +ignore_missing_imports = True + +[mypy-openmmtools.*] +ignore_missing_imports = True + +[mypy-openmm.*] +ignore_missing_imports = True + +[mypy-atomlite.*] +ignore_missing_imports = True + +[mypy-networkx.*] +ignore_missing_imports = True + +[mypy-martini_openmm.*] +ignore_missing_imports = True + +[mypy-pathos.*] +ignore_missing_imports = True + +[mypy-spindry.*] +ignore_missing_imports = True + +[mypy-vabene.*] +ignore_missing_imports = True + +[mypy-chemiscope.*] +ignore_missing_imports = True + +[mypy-bbprep.*] +ignore_missing_imports = True diff --git a/pyproject.toml b/pyproject.toml index 0cdfc10a..37e0d1d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -121,36 +121,3 @@ python_functions = [ "test_*", ] -[tool.mypy] -show_error_codes = true -implicit_optional = false -warn_no_return = true -strict_optional = true -disallow_untyped_defs = true -disallow_incomplete_defs = true -check_untyped_defs = true -disallow_untyped_decorators = true -warn_unreachable = true -disallow_any_generics = false - -[[tool.mypy.overrides]] -module = [ - "rdkit.*", - "scipy.*", - "matplotlib.*", - "pandas.*", - "stk.*", - "stko.*", - "mchammer.*", - "openmmtools.*", - "openmm.*", - "atomlite.*", - "networkx.*", - "martini_openmm.*", - "pathos.*", - "spindry.*", - "vabene.*", - "chemiscope.*", - "bbprep.*", -] -ignore_missing_imports = true diff --git a/src/cgexplore/_internal/atomistic/crest_process.py b/src/cgexplore/_internal/atomistic/crest_process.py index 79e61fda..c3be878b 100644 --- a/src/cgexplore/_internal/atomistic/crest_process.py +++ b/src/cgexplore/_internal/atomistic/crest_process.py @@ -33,7 +33,7 @@ def __init__( # noqa: PLR0913 crest_path: pathlib.Path, xtb_path: pathlib.Path, gfn_method: str = "2", - output_dir: str | None = None, + output_dir: pathlib.Path | str | None = None, num_cores: int = 4, charge: int = 0, electronic_temperature: float = 300, @@ -85,7 +85,7 @@ def _check_path(self, path: pathlib.Path | str) -> None: path = pathlib.Path(path) if not path.exists(): msg = f"XTB or CREST not found at {path}" - raise pathlib.PathError(msg) + raise RuntimeError(msg) def _write_detailed_control(self) -> None: string = f"$gbsa\n gbsagrid={self._solvent_grid}" @@ -146,7 +146,7 @@ def _run_crest(self, xyz: str, out_file: pathlib.Path | str) -> None: shell=True, ) - def optimize(self, molecule: stk.Molecule) -> stk.Molecule: + def optimize(self, molecule: stko.MoleculeT) -> stko.MoleculeT: """Optimise a solute-solvent pair.""" if self._output_dir is None: output_dir = pathlib.Path(str(uuid.uuid4().int)).resolve() @@ -194,7 +194,7 @@ def optimize(self, molecule: stk.Molecule) -> stk.Molecule: def run_conformer_analysis( # noqa: PLR0913 ligand_name: str, - molecule: stk.Molecule, + molecule: stk.BuildingBlock, ligand_dir: pathlib.Path, calculation_dir: pathlib.Path, functional_group_factories: tuple[stk.FunctionalGroupFactory, ...], diff --git a/src/cgexplore/_internal/atomistic/utilities.py b/src/cgexplore/_internal/atomistic/utilities.py index 0c7c39fa..7be02332 100644 --- a/src/cgexplore/_internal/atomistic/utilities.py +++ b/src/cgexplore/_internal/atomistic/utilities.py @@ -15,13 +15,20 @@ def extract_ditopic_ensemble( - molecule: stk.Molecule, + molecule: stk.BuildingBlock, crest_run: pathlib.Path, ) -> dict: - """Extract and save an ensemble from a crest run.""" + """Extract and save an ensemble from a crest run. + + TODO: Turn the ensemble into a dataclass so that typing makes sense, remove + all mypy ignore statements. + + """ ensemble_dir = crest_run / "ensemble" num_atoms = molecule.get_num_atoms() - ensemble = {} + ensemble: dict[ + int, dict[str, float | int | str | stk.BuildingBlock | tuple] + ] = {} ensemble_dir.mkdir(exist_ok=True, parents=True) # Calculate geometrical properties. @@ -43,9 +50,9 @@ def extract_ditopic_ensemble( if len(splits) != 4: # noqa: PLR2004 continue symb, x, y, z = splits - x = float(x) - y = float(y) - z = float(z) + x = float(x) # type: ignore[assignment] + y = float(y) # type: ignore[assignment] + z = float(z) # type: ignore[assignment] position_matrix.append(np.array((x, y, z))) @@ -56,13 +63,13 @@ def extract_ditopic_ensemble( calc = stko.molecule_analysis.DitopicThreeSiteAnalyser() adjacent_centroids = calc.get_adjacent_centroids(conf_molecule) - adjacent_distance = np.linalg.norm( - adjacent_centroids[0] - adjacent_centroids[1] + adjacent_distance = float( + np.linalg.norm(adjacent_centroids[0] - adjacent_centroids[1]) ) ensemble[i] = { "energy": float(energy), - "molecule": conf_molecule, + "molecule": conf_molecule, # type: ignore[dict-item] "binder_angles": calc.get_binder_angles(conf_molecule), "binder_binder_angle": calc.get_binder_binder_angle(conf_molecule), "binder_distance": calc.get_binder_distance(conf_molecule), @@ -73,7 +80,9 @@ def extract_ditopic_ensemble( "binder_com_angle": calc.get_binder_centroid_angle(conf_molecule), } - ensemble[i]["molecule"].write(ensemble_dir / f"conf_{i}.mol") + ensemble[i]["molecule"].write( # type: ignore[union-attr] + ensemble_dir / f"conf_{i}.mol" + ) return ensemble diff --git a/src/cgexplore/_internal/forcefields/forcefield.py b/src/cgexplore/_internal/forcefields/forcefield.py index 4251bfa2..1f2d9eb4 100644 --- a/src/cgexplore/_internal/forcefields/forcefield.py +++ b/src/cgexplore/_internal/forcefields/forcefield.py @@ -503,12 +503,12 @@ def _assign_torsion_terms( torsion_terms.append( Torsion( - atom_names=tuple( + atom_names=tuple( # type: ignore[arg-type] f"{found_torsion.atoms[i].__class__.__name__}" f"{found_torsion.atoms[i].get_id() + 1}" for i in target_torsion.measured_atom_ids ), - atom_ids=tuple( + atom_ids=tuple( # type: ignore[arg-type] found_torsion.atoms[i].get_id() for i in target_torsion.measured_atom_ids ), @@ -672,7 +672,7 @@ def get_prefix(self) -> str: """Get forcefield prefix.""" return self._prefix - def get_present_beads(self) -> tuple: + def get_present_beads(self) -> abc.Sequence[CgBead]: """Get beads present.""" return self._present_beads @@ -755,7 +755,7 @@ def get_forcefield_dictionary(self) -> dict[str, str | dict]: ) for at in ff_targets["angles"]: - cp = (at.type1, at.type2, at.type3) + cp = (at.type1, at.type2, at.type3) # type: ignore[assignment] try: k_dict["_".join(cp)] = at.angle_k.value_in_unit( openmm.unit.kilojoule diff --git a/src/cgexplore/_internal/molecular/beads.py b/src/cgexplore/_internal/molecular/beads.py index 1ba79ec6..5782acec 100644 --- a/src/cgexplore/_internal/molecular/beads.py +++ b/src/cgexplore/_internal/molecular/beads.py @@ -3,7 +3,7 @@ """Module for beads.""" import logging -from collections import Counter +from collections import Counter, abc from dataclasses import dataclass logging.basicConfig( @@ -23,7 +23,7 @@ class CgBead: class BeadLibrary: """Define a library of beads used in a model.""" - def __init__(self, beads: tuple[CgBead, ...]) -> None: + def __init__(self, beads: abc.Sequence[CgBead]) -> None: """Initialize a BeadLibrary.""" self._beads = beads # Run a check. diff --git a/src/cgexplore/_internal/scram/building_block_enum.py b/src/cgexplore/_internal/scram/building_block_enum.py index 4eb62055..c92db72b 100644 --- a/src/cgexplore/_internal/scram/building_block_enum.py +++ b/src/cgexplore/_internal/scram/building_block_enum.py @@ -17,7 +17,9 @@ ) -def length_2_heteroleptic_bb_dicts(tstr: str) -> dict[int, int]: +def length_2_heteroleptic_bb_dicts( + tstr: str, +) -> tuple[dict[int, list[int]], int]: """Define bb dictionaries available to heteroleptic systems. Allows for two ditopic building blocks to be added as: @@ -44,7 +46,9 @@ def length_2_heteroleptic_bb_dicts(tstr: str) -> dict[int, int]: }[tstr] -def length_3_heteroleptic_bb_dicts(tstr: str) -> dict[int, int]: +def length_3_heteroleptic_bb_dicts( + tstr: str, +) -> tuple[dict[int, list[int]], int]: """Define bb dictionaries available to heteroleptic systems. Allows for two tritopic building blocks to be added as: @@ -63,7 +67,9 @@ def length_3_heteroleptic_bb_dicts(tstr: str) -> dict[int, int]: }[tstr] -def length_4_heteroleptic_bb_dicts(tstr: str) -> dict[int, int]: +def length_4_heteroleptic_bb_dicts( + tstr: str, +) -> tuple[dict[int, list[int]], int]: """Define bb dictionaries available to heteroleptic systems. Allows for two tetratopic building blocks to be added as: @@ -89,7 +95,7 @@ def get_potential_bb_dicts( tstr: str, ratio: tuple[int, int], study_type: Literal["ditopic", "tritopic", "tetratopic"], -) -> abc.Sequence[dict[int, abc.Sequence[int]]]: +) -> list[tuple[int, dict[int, list[int]]]]: """Get potential building block dictionaries from known topology graphs. Parameters: @@ -130,7 +136,7 @@ def get_potential_bb_dicts( modifiable = [i for i in possibilities if len(possibilities[i]) == 0] saved = set() - possible_dicts = [] + possible_dicts: list[tuple[int, dict[int, list[int]]]] = [] for combo in it.product(modifiable, repeat=count_to_add): counted = Counter(combo).values() current_ratio = [i / min(counted) for i in counted] @@ -155,7 +161,7 @@ def get_potential_bb_dicts( ) logging.info(msg) - return tuple(possible_dicts) + return possible_dicts @dataclass @@ -195,7 +201,7 @@ def __repr__(self) -> str: def get_custom_bb_configurations( # noqa: C901 iterator: IHomolepticTopologyIterator, -) -> abc.Sequence[dict[int, abc.Sequence[int]]]: +) -> abc.Sequence[BuildingBlockConfiguration]: """Get potential building block dictionaries.""" # Get building blocks with the same functional group count - these are # swappable. @@ -203,8 +209,8 @@ def get_custom_bb_configurations( # noqa: C901 i: i.get_num_functional_groups() for i in iterator.building_blocks } - count_of_fg_types = defaultdict(int) - fg_counts_by_building_block = defaultdict(int) + count_of_fg_types: dict[int, int] = defaultdict(int) + fg_counts_by_building_block: dict[int, int] = defaultdict(int) for bb, count in iterator.building_block_counts.items(): fg_counts_by_building_block[bb.get_num_functional_groups()] += count @@ -250,14 +256,12 @@ def get_custom_bb_configurations( # noqa: C901 bb_map = {bb: idx for idx, bb in enumerate(building_blocks_by_fg)} - empty_bb_dict = {} + empty_bb_dict: dict[int, list[int]] = {} for bb, fg_count in building_blocks_by_fg.items(): if fg_count in modifiable_types: empty_bb_dict[bb_map[bb]] = [] else: - empty_bb_dict[bb_map[bb]] = tuple( - i for i in unmodifiable_vertices[fg_count] - ) + empty_bb_dict[bb_map[bb]] = list(unmodifiable_vertices[fg_count]) # ASSUMES 1 modifiable FG. modifiable_bb_idx = tuple( @@ -283,7 +287,7 @@ def get_custom_bb_configurations( # noqa: C901 ) saved_bb_dicts = set() - possible_dicts = [] + possible_dicts: list[BuildingBlockConfiguration] = [] for config in iteration: if sorted(config) != modifiable_bb_idx_counted: diff --git a/src/cgexplore/_internal/scram/enumeration.py b/src/cgexplore/_internal/scram/enumeration.py index e05c1c69..c8872ced 100644 --- a/src/cgexplore/_internal/scram/enumeration.py +++ b/src/cgexplore/_internal/scram/enumeration.py @@ -45,6 +45,7 @@ def __init__( stoichiometry: tuple[int, int, int], ) -> None: """Initialize.""" + self._building_blocks: dict[stk.BuildingBlock, abc.Sequence[int]] if stoichiometry == (1, 1, 1): if multiplier == 1: self._building_blocks = { diff --git a/src/cgexplore/_internal/scram/topology_code.py b/src/cgexplore/_internal/scram/topology_code.py index c7110a5d..4ef831e8 100644 --- a/src/cgexplore/_internal/scram/topology_code.py +++ b/src/cgexplore/_internal/scram/topology_code.py @@ -32,7 +32,7 @@ def get_nx_graph(self) -> nx.Graph: def get_graph(self) -> rx.PyGraph: """Convert TopologyCode to a graph.""" - graph = rx.PyGraph(multigraph=True) + graph: rx.PyGraph = rx.PyGraph(multigraph=True) vertices = { vi: graph.add_node(vi) @@ -48,7 +48,7 @@ def get_graph(self) -> rx.PyGraph: def get_weighted_graph(self) -> rx.PyGraph: """Convert TopologyCode to a graph.""" - graph = rx.PyGraph(multigraph=False) + graph: rx.PyGraph = rx.PyGraph(multigraph=False) vertices = { vi: graph.add_node(vi) @@ -97,8 +97,8 @@ def contains_doubles(self) -> bool: paths = list( rx.graph_all_simple_paths( weighted_graph, - origin=node, - to=node, + origin=node, # type: ignore[call-arg] + to=node, # type: ignore[call-arg] cutoff=12, min_depth=4, ) diff --git a/src/cgexplore/_internal/scripts/get_energies.py b/src/cgexplore/_internal/scripts/get_energies.py index 488897a3..0f330a12 100644 --- a/src/cgexplore/_internal/scripts/get_energies.py +++ b/src/cgexplore/_internal/scripts/get_energies.py @@ -39,7 +39,7 @@ def main() -> None: if "energy_per_bb" not in properties: continue name = entry.key - energy = properties["energy_per_bb"] + energy: float = properties["energy_per_bb"] # type: ignore[assignment] if energy > min_energy and energy < max_energy: logging.info( "energy of %s is %s kJmol-1", @@ -50,7 +50,7 @@ def main() -> None: logging.info("showed %s energies", count) else: entry = database.get_entry(key=args.name) - energy = entry.properties["energy_per_bb"] + energy = entry.properties["energy_per_bb"] # type: ignore[assignment] logging.info("energy of %s is %s kJmol-1", args.name, round(energy, 3)) # type: ignore[arg-type] diff --git a/src/cgexplore/_internal/topologies/custom_topology.py b/src/cgexplore/_internal/topologies/custom_topology.py index 7a8e19dc..2a06b827 100644 --- a/src/cgexplore/_internal/topologies/custom_topology.py +++ b/src/cgexplore/_internal/topologies/custom_topology.py @@ -15,8 +15,8 @@ def __init__( # noqa: PLR0913 abc.Iterable[stk.BuildingBlock] | dict[stk.BuildingBlock, tuple[int, ...]] ), - vertex_prototypes: list[stk.Vertex], - edge_prototypes: list[stk.Edge], + vertex_prototypes: abc.Sequence[stk.Vertex], + edge_prototypes: abc.Sequence[stk.Edge], vertex_alignments: dict[int, int] | None = None, vertex_positions: dict[int, np.ndarray] | None = None, reaction_factory: stk.ReactionFactory = stk.GenericReactionFactory(), # noqa: B008 @@ -27,8 +27,8 @@ def __init__( # noqa: PLR0913 """Initialize.""" class InternalTopology(stk.cage.Cage): - _vertex_prototypes = vertex_prototypes - _edge_prototypes = edge_prototypes + _vertex_prototypes = vertex_prototypes # type: ignore[assignment] + _edge_prototypes = edge_prototypes # type: ignore[assignment] self._topology_graph = InternalTopology( building_blocks=building_blocks, diff --git a/src/cgexplore/_internal/topologies/graphs.py b/src/cgexplore/_internal/topologies/graphs.py index 5881cefd..74b24a8f 100644 --- a/src/cgexplore/_internal/topologies/graphs.py +++ b/src/cgexplore/_internal/topologies/graphs.py @@ -62,10 +62,16 @@ class M4L82(stk.cage.Cage): """Cage topology.""" _non_linears = ( - stk.cage.NonLinearVertex(0, [0, 0, np.sqrt(6) / 2]), - stk.cage.NonLinearVertex(1, [-1, -np.sqrt(3) / 3, -np.sqrt(6) / 6]), - stk.cage.NonLinearVertex(2, [1, -np.sqrt(3) / 3, -np.sqrt(6) / 6]), - stk.cage.NonLinearVertex(3, [0, 2 * np.sqrt(3) / 3, -np.sqrt(6) / 6]), + stk.cage.NonLinearVertex(0, np.array([0, 0, np.sqrt(6) / 2])), + stk.cage.NonLinearVertex( + 1, np.array([-1, -np.sqrt(3) / 3, -np.sqrt(6) / 6]) + ), + stk.cage.NonLinearVertex( + 2, np.array([1, -np.sqrt(3) / 3, -np.sqrt(6) / 6]) + ), + stk.cage.NonLinearVertex( + 3, np.array([0, 2 * np.sqrt(3) / 3, -np.sqrt(6) / 6]) + ), ) paired_wall_1_coord = ( @@ -150,18 +156,34 @@ class CGM4L8(stk.cage.M4L8): """New topology definition.""" _vertex_prototypes = ( - stk.cage.NonLinearVertex(0, [2, 0, 0]), - stk.cage.NonLinearVertex(1, [0, 2, 0]), - stk.cage.NonLinearVertex(2, [-2, 0, 0]), - stk.cage.NonLinearVertex(3, [0, -2, 0]), - stk.cage.LinearVertex(4, [1, 1, 0.5], use_neighbor_placement=False), - stk.cage.LinearVertex(5, [1, 1, -0.5], use_neighbor_placement=False), - stk.cage.LinearVertex(6, [1, -1, 0.5], use_neighbor_placement=False), - stk.cage.LinearVertex(7, [1, -1, -0.5], use_neighbor_placement=False), - stk.cage.LinearVertex(8, [-1, -1, 0.5], use_neighbor_placement=False), - stk.cage.LinearVertex(9, [-1, -1, -0.5], use_neighbor_placement=False), - stk.cage.LinearVertex(10, [-1, 1, 0.5], use_neighbor_placement=False), - stk.cage.LinearVertex(11, [-1, 1, -0.5], use_neighbor_placement=False), + stk.cage.NonLinearVertex(0, np.array([2, 0, 0])), + stk.cage.NonLinearVertex(1, np.array([0, 2, 0])), + stk.cage.NonLinearVertex(2, np.array([-2, 0, 0])), + stk.cage.NonLinearVertex(3, np.array([0, -2, 0])), + stk.cage.LinearVertex( + 4, np.array([1, 1, 0.5]), use_neighbor_placement=False + ), + stk.cage.LinearVertex( + 5, np.array([1, 1, -0.5]), use_neighbor_placement=False + ), + stk.cage.LinearVertex( + 6, np.array([1, -1, 0.5]), use_neighbor_placement=False + ), + stk.cage.LinearVertex( + 7, np.array([1, -1, -0.5]), use_neighbor_placement=False + ), + stk.cage.LinearVertex( + 8, np.array([-1, -1, 0.5]), use_neighbor_placement=False + ), + stk.cage.LinearVertex( + 9, np.array([-1, -1, -0.5]), use_neighbor_placement=False + ), + stk.cage.LinearVertex( + 10, np.array([-1, 1, 0.5]), use_neighbor_placement=False + ), + stk.cage.LinearVertex( + 11, np.array([-1, 1, -0.5]), use_neighbor_placement=False + ), ) _edge_prototypes = ( @@ -188,89 +210,89 @@ class CGM12L24(stk.cage.M12L24): """New topology definition.""" _vertex_prototypes = ( - stk.cage.NonLinearVertex(0, [1.25, 0, 0]), - stk.cage.NonLinearVertex(1, [-1.25, 0, 0]), - stk.cage.NonLinearVertex(2, [0, 1.25, 0]), - stk.cage.NonLinearVertex(3, [0, -1.25, 0]), - stk.cage.NonLinearVertex(4, [0.625, 0.625, 0.88]), - stk.cage.NonLinearVertex(5, [0.625, -0.625, 0.88]), - stk.cage.NonLinearVertex(6, [-0.625, 0.625, 0.88]), - stk.cage.NonLinearVertex(7, [-0.625, -0.625, 0.88]), - stk.cage.NonLinearVertex(8, [0.625, 0.625, -0.88]), - stk.cage.NonLinearVertex(9, [0.625, -0.625, -0.88]), - stk.cage.NonLinearVertex(10, [-0.625, 0.625, -0.88]), - stk.cage.NonLinearVertex(11, [-0.625, -0.625, -0.88]), + stk.cage.NonLinearVertex(0, np.array([1.25, 0, 0])), + stk.cage.NonLinearVertex(1, np.array([-1.25, 0, 0])), + stk.cage.NonLinearVertex(2, np.array([0, 1.25, 0])), + stk.cage.NonLinearVertex(3, np.array([0, -1.25, 0])), + stk.cage.NonLinearVertex(4, np.array([0.625, 0.625, 0.88])), + stk.cage.NonLinearVertex(5, np.array([0.625, -0.625, 0.88])), + stk.cage.NonLinearVertex(6, np.array([-0.625, 0.625, 0.88])), + stk.cage.NonLinearVertex(7, np.array([-0.625, -0.625, 0.88])), + stk.cage.NonLinearVertex(8, np.array([0.625, 0.625, -0.88])), + stk.cage.NonLinearVertex(9, np.array([0.625, -0.625, -0.88])), + stk.cage.NonLinearVertex(10, np.array([-0.625, 0.625, -0.88])), + stk.cage.NonLinearVertex(11, np.array([-0.625, -0.625, -0.88])), stk.cage.LinearVertex( - 12, [0.9, 0.31, 0.31], use_neighbor_placement=False + 12, np.array([0.9, 0.31, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 13, [0.9, 0.31, -0.31], use_neighbor_placement=False + 13, np.array([0.9, 0.31, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 14, [0.9, -0.31, 0.31], use_neighbor_placement=False + 14, np.array([0.9, -0.31, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 15, [0.9, -0.31, -0.31], use_neighbor_placement=False + 15, np.array([0.9, -0.31, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 16, [-0.9, 0.31, 0.31], use_neighbor_placement=False + 16, np.array([-0.9, 0.31, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 17, [-0.9, 0.31, -0.31], use_neighbor_placement=False + 17, np.array([-0.9, 0.31, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 18, [-0.9, -0.31, 0.31], use_neighbor_placement=False + 18, np.array([-0.9, -0.31, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 19, [-0.9, -0.31, -0.31], use_neighbor_placement=False + 19, np.array([-0.9, -0.31, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 20, [0.31, 0.9, 0.31], use_neighbor_placement=False + 20, np.array([0.31, 0.9, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 21, [0.31, 0.9, -0.31], use_neighbor_placement=False + 21, np.array([0.31, 0.9, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 22, [-0.31, 0.9, 0.31], use_neighbor_placement=False + 22, np.array([-0.31, 0.9, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 23, [-0.31, 0.9, -0.31], use_neighbor_placement=False + 23, np.array([-0.31, 0.9, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 24, [0.31, -0.9, 0.31], use_neighbor_placement=False + 24, np.array([0.31, -0.9, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 25, [0.31, -0.9, -0.31], use_neighbor_placement=False + 25, np.array([0.31, -0.9, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 26, [-0.31, -0.9, 0.31], use_neighbor_placement=False + 26, np.array([-0.31, -0.9, 0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 27, [-0.31, -0.9, -0.31], use_neighbor_placement=False + 27, np.array([-0.31, -0.9, -0.31]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 28, [0.58, 0, 0.82], use_neighbor_placement=False + 28, np.array([0.58, 0, 0.82]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 29, [-0.58, 0, 0.82], use_neighbor_placement=False + 29, np.array([-0.58, 0, 0.82]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 30, [0, 0.58, 0.82], use_neighbor_placement=False + 30, np.array([0, 0.58, 0.82]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 31, [0, -0.58, 0.82], use_neighbor_placement=False + 31, np.array([0, -0.58, 0.82]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 32, [0.58, 0, -0.82], use_neighbor_placement=False + 32, np.array([0.58, 0, -0.82]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 33, [-0.58, 0, -0.82], use_neighbor_placement=False + 33, np.array([-0.58, 0, -0.82]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 34, [0, 0.58, -0.82], use_neighbor_placement=False + 34, np.array([0, 0.58, -0.82]), use_neighbor_placement=False ), stk.cage.LinearVertex( - 35, [0, -0.58, -0.82], use_neighbor_placement=False + 35, np.array([0, -0.58, -0.82]), use_neighbor_placement=False ), ) @@ -345,12 +367,12 @@ class M6L122(stk.cage.Cage): _x = 2 * np.sqrt(3) / 4 _y = 2 _non_linears = ( - stk.cage.NonLinearVertex(0, [0, _x, 1]), - stk.cage.NonLinearVertex(1, [_y / 2, -_x, 1]), - stk.cage.NonLinearVertex(2, [-_y / 2, -_x, 1]), - stk.cage.NonLinearVertex(3, [0, _x, -1]), - stk.cage.NonLinearVertex(4, [_y / 2, -_x, -1]), - stk.cage.NonLinearVertex(5, [-_y / 2, -_x, -1]), + stk.cage.NonLinearVertex(0, np.array([0, _x, 1])), + stk.cage.NonLinearVertex(1, np.array([_y / 2, -_x, 1])), + stk.cage.NonLinearVertex(2, np.array([-_y / 2, -_x, 1])), + stk.cage.NonLinearVertex(3, np.array([0, _x, -1])), + stk.cage.NonLinearVertex(4, np.array([_y / 2, -_x, -1])), + stk.cage.NonLinearVertex(5, np.array([-_y / 2, -_x, -1])), ) paired_wall_1_coord = ( @@ -471,14 +493,14 @@ class M8L162(stk.cage.Cage): """Cage topology.""" _non_linears = ( - stk.cage.NonLinearVertex(0, [1, 1, 1]), - stk.cage.NonLinearVertex(1, [1, -1, 1]), - stk.cage.NonLinearVertex(2, [-1, -1, 1]), - stk.cage.NonLinearVertex(3, [-1, 1, 1]), - stk.cage.NonLinearVertex(4, [1, 1, -1]), - stk.cage.NonLinearVertex(5, [1, -1, -1]), - stk.cage.NonLinearVertex(6, [-1, -1, -1]), - stk.cage.NonLinearVertex(7, [-1, 1, -1]), + stk.cage.NonLinearVertex(0, np.array([1, 1, 1])), + stk.cage.NonLinearVertex(1, np.array([1, -1, 1])), + stk.cage.NonLinearVertex(2, np.array([-1, -1, 1])), + stk.cage.NonLinearVertex(3, np.array([-1, 1, 1])), + stk.cage.NonLinearVertex(4, np.array([1, 1, -1])), + stk.cage.NonLinearVertex(5, np.array([1, -1, -1])), + stk.cage.NonLinearVertex(6, np.array([-1, -1, -1])), + stk.cage.NonLinearVertex(7, np.array([-1, 1, -1])), ) paired_wall_1_coord = ( From 7a4be0e37d1b22d41cc156de96eb39d5c6e7dc91 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:30:00 +0100 Subject: [PATCH 40/47] Remove unused parent class, Martini WIP. --- .../_internal/forcefields/assigned_system.py | 36 ++++++------------- src/cgexplore/forcefields.py | 2 -- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/src/cgexplore/_internal/forcefields/assigned_system.py b/src/cgexplore/_internal/forcefields/assigned_system.py index 01310307..2250ce72 100644 --- a/src/cgexplore/_internal/forcefields/assigned_system.py +++ b/src/cgexplore/_internal/forcefields/assigned_system.py @@ -26,12 +26,17 @@ ) -class ForcedSystem: +@dataclass(frozen=True, slots=True) +class AssignedSystem: """A system with forces assigned.""" molecule: stk.Molecule forcefield_terms: dict[str, tuple] + system_xml: pathlib.Path + topology_xml: pathlib.Path + bead_set: BeadLibrary vdw_bond_cutoff: int + mass: float = 10 def _available_forces(self, force_type: str) -> openmm.Force: available = { @@ -216,36 +221,12 @@ def _add_nonbondeds(self, system: openmm.System) -> openmm.System: return system - def _add_atoms(self, system: openmm.System) -> openmm.System: - raise NotImplementedError - def _add_forces(self, system: openmm.System) -> openmm.System: system = self._add_bonds(system) system = self._add_angles(system) system = self._add_torsions(system) return self._add_nonbondeds(system) - def get_openmm_topology(self) -> app.topology.Topology: - """Return OpenMM.Topology object.""" - raise NotImplementedError - - def get_openmm_system(self) -> openmm.System: - """Return OpenMM.System object.""" - raise NotImplementedError - - -@dataclass(frozen=True, slots=True) -class AssignedSystem(ForcedSystem): - """A system with forces assigned.""" - - molecule: stk.Molecule - forcefield_terms: dict[str, tuple] - system_xml: pathlib.Path - topology_xml: pathlib.Path - bead_set: BeadLibrary - vdw_bond_cutoff: int - mass: float = 10 - def _add_atoms(self, system: openmm.System) -> openmm.System: for _atom in self.molecule.get_atoms(): system.addParticle(self.mass) @@ -344,7 +325,7 @@ def get_openmm_system(self) -> openmm.System: @dataclass(frozen=True, slots=True) -class MartiniSystem(ForcedSystem): +class MartiniSystem: """Assign a system using martini_openmm.""" molecule: stk.Molecule @@ -490,6 +471,9 @@ def _write_topology_itp(self, molecule: stk.Molecule) -> None: with self.topology_itp.open("w") as f: f.write(string) + def _add_forces(self, system: openmm.System) -> openmm.System: + raise NotImplementedError + def get_openmm_topology(self) -> app.topology.Topology: """Return OpenMM.Topology object.""" self._write_topology_itp(self.molecule) diff --git a/src/cgexplore/forcefields.py b/src/cgexplore/forcefields.py index 319bf216..cd4f5d51 100644 --- a/src/cgexplore/forcefields.py +++ b/src/cgexplore/forcefields.py @@ -2,7 +2,6 @@ from cgexplore._internal.forcefields.assigned_system import ( AssignedSystem, - ForcedSystem, MartiniSystem, ) from cgexplore._internal.forcefields.forcefield import ( @@ -25,7 +24,6 @@ "AssignedSystem", "ForceField", "ForceFieldLibrary", - "ForcedSystem", "MartiniForceField", "MartiniForceFieldLibrary", "MartiniSystem", From e205f72c13bf3c0c009f265e88e42ef9f5a2ed4b Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:30:20 +0100 Subject: [PATCH 41/47] Rename. --- src/cgexplore/scram.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/cgexplore/scram.py b/src/cgexplore/scram.py index f418d70c..0a7f4c8f 100644 --- a/src/cgexplore/scram.py +++ b/src/cgexplore/scram.py @@ -10,17 +10,14 @@ optimise_cage, try_except_construction, ) -from cgexplore._internal.scram.enumeration import ( - IHomolepticTopologyIterator, - TopologyIterator, -) +from cgexplore._internal.scram.enumeration import Scrambler, TopologyIterator from cgexplore._internal.scram.topology_code import Constructed, TopologyCode from cgexplore._internal.scram.utilities import points_on_sphere, vmap_to_str __all__ = [ "BuildingBlockConfiguration", "Constructed", - "IHomolepticTopologyIterator", + "Scrambler", "TopologyCode", "TopologyIterator", "get_custom_bb_configurations", From d4e20eeb7eaea782e4b6794ca352999bea3569c3 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:30:42 +0100 Subject: [PATCH 42/47] Rename and mypy. --- .../_internal/scram/building_block_enum.py | 4 +- src/cgexplore/_internal/scram/construction.py | 41 +++++++---- src/cgexplore/_internal/scram/enumeration.py | 73 ++++++++++--------- 3 files changed, 67 insertions(+), 51 deletions(-) diff --git a/src/cgexplore/_internal/scram/building_block_enum.py b/src/cgexplore/_internal/scram/building_block_enum.py index c92db72b..25232740 100644 --- a/src/cgexplore/_internal/scram/building_block_enum.py +++ b/src/cgexplore/_internal/scram/building_block_enum.py @@ -9,7 +9,7 @@ import stk -from cgexplore._internal.scram.enumeration import IHomolepticTopologyIterator +from cgexplore._internal.scram.enumeration import TopologyIterator logging.basicConfig( level=logging.INFO, @@ -200,7 +200,7 @@ def __repr__(self) -> str: def get_custom_bb_configurations( # noqa: C901 - iterator: IHomolepticTopologyIterator, + iterator: TopologyIterator, ) -> abc.Sequence[BuildingBlockConfiguration]: """Get potential building block dictionaries.""" # Get building blocks with the same functional group count - these are diff --git a/src/cgexplore/_internal/scram/construction.py b/src/cgexplore/_internal/scram/construction.py index 65f5d92c..e2b42193 100644 --- a/src/cgexplore/_internal/scram/construction.py +++ b/src/cgexplore/_internal/scram/construction.py @@ -24,7 +24,7 @@ ) from .building_block_enum import BuildingBlockConfiguration -from .enumeration import IHomolepticTopologyIterator, TopologyIterator +from .enumeration import TopologyIterator logging.basicConfig( level=logging.INFO, @@ -50,7 +50,7 @@ def graph_optimise_cage( # noqa: PLR0913 final_molecule.write(fina_mol_file) return Conformer( molecule=final_molecule, - energy_decomposition=database.get_property( + energy_decomposition=database.get_property( # type:ignore[arg-type] key=name, property_key="energy_decomposition", property_type=dict, @@ -71,7 +71,7 @@ def graph_optimise_cage( # noqa: PLR0913 database.add_properties( key=name, property_dict={ - "energy_decomposition": conformer.energy_decomposition, + "energy_decomposition": conformer.energy_decomposition, # type:ignore[dict-item] "source": conformer.source, "optimised": True, }, @@ -214,7 +214,7 @@ def graph_optimise_cage( # noqa: PLR0913 database.add_properties( key=name, property_dict={ - "energy_decomposition": min_energy_conformer.energy_decomposition, + "energy_decomposition": min_energy_conformer.energy_decomposition, # type:ignore[dict-item] "source": min_energy_conformer.source, "optimised": True, }, @@ -241,7 +241,7 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 final_molecule.write(fina_mol_file) return Conformer( molecule=final_molecule, - energy_decomposition=database.get_property( + energy_decomposition=database.get_property( # type:ignore[arg-type] key=name, property_key="energy_decomposition", property_type=dict, @@ -262,7 +262,7 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 database.add_properties( key=name, property_dict={ - "energy_decomposition": conformer.energy_decomposition, + "energy_decomposition": conformer.energy_decomposition, # type:ignore[dict-item] "source": conformer.source, "optimised": True, }, @@ -308,7 +308,9 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 # Run optimisations of series of conformers with shifted out # building blocks. for test_molecule in yield_shifted_models( - temp_molecule, forcefield, kicks=(1, 2, 3, 4) + temp_molecule, + forcefield, + kicks=(1, 2, 3, 4), ): conformer = run_optimisation( assigned_system=AssignedSystem( @@ -411,14 +413,14 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 failed_md = True if not failed_md: - soft_md_data = soft_md_trajectory.get_data() + soft_md_data = soft_md_trajectory.get_data() # type:ignore[union-attr] # Check that the trajectory is as long as it should be. if len(soft_md_data) != num_steps / traj_freq: failed_md = True # Go through each conformer from soft MD. # Optimise them all. - for md_conformer in soft_md_trajectory.yield_conformers(): + for md_conformer in soft_md_trajectory.yield_conformers(): # type:ignore[union-attr] if failed_md: continue conformer = run_optimisation( @@ -441,7 +443,9 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 min_energy_conformer = ensemble.get_lowest_e_conformer() min_energy_conformerid = min_energy_conformer.conformer_id - min_energy = min_energy_conformer.energy_decomposition["total energy"][0] + min_energy: float = min_energy_conformer.energy_decomposition[ + "total energy" + ][0] logging.info( "%s from %s with energy: %s kJ.mol-1", min_energy_conformerid, @@ -454,7 +458,7 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 database.add_properties( key=name, property_dict={ - "energy_decomposition": min_energy_conformer.energy_decomposition, + "energy_decomposition": min_energy_conformer.energy_decomposition, # type:ignore[dict-item] "source": min_energy_conformer.source, "optimised": True, }, @@ -464,8 +468,9 @@ def optimise_cage( # noqa: PLR0913, C901, PLR0915, PLR0912 def try_except_construction( - iterator: TopologyIterator | IHomolepticTopologyIterator, + iterator: TopologyIterator, topology_code: TopologyCode, + scale_multiplier: float | None = None, building_block_configuration: BuildingBlockConfiguration | None = None, vertex_positions: dict[int, np.ndarray] | None = None, ) -> stk.ConstructedMolecule: @@ -478,7 +483,7 @@ def try_except_construction( try: # Try with aligning vertices. constructed_molecule = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type: ignore[arg-type] building_blocks=bbs, vertex_prototypes=iterator.get_vertex_prototypes( unaligning=False @@ -489,14 +494,16 @@ def try_except_construction( ), vertex_alignments=None, vertex_positions=vertex_positions, - scale_multiplier=iterator.scale_multiplier, + scale_multiplier=iterator.scale_multiplier + if scale_multiplier is None + else scale_multiplier, ) ) except ValueError: # Try with unaligning. constructed_molecule = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type: ignore[arg-type] building_blocks=bbs, vertex_prototypes=iterator.get_vertex_prototypes( unaligning=True @@ -507,7 +514,9 @@ def try_except_construction( ), vertex_alignments=None, vertex_positions=vertex_positions, - scale_multiplier=iterator.scale_multiplier, + scale_multiplier=iterator.scale_multiplier + if scale_multiplier is None + else scale_multiplier, ) ) return constructed_molecule diff --git a/src/cgexplore/_internal/scram/enumeration.py b/src/cgexplore/_internal/scram/enumeration.py index c8872ced..d8cae127 100644 --- a/src/cgexplore/_internal/scram/enumeration.py +++ b/src/cgexplore/_internal/scram/enumeration.py @@ -28,11 +28,13 @@ ) -class TopologyIterator: +class Scrambler: """Iterate over topology graphs. This is an old version of this code, which I do not recommend using over - the `IHomolepticTopologyIterator`. + the `TopologyIterator`. + + TODO: Clean-up and remove this class. """ @@ -46,6 +48,8 @@ def __init__( ) -> None: """Initialize.""" self._building_blocks: dict[stk.BuildingBlock, abc.Sequence[int]] + self._underlying_topology: type[stk.cage.Cage] + if stoichiometry == (1, 1, 1): if multiplier == 1: self._building_blocks = { @@ -129,7 +133,7 @@ def __init__( id=i.get_id(), position=i.get_position(), aligner_edge=i.get_aligner_edge(), - use_neighbor_placement=i.use_neighbor_placement, + use_neighbor_placement=i.use_neighbor_placement(), ) for i in self._underlying_topology._vertex_prototypes # noqa: SLF001 ) @@ -148,7 +152,7 @@ def __init__( self._beta = 10 def _define_underlying(self) -> None: - self._vertex_connections = {} + self._vertex_connections: dict[int, int] = {} for edge in self._init_edge_prototypes: if edge.get_vertex1_id() not in self._vertex_connections: self._vertex_connections[edge.get_vertex1_id()] = 0 @@ -182,10 +186,6 @@ def get_num_building_blocks(self) -> int: """Get number of building blocks.""" return len(self._init_vertex_prototypes) - def get_beta(self) -> float: - """Get beta for MC algorithm.""" - return self._beta - def get_num_scrambles(self) -> int: """Get num. scrambles algorithm.""" return self._num_scrambles @@ -263,7 +263,7 @@ def get_constructed_molecules(self) -> abc.Generator[Constructed]: # noqa: C901 available_type1s = deepcopy(self._type1) available_type2s = deepcopy(self._type2) - new_edges = [] + new_edges: list[stk.Edge] = [] combination = [] for _ in range(len(self._init_edge_prototypes)): try: @@ -316,7 +316,7 @@ def get_constructed_molecules(self) -> abc.Generator[Constructed]: # noqa: C901 try: # Try with aligning vertices. constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._init_vertex_prototypes, edge_prototypes=new_edges, @@ -334,7 +334,7 @@ def get_constructed_molecules(self) -> abc.Generator[Constructed]: # noqa: C901 # Try with unaligning. try: constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._vertices, edge_prototypes=new_edges, @@ -363,7 +363,7 @@ def get_constructed_molecules(self) -> abc.Generator[Constructed]: # noqa: C901 try: # Try with aligning vertices. constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._init_vertex_prototypes, edge_prototypes=new_edges, @@ -381,7 +381,7 @@ def get_constructed_molecules(self) -> abc.Generator[Constructed]: # noqa: C901 # Try with unaligning. try: constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._vertices, edge_prototypes=new_edges, @@ -502,7 +502,7 @@ def get_topology( try: # Try with aligning vertices. constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._init_vertex_prototypes, edge_prototypes=tuple( @@ -527,7 +527,7 @@ def get_topology( # Try with unaligning. try: constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._vertices, edge_prototypes=tuple( @@ -565,7 +565,7 @@ def get_mashed_topology( try: # Try with aligning vertices. constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._init_vertex_prototypes, edge_prototypes=tuple( @@ -590,7 +590,7 @@ def get_mashed_topology( # Try with unaligning. try: constructed = stk.ConstructedMolecule( - CustomTopology( + CustomTopology( # type:ignore[arg-type] building_blocks=self._building_blocks, vertex_prototypes=self._vertices, edge_prototypes=tuple( @@ -616,8 +616,13 @@ def get_mashed_topology( @dataclass -class IHomolepticTopologyIterator: - """Iterate over topology graphs.""" +class TopologyIterator: + """Iterate over topology graphs. + + This is the latest version, but without good symmetry and graph checks, + this can over produce structures. + + """ building_block_counts: dict[stk.BuildingBlock, int] graph_type: str @@ -636,7 +641,7 @@ def __post_init__(self) -> None: # noqa: PLR0915, PLR0912, C901 / f"rx_{self.graph_type}.json" ) if self.max_samples is None: - self.max_samples = int(1e4) + self.used_samples = int(1e4) case "rx_nodoubles": self.graphs_path = ( @@ -645,7 +650,7 @@ def __post_init__(self) -> None: # noqa: PLR0915, PLR0912, C901 / f"rxnd_{self.graph_type}.json" ) if self.max_samples is None: - self.max_samples = int(1e5) + self.used_samples = int(1e5) case "nx": self.graphs_path = ( @@ -670,13 +675,13 @@ def __post_init__(self) -> None: # noqa: PLR0915, PLR0912, C901 # Write vertex prototypes as a function of number of functional groups # and position them on spheres. vertex_map = {} - vertex_prototypes = [] + vertex_prototypes: list[stk.Vertex] = [] unaligned_vertex_prototypes = [] reactable_vertex_ids = [] num_edges = 0 vertex_counts = {} vertex_types_by_fg = defaultdict(list) - building_block_dict = {} + building_block_dict: dict[stk.BuildingBlock, list[int]] = {} for building_block, angle_rotation in zip( self.building_block_counts, angle_rotations, @@ -748,7 +753,7 @@ def __post_init__(self) -> None: # noqa: PLR0915, PLR0912, C901 ) ) - self.building_blocks = { + self.building_blocks: dict[stk.BuildingBlock, abc.Sequence[int]] = { i: tuple(building_block_dict[i]) for i in building_block_dict } self.vertex_map = vertex_map @@ -764,7 +769,9 @@ def get_num_building_blocks(self) -> int: """Get number of building blocks.""" return len(self.vertex_prototypes) - def get_vertex_prototypes(self, unaligning: bool) -> list[stk.Vertex]: + def get_vertex_prototypes( + self, unaligning: bool + ) -> abc.Sequence[stk.Vertex]: """Get vertex prototypes.""" if unaligning: return self.unaligned_vertex_prototypes @@ -772,7 +779,7 @@ def get_vertex_prototypes(self, unaligning: bool) -> list[stk.Vertex]: def _two_type_algorithm(self) -> None: combinations_tested = set() - run_topology_codes = [] + run_topology_codes: list[TopologyCode] = [] type1, type2 = sorted(self.vertex_types_by_fg.keys(), reverse=True) @@ -790,11 +797,11 @@ def _two_type_algorithm(self) -> None: ] to_save = [] - for _ in range(self.max_samples): + for _ in range(self.used_samples): rng.shuffle(options) # Build an edge selection. - combination = [ - tuple(sorted((i, j))) + combination: abc.Sequence[tuple[int, int]] = [ + tuple(sorted((i, j))) # type:ignore[misc] for i, j in zip(itera1, options, strict=True) ] @@ -849,7 +856,7 @@ def _two_type_algorithm(self) -> None: def _three_type_algorithm(self) -> None: combinations_tested = set() - run_topology_codes = [] + run_topology_codes: list[TopologyCode] = [] type1, type2, type3 = sorted( self.vertex_types_by_fg.keys(), reverse=True @@ -874,14 +881,14 @@ def _three_type_algorithm(self) -> None: ] to_save = [] - for _ in range(self.max_samples): + for _ in range(self.used_samples): # Merging options1 and options2 because they both bind to itera. mixed_options = options1 + options2 rng.shuffle(mixed_options) # Build an edge selection. - combination = [ - tuple(sorted((i, j))) + combination: abc.Sequence[tuple[int, int]] = [ + tuple(sorted((i, j))) # type:ignore[misc] for i, j in zip(itera1, mixed_options, strict=True) ] From 5e60a2e741b1d28b071af031e29a714c2d6bbcec Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:30:54 +0100 Subject: [PATCH 43/47] Typing. --- src/cgexplore/_internal/scram/topology_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cgexplore/_internal/scram/topology_code.py b/src/cgexplore/_internal/scram/topology_code.py index 4ef831e8..5e9fe09f 100644 --- a/src/cgexplore/_internal/scram/topology_code.py +++ b/src/cgexplore/_internal/scram/topology_code.py @@ -69,7 +69,7 @@ def get_weighted_graph(self) -> rx.PyGraph: def edges_from_connection( self, - vertex_prototypes: list[stk.Vertex], + vertex_prototypes: abc.Sequence[stk.Vertex], ) -> list[stk.Edge]: """Get stk Edges from topology code.""" return [ From bf1e2ae967b6c971ce7429f423cd2a05b71e806b Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:31:14 +0100 Subject: [PATCH 44/47] Remove parent class. --- .../optimisation/openmm_optimizer.py | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/cgexplore/_internal/optimisation/openmm_optimizer.py b/src/cgexplore/_internal/optimisation/openmm_optimizer.py index ce6b0ca9..ba0ba0cd 100644 --- a/src/cgexplore/_internal/optimisation/openmm_optimizer.py +++ b/src/cgexplore/_internal/optimisation/openmm_optimizer.py @@ -17,7 +17,10 @@ import stko from openmm import app, openmm -from cgexplore._internal.forcefields.assigned_system import ForcedSystem +from cgexplore._internal.forcefields.assigned_system import ( + AssignedSystem, + MartiniSystem, +) from cgexplore._internal.molecular.ensembles import Timestep logging.basicConfig( @@ -230,7 +233,7 @@ def _add_atom_constraints( def _setup_simulation( self, - assigned_system: ForcedSystem, + assigned_system: AssignedSystem | MartiniSystem, ) -> tuple[app.Simulation, openmm.System]: system = assigned_system.get_openmm_system() topology = assigned_system.get_openmm_topology() @@ -331,20 +334,22 @@ def _update_stk_molecule( positions = state.getPositions(asNumpy=True) return molecule.with_position_matrix(positions * 10) - def calculate_energy(self, assigned_system: ForcedSystem) -> float: + def calculate_energy( + self, assigned_system: AssignedSystem | MartiniSystem + ) -> float: """Calculate energy of a system.""" simulation, _ = self._setup_simulation(assigned_system) return self._get_energy(simulation) def calculate_energy_decomposition( self, - assigned_system: ForcedSystem, + assigned_system: AssignedSystem | MartiniSystem, ) -> dict[str, float]: """Calculate energy of a system.""" simulation, system = self._setup_simulation(assigned_system) return self._run_energy_decomp(simulation, system) - def read_final_energy_decomposition(self) -> dict: + def read_final_energy_decomposition(self) -> dict[str, tuple[float, str]]: """Read the final energy decomposition in an output file.""" decomp_data = ( self._output_string.split("energy decomposition:")[-1] @@ -357,11 +362,12 @@ def read_final_energy_decomposition(self) -> dict: continue force, value_unit = i.split(":") value, unit = value_unit.split() - value = float(value) # type: ignore[assignment] - decomposition[force] = (value, unit) + decomposition[force] = (float(value), unit) return decomposition - def optimize(self, assigned_system: ForcedSystem) -> stk.Molecule: + def optimize( + self, assigned_system: AssignedSystem | MartiniSystem + ) -> stk.Molecule: """Optimize a molecule.""" start_time = time.time() self._output_string += f"start time: {start_time}\n" @@ -391,7 +397,7 @@ def __init__( self, fileprefix: str, output_dir: pathlib.Path, - assigned_system: ForcedSystem, + assigned_system: AssignedSystem | MartiniSystem, platform: str | None = None, ) -> None: """Initialize CGOMMSinglePoint.""" @@ -420,7 +426,7 @@ def __init__( def _setup_simulation( self, - assigned_system: ForcedSystem, + assigned_system: AssignedSystem | MartiniSystem, ) -> tuple[app.Simulation, openmm.System]: system = assigned_system.get_openmm_system() topology = assigned_system.get_openmm_topology() @@ -613,7 +619,9 @@ def _get_trajectory(self, molecule: stk.Molecule) -> OMMTrajectory: traj_freq=self._traj_freq, ) - def run_dynamics(self, assigned_system: ForcedSystem) -> OMMTrajectory: + def run_dynamics( + self, assigned_system: AssignedSystem | MartiniSystem + ) -> OMMTrajectory: """Run dynamics on an assigned system.""" start_time = time.time() self._output_string += f"start time: {start_time}\n" From 120f7089c13c37faac8b4eac8c98dc703caf3eee Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:31:22 +0100 Subject: [PATCH 45/47] Remove parent class. --- src/cgexplore/_internal/forcefields/forcefield.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cgexplore/_internal/forcefields/forcefield.py b/src/cgexplore/_internal/forcefields/forcefield.py index 1f2d9eb4..924f5e93 100644 --- a/src/cgexplore/_internal/forcefields/forcefield.py +++ b/src/cgexplore/_internal/forcefields/forcefield.py @@ -49,7 +49,7 @@ from cgexplore._internal.utilities.errors import ForceFieldUnitError from cgexplore._internal.utilities.utilities import convert_pyramid_angle -from .assigned_system import AssignedSystem, ForcedSystem, MartiniSystem +from .assigned_system import AssignedSystem, MartiniSystem logging.basicConfig( level=logging.INFO, @@ -571,7 +571,7 @@ def assign_terms( molecule: stk.Molecule, name: str, output_dir: pathlib.Path, - ) -> ForcedSystem: + ) -> AssignedSystem: """Assign forcefield terms to molecule.""" assigned_terms = { "bond": self._assign_bond_terms(molecule), @@ -821,12 +821,12 @@ def __init__( # noqa: PLR0913 self._constraints = constraints self._hrprefix = "mffhr" - def assign_terms( + def assign_terms( # type:ignore[override] self, molecule: stk.Molecule, name: str, output_dir: pathlib.Path, - ) -> ForcedSystem: + ) -> MartiniSystem: """Assign forcefield terms to molecule.""" assigned_terms = { "bond": self._assign_bond_terms(molecule), From 3cf4705e9e17935e9a575c69c42d890b24222b31 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:31:31 +0100 Subject: [PATCH 46/47] Typing. --- src/cgexplore/_internal/molecular/ensembles.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/cgexplore/_internal/molecular/ensembles.py b/src/cgexplore/_internal/molecular/ensembles.py index 50b95f53..4baae455 100644 --- a/src/cgexplore/_internal/molecular/ensembles.py +++ b/src/cgexplore/_internal/molecular/ensembles.py @@ -24,14 +24,18 @@ class Timestep: class Ensemble: - """Class to contain ensemble information.""" + """Class to contain ensemble information. + + TODO: Rewrite into a .db. + + """ def __init__( self, base_molecule: stk.Molecule, - base_mol_path: str, - conformer_xyz: str, - data_json: str, + base_mol_path: str | pathlib.Path, + conformer_xyz: str | pathlib.Path, + data_json: str | pathlib.Path, overwrite: bool, ) -> None: """Initialize Ensemble class.""" @@ -88,7 +92,7 @@ def add_conformer(self, conformer: Conformer, source: str) -> None: i: conformer.energy_decomposition[i] for i in conformer.energy_decomposition } - conf_data["source"] = source + conf_data["source"] = source # type:ignore[assignment] self._data[conf_id] = conf_data def load_trajectory(self) -> dict[int, list[str]]: From 44802a7786cf95837479f24b3bd47ff82f030739 Mon Sep 17 00:00:00 2001 From: andrewtarzia Date: Sat, 18 Jan 2025 19:31:41 +0100 Subject: [PATCH 47/47] Typing. --- src/cgexplore/_internal/molecular/conformer.py | 2 +- .../_internal/utilities/generation_utilities.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cgexplore/_internal/molecular/conformer.py b/src/cgexplore/_internal/molecular/conformer.py index cb26bd95..a529ba4e 100644 --- a/src/cgexplore/_internal/molecular/conformer.py +++ b/src/cgexplore/_internal/molecular/conformer.py @@ -13,7 +13,7 @@ @dataclass(frozen=True, slots=True) class Conformer: molecule: stk.Molecule - energy_decomposition: dict + energy_decomposition: dict[str, tuple[float, str]] conformer_id: int | None = None source: str | None = None diff --git a/src/cgexplore/_internal/utilities/generation_utilities.py b/src/cgexplore/_internal/utilities/generation_utilities.py index bbeed9db..2b95df3f 100644 --- a/src/cgexplore/_internal/utilities/generation_utilities.py +++ b/src/cgexplore/_internal/utilities/generation_utilities.py @@ -8,7 +8,7 @@ import logging import pathlib -from collections.abc import Iterator +from collections import abc import numpy as np import stk @@ -202,7 +202,7 @@ def run_soft_md_cycle( # noqa: PLR0913 friction: openmm.unit.Quantity, reporting_freq: float, traj_freq: float, - platform: str, + platform: str | None, ) -> OMMTrajectory | None: """Run MD exploration with soft potentials. @@ -296,7 +296,7 @@ def run_constrained_optimisation( # noqa: PLR0913 bond_ff_scale: float, angle_ff_scale: float, max_iterations: int, - platform: str, + platform: str | None, ) -> stk.Molecule: """Run optimisation with constraints and softened potentials. @@ -362,7 +362,7 @@ def run_optimisation( # noqa: PLR0913 name: str, file_suffix: str, output_dir: pathlib.Path, - platform: str, + platform: str | None, max_iterations: int | None = None, ensemble: Ensemble | None = None, ) -> Conformer: @@ -423,7 +423,7 @@ def yield_near_models( name: str, output_dir: pathlib.Path | str, neighbour_library: list, -) -> Iterator[stk.Molecule]: +) -> abc.Iterator[stk.Molecule]: """Yield structures of models with neighbouring force field parameters. Keywords: @@ -497,8 +497,8 @@ def shift_beads( def yield_shifted_models( molecule: stk.Molecule, forcefield: ForceField, - kicks: tuple[int], -) -> Iterator[stk.Molecule]: + kicks: abc.Sequence[int], +) -> abc.Iterator[stk.Molecule]: """Yield conformers with atom positions of particular beads shifted. Keywords: