Skip to content

Commit

Permalink
Merge pull request #75 from xiaoruiDong/fix_mol
Browse files Browse the repository at this point in the history
More molecule fixes and helper reactions for checking molecules / reactions
  • Loading branch information
xiaoruiDong authored Nov 6, 2023
2 parents 26c1674 + aebc2ae commit 1c42c87
Show file tree
Hide file tree
Showing 15 changed files with 1,459 additions and 736 deletions.
7 changes: 7 additions & 0 deletions docs/source/reference/fix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
rdmc.fix
========

.. automodule:: rdmc.fix
:members:
:undoc-members:
:show-inheritance:
7 changes: 7 additions & 0 deletions docs/source/reference/mol_compare.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
rdmc.mol_compare
================

.. automodule:: rdmc.mol_compare
:members:
:undoc-members:
:show-inheritance:
8 changes: 5 additions & 3 deletions docs/source/reference/rdmc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ RDMC APIs
.. toctree::
:maxdepth: 4

conf
forcefield
mol
mol_compare
fix
reaction
ts
utils
view
conf
forcefield
utils
6 changes: 4 additions & 2 deletions rdmc/external/logparser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -995,7 +995,9 @@ def interact_irc(self,
sanitize: bool = False,
converged: bool = True,
backend: str = 'openbabel',
bothway: bool = False,
continuous_update: bool = False,
**kwargs,
) -> interact:
"""
Create a IPython interactive widget to investigate the IRC results.
Expand All @@ -1009,7 +1011,7 @@ def interact_irc(self,
Returns:
interact
"""
mol = self._process_irc_mol(sanitize=sanitize, converged=converged, backend=backend)
mol = self._process_irc_mol(sanitize=sanitize, converged=converged, backend=backend, bothway=bothway)
sdfs = [mol.ToMolBlock(confId=i) for i in range(mol.GetNumConformers())]
xyzs = self.get_xyzs(converged=converged)
y_params = self.get_scf_energies(converged=converged)
Expand All @@ -1024,7 +1026,7 @@ def interact_irc(self,
ylabel = 'E(SCF) [kcal/mol]'

def visual(idx):
mol_viewer(sdfs[idx - 1], 'sdf').update()
mol_viewer(sdfs[idx - 1], 'sdf', **kwargs).update()
ax = plt.axes()
ax.plot(x_params, y_params)
ax.set(xlabel=xlabel, ylabel=ylabel)
Expand Down
9 changes: 7 additions & 2 deletions rdmc/external/xyz2mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,13 @@ def AC2mol(mol, AC, atoms, charge, allow_charged_fragments=True,
return []

# BO2mol returns an arbitrary resonance form. Let's make the rest
mols = rdchem.ResonanceMolSupplier(mol, Chem.UNCONSTRAINED_CATIONS, Chem.UNCONSTRAINED_ANIONS)
mols = [mol for mol in mols]
mols = [mol for mol in rdchem.ResonanceMolSupplier(
mol,
Chem.ALLOW_INCOMPLETE_OCTETS | Chem.UNCONSTRAINED_CATIONS | Chem.UNCONSTRAINED_ANIONS,
) if mol is not None]

if not mols:
mols = [mol] # For some cases, resonance structure supplier creates Nones

return mols

Expand Down
220 changes: 204 additions & 16 deletions rdmc/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,127 @@
from functools import reduce
from typing import List

import numpy as np

from rdmc import RDKitMol
from rdkit.Chem import rdChemReactions, rdmolops
from rdkit.Chem import BondType, rdChemReactions, rdmolops


DEFAULT_REMEDIES = [
RECOMMEND_REMEDIES = [
# Remedy 1 - Carbon monoxide: [C]=O to [C-]#[O+]
rdChemReactions.ReactionFromSmarts(
"[O+0-0v2X1:1]=[C+0-0v2X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]"
),
# Remedy 2 - Oxygen Molecule: O=O to [O]-[O]
# Remedy 2 - Carbon monoxide (triple-bonded, uncharged): [C]#[O] to [C-]#[O+]
rdChemReactions.ReactionFromSmarts(
"[O+0-0v3X1:1]#[C+0-0v3X1:2]>>[O+1v3X1:1]#[C-1v3X1:2]"
),
# Remedy 3 - Oxygen Molecule: O=O to [O]-[O]
rdChemReactions.ReactionFromSmarts(
"[O+0-0v2X1:1]=[O+0-0v2X1:2]>>[O+0-0v1X1:1]-[O+0-0v1X1:2]"
),
# Remedy 3 - isocyanide: R[N]#[C] to R[N+]#[C-]
# Remedy 4 - isocyanide: R[N]#[C] to R[N+]#[C-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X2:1]#[C+0-0v3X1:2]>>[N+v4X2:1]#[C-v3X1:2]"
),
# Remedy 4 - amine radical: RC(R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R
# Remedy 5 - azide: RN=N=[N] to RN=[N+]=[N-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v3X2:1]=[N+0-0v4X2:2]=[N+0-0v2X1:3]>>[N+0-0v3X2:1]=[N+1v4X2:2]=[N-1v2X1:3]"
),
# Remedy 6 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]"
),
# Remedy 7 - amine radical: R[C](R)-N(R)(R)R to R[C-](R)-[N+](R)(R)R
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X4:1]-[C+0-0v3X3:2]>>[N+1v4X4:1]-[C-1v3X3:2]"
),
# Remedy 5 - amine radical: RN(R)=C to RN(R)-[C]
# Remedy 8 - amine radical: RN(R)=C to RN(R)-[C]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X3:1]=[C+0-0v4X3:2]>>[N+0-0v3X3:1]-[C+0-0v3X3:2]"
),
# Remedy 5 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=[C+](R)-[O-]
# Remedy 9 - quintuple C bond, usually due to RC(=O)=O: R=C(R)=O to R=C(R)-[O]
rdChemReactions.ReactionFromSmarts(
"[C+0-0v5X3:1]=[O+0-0v2X1:2]>>[C+0-0v4X3:1]-[O+0-0v1X1:2]"
),
# Remedy 6 - amine oxide: RN(R)(R)-O to R[N+](R)(R)-[O-]
# Remedy 10 - sulphuric bi-radicals: R[S](R)(-[O])-[O] to R[S](R)(=O)(=O)
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X4:1]-[O+0-0v1X1:2]>>[N+1v4X4:1]-[O-1v1X1:2]"
"[S+0-0v4X4:1](-[O+0-0v1X1:2])-[O+0-0v1X1:3]>>[S+0-0v6X4:1](=[O+0-0v2X1:2])=[O+0-0v2X1:3]"
),
# Remedy 11 - Triazine: C1=N=C=N=C=N=1 to c1ncncn1
rdChemReactions.ReactionFromSmarts(
"[C+0-0v5X3:1]1=[N+0-0v4X2:2]=[C+0-0v5X3:3]=[N+0-0v4X2:4]=[C+0-0v5X3:5]=[N+0-0v4X2:6]=1"
">>[C+0-0v5X3:1]1[N+0-0v4X2:2]=[C+0-0v5X3:3][N+0-0v4X2:4]=[C+0-0v5X3:5][N+0-0v4X2:6]=1"
),
]


ZWITTERION_REMEDIES = [
# Remedy 1 - Criegee intermediate: R[C](R)O[O] to RC(R)=[O+][O-]
rdChemReactions.ReactionFromSmarts(
"[C+0-0v3X3:1]-[O+0-0v2X2:2]-[O+0-0v1X1:3]>>[C+0-0v4X3:1]=[O+1v3X2:2]-[O-1v1X1:3]"
),
# Remedy 2 - Criegee intermediate: [C]-C=C(R)O[O] to C=C-C(R)=[O+][O-]
rdChemReactions.ReactionFromSmarts(
"[C+0-0v3X3:1]-[C:2]=[C+0-0v4X3:3]-[O+0-0v2X2:4]-[O+0-0v1X1:5]>>[C+0-0v4X3:1]=[C:2]-[C+0-0v4X3:3]=[O+1v3X2:4]-[O-1v1X1:5]"
),
# Remedy 7 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-]
# Remedy 3 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X4:1]-[C+0-0v4X3:2]=[O+0-0v2X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]"
),
# Remedy 8 - criegee like molecule: RN(R)(R)-C(R)(R)=O to R[N+](R)(R)-[C](R)(R)-[O-]
# Remedy 4 - criegee like molecule: R[N+](R)(R)-[C-](R)(R)[O] to R[N+](R)(R)-[C](R)(R)-[O-]
rdChemReactions.ReactionFromSmarts(
"[N+0-0v4X4:1]-[C+0-0v3X3:2]-[O+0-0v1X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]"
"[N+1v4X4:1]-[C-1v3X3:2]-[O+0-0v1X1:3]>>[N+1v4X4:1]-[C+0-0v3X3:2]-[O-1v1X1:3]"
),
# Remedy 5 - ammonium + carboxylic: ([N]R4.C(=O)[O]) to ([N+]R4.C(=O)[O-])
rdChemReactions.ReactionFromSmarts(
"([N+0-0v4X4:1].[O+0-0v2X1:2]=[C+0-0v4X3:3]-[O+0-0v1X1:4])>>([N+1v4X4:1].[O+0-0v2X1:2]=[C+0-0v4X3:3]-[O-1v1X1:4])"
),
# Remedy 6 - ammonium + phosphoric: ([N]R4.P(=O)[O]) to ([N+]R4.P(=O)[O-])
rdChemReactions.ReactionFromSmarts(
"([N+0-0v4X4:1].[P+0-0v5X4:2]-[O+0-0v1X1:3])>>([N+1v4X4:1].[P+0-0v5X4:2]-[O-1v1X1:3])"
),
# Remedy 7 - ammonium + sulphuric: ([N]R4.S(=O)(=O)[O]) to ([N+]R4.S(=O)(=O)[O-])
rdChemReactions.ReactionFromSmarts(
"([N+0-0v4X4:1].[S+0-0v6X4:2]-[O+0-0v1X1:3])>>([N+1v4X4:1].[S+0-0v6X4:2]-[O-1v1X1:3])"
),
# Remedy 8 - ammonium + carbonyl in ring: ([N]R4.C=O) to ([N+]R4.[C.]-[O-])
rdChemReactions.ReactionFromSmarts(
"([N+0-0v4X4:1].[C+0-0v4X3R:2]=[O+0-0v2X1:3])>>([N+1v4X4:1].[C+0-0v3X3R:2]-[O-1v1X1:3])"
),
]


# Remedies whose patterns involve ring atoms (the ``R`` primitive in the SMARTS).
# The first four elements' sequence matters
# TODO: Find a better solution to avoid the impact of sequence
RING_REMEDIES = [
    rdChemReactions.ReactionFromSmarts(reaction_smarts)
    for reaction_smarts in (
        # Remedy 1 - quintuple C in ring: R1=C(R)=N-R1 to R1=C(R)[N]-R1
        "[C+0-0v5X3R:1]=[N+0-0v3X2R:2]>>[C+0-0v4X3R:1]-[N+0-0v2X2R:2]",
        # Remedy 2 - quadruple N in ring: R1=N=C(R)R1 to R1=N-[C](R)R1
        "[N+0-0v4X2R:1]=[C+0-0v4X3R:2]>>[N+0-0v3X2R:1]-[C+0-0v3X3R:2]",
        # Remedy 3 - ring =C(R)=N-[C]: R1=C(R)=N-[C](R)R1 to R1=C(R)-N=C(R)R1
        "[C+0-0v5X3R:1]=[N+0-0v3X2R:2]-[C+0-0v3X3:3]>>[C+0-0v4X3R:1]-[N+0-0v3X2R:2]=[C+0-0v4X3:3]",
        # Remedy 4 - ring -N-N-: R1-N-N-R1 to R1-N=N-R1
        "[N+0-0v2X2R:1]-[N+0-0v2X2R:2]>>[N+0-0v3X2R:1]=[N+0-0v3X2R:2]",
        # Remedy 5 - bicyclic radical
        "[C+0-0v4:1]1[C+0-0v4X4:2]23[C+0-0v4:3][N+0-0v4X4:4]12[C+0-0v4:5]3>>[C+0-0v4:1]1[C+0-0v3X3:2]2[C+0-0v4:3][N+0-0v3X3:4]1[C+0-0v4:5]2",
    )
]


# DEFAULT_REMEDIES is an alias of RECOMMEND_REMEDIES (the same list object, not a copy).
# NOTE(review): presumably this is the set used when a caller passes no remedies - confirm against fix_mol.
DEFAULT_REMEDIES = RECOMMEND_REMEDIES
# Every remedy defined above, concatenated in the order: recommended, zwitterion, ring.
ALL_REMEDIES = RECOMMEND_REMEDIES + ZWITTERION_REMEDIES + RING_REMEDIES


def update_product_atom_map_after_reaction(
mol: "RDKitMol",
ref_mol: "RDKitMol",
Expand Down Expand Up @@ -103,7 +180,8 @@ def fix_mol_by_remedy(
fix_flag = False

for _ in range(max_attempts):
tmp_mol.UpdatePropertyCache(False)
tmp_mol.UpdatePropertyCache(False) # Update connectivity
rdmolops.GetSymmSSSR(tmp_mol) # Update ring information
try:
# Remedy are designed to be unimolecular (group transformation), so the product will be unimolecular as well
# If no match, RunReactants will return an empty tuple and thus cause an IndexError.
Expand Down Expand Up @@ -202,10 +280,11 @@ def fix_mol(
max_attempts: int = 10,
sanitize: bool = True,
fix_spin_multiplicity: bool = False,
mult: int = 1,
mult: int = 0,
renumber_atoms: bool = True,
) -> "RDKitMol":
"""
Fix the molecule by applying the given remedies and saturating the radical sites to full fill the desired spin multiplicity.
Fix the molecule by applying the given remedies and saturating bi-radical or carbene to fix spin multiplicity.
Args:
mol (RDKitMol): The molecule to be fixed.
Expand All @@ -215,10 +294,14 @@ def fix_mol(
max_attempts (int, optional): The maximum number of attempts to fix the molecule.
Defaults to ``10``.
sanitize (bool, optional): Whether to sanitize the molecule after the fix. Defaults to ``True``.
Using ``False`` is only recommended for debugging and testing.
fix_spin_multiplicity (bool, optional): Whether to fix the spin multiplicity of the molecule.
Defaults to ``False``.
mult (int, optional): The desired spin multiplicity. Defaults to ``1``.
mult (int, optional): The desired spin multiplicity. Defaults to ``0``, which means the lowest possible
spin multiplicity will be inferred from the number of unpaired electrons.
Only used when ``fix_spin_multiplicity`` is ``True``.
renumber_atoms (bool, optional): Whether to renumber the atoms after the fix. Defaults to ``True``.
Turn this off when the atom map number is not important.
Returns:
RDKitMol: The fixed molecule.
Expand All @@ -231,6 +314,111 @@ def fix_mol(
)

if fix_spin_multiplicity:
if mult == 0:
# Infer the possible lowest spin multiplicity from the number of unpaired electrons
mult = 1 if mol.GetSpinMultiplicity() % 2 else 2
mol = fix_mol_spin_multiplicity(mol, mult)

if renumber_atoms:
mol = mol.RenumberAtoms()

return mol


def find_oxonium_bonds(
    mol: "RDKitMol",
    threshold: float = 1.65,
) -> List[tuple]:
    """
    Find oxygen / heavy-atom pairs that are close enough to be bonded but are not
    connected in the molecule's adjacency matrix (potential oxonium bonds).

    Args:
        mol (RDKitMol): The molecule to be inspected.
        threshold (float, optional): The largest distance at which an oxygen atom and another
                                     heavy atom are regarded as connected. Defaults to ``1.65``,
                                     based on the longest C-O bond (1.65 A).

    Returns:
        List[tuple]: a list of (oxygen atom index, the other atom index). An empty list is
                     returned when the molecule has no oxygen atom. A missing O-O contact shows
                     up once per oxygen, i.e., as both ``(a, b)`` and ``(b, a)``.
    """
    heavy_atoms = list(mol.GetHeavyAtoms())
    heavy_idxs = [atom.GetIdx() for atom in heavy_atoms]
    oxygen_idxs = [atom.GetIdx() for atom in heavy_atoms if atom.GetAtomicNum() == 8]

    if not oxygen_idxs:
        return []

    distances = mol.GetDistanceMatrix()
    # Overwrite each oxygen's self distance with a large number so that
    # an oxygen is never inferred to be connected to itself
    distances[oxygen_idxs, oxygen_idxs] = 100

    # Rows: oxygen atoms; columns: all heavy atoms
    oxygen_vs_heavy = np.ix_(oxygen_idxs, heavy_idxs)

    # A detailed check may be done by element type;
    # for now a single distance threshold is applied to every element
    within_reach = (distances[oxygen_vs_heavy] <= threshold).astype(int)
    bonded = mol.GetAdjacencyMatrix()[oxygen_vs_heavy]

    # A difference of 1 marks a pair inferred to be connected yet not bonded in the graph
    candidates = np.unique(np.argwhere((within_reach - bonded) == 1), axis=0).tolist()

    # Translate (row, column) positions back into atom indexes
    return [(oxygen_idxs[row], heavy_idxs[col]) for row, col in candidates]


def fix_oxonium_bonds(
    mol: "RDKitMol",
    threshold: float = 1.65,
    sanitize: bool = True,
) -> "RDKitMol":
    """
    Add the bonds missed around oxonium oxygen atoms and fix the resulting charges.
    Openbabel and Jensen perception algorithm do not perceive the oxonium atom correctly,
    so the candidates are detected by ``find_oxonium_bonds`` and repaired here.

    Args:
        mol (RDKitMol): The molecule to be fixed. It is not modified; the fix works on a copy.
        threshold (float, optional): The threshold to determine if two atoms are connected.
                                     Defaults to ``1.65``.
        sanitize (bool, optional): Whether to sanitize the molecule after the fix. Defaults to ``True``.
                                   Using ``False`` is only recommended for debugging and testing.

    Returns:
        RDKitMol: The fixed molecule. When no candidate bond is found, the input ``mol``
                  itself is returned unchanged.
    """
    bonds_to_add = find_oxonium_bonds(mol, threshold=threshold)
    if not bonds_to_add:
        return mol

    patched = mol.Copy()
    for oxygen_idx, partner_idx in bonds_to_add:
        try:
            patched.AddBond(oxygen_idx, partner_idx, order=BondType.SINGLE)
        except RuntimeError:
            # Oxygen may get double counted (an O-O pair is listed once per oxygen)
            continue

        # Usually the connected atom is a radical site,
        # so one radical electron is consumed by the new bond
        partner = patched.GetAtomWithIdx(partner_idx)
        n_radical = partner.GetNumRadicalElectrons()
        if n_radical > 0:
            partner.SetNumRadicalElectrons(n_radical - 1)

    # These remedies are only used for oxonium
    oxonium_remedies = [
        rdChemReactions.ReactionFromSmarts(reaction_smarts)
        for reaction_smarts in (
            # Remedy 1 - R[O](R)[O] to R[O+](R)[O-]
            # This is a case combining two radicals R-O-[O] and [R]
            "[O+0-0v3X3:1]-[O+0-0v1X1:2]>>[O+1v3X3:1]-[O-1v1X1:2]",
            # Remedy 2 - R[O](R)C(R)=O to R[O+](R)[C](R)[O-]
            # This is a case combining a closed shell ROR with a radical R[C]=O
            "[O+0-0v3X3:1]-[C+0-0v4X3:2]=[O+0-0v2X1:3]>>[O+1v3X3:1]-[C+0-0v3X3:2]-[O-1v1X1:3]",
            # Remedy 3 - R[O](R)[C](R)R to R[O+](R)[C-](R)R
            # This is a case combining a radical R[C](R)(R) with a radical R[O]
            "[O+0-0v3X3:1]-[C+0-0v3X3:2]>>[O+1v3X3:1]-[C-1v3X3:2]",
        )
    ]

    return fix_mol(patched, remedies=oxonium_remedies, sanitize=sanitize)
Loading

0 comments on commit 1c42c87

Please sign in to comment.