Skip to content

Commit

Permalink
Fix generate resonance structure and update unit test
Browse files Browse the repository at this point in the history
1. Update how flags are used in generating the resonance structure and sanitize kekulized molecules
2. Update a method to remove atom highlighting after filtration
3. Prevent `Kekulize` from reassigning bond orders
4. Correct default arg and docstring
5. In unit tests, use benzene, the 1-phenylethyl radical, and C=C[CH]C=C as examples. Remove the phenyl radical case because the default RDKit behavior for it is not as expected.
  • Loading branch information
xiaoruiDong committed Sep 13, 2023
1 parent 938f2c7 commit dacf738
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 42 deletions.
67 changes: 42 additions & 25 deletions rdmc/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -1913,29 +1913,34 @@ def generate_vdw_mat(rd_mol,

def generate_radical_resonance_structures(mol: RDKitMol,
unique: bool = True,
consider_atommap: bool = True,
consider_atommap: bool = False,
kekulize: bool = False):
"""
Generate resonance structures for a radical molecule. RDKit by design doesn't work
for radical resonance. The approach is a temporary workaround by replacing radical electrons by positive
charges and generating resonance structures by RDKit ResonanceMolSupplier.
Currently, this function only works for neutral radicals.
Known issues:
- Phenyl radical only generate one resonance structure when ``kekulize=True``, expecting 2.
Args:
mol (RDKitMol): A radical molecule.
unique (bool, optional): Filter out duplicate resonance structures from the list. Defaults to ``True``.
consider_atommap (bool, optional): Whether to consider atom map numbers when filtering out duplicates.
Only effective when uniquify=True. Defaults to ``False``.
kekulize (bool, optional): Whether to kekulize the molecule. Defaults to ``False``. When ``True``, uniquifying
process will be skipped.
Only effective when ``unique=True``. Defaults to ``False``.
kekulize (bool, optional): Whether to kekulize the molecule. Defaults to ``False``. As an example,
benzene have one resonance structure if not kekulized (``False``) and
two resonance structures if kekulized (``True``).
Returns:
list: a list of molecules with resonance structures.
"""
assert mol.GetFormalCharge() == 0, "The current function only works for radical species."
mol_copy = mol.Copy(quickCopy=True) # Make a copy of the original molecule

# Modify the original molecule to make it a postively charged species
# Modify the original molecule to make it a positively charged species
recipe = {} # Used to record changes. Temporarily not used now.
for atom in mol_copy.GetAtoms():
radical_electrons = atom.GetNumRadicalElectrons()
Expand All @@ -1957,7 +1962,9 @@ def generate_radical_resonance_structures(mol: RDKitMol,
mol_copy.UpdatePropertyCache() # Make sure the assignment is boardcast to atoms / bonds

# Generate Resonance Structures
flags = Chem.KEKULE_ALL | Chem.ALLOW_INCOMPLETE_OCTETS | Chem.UNCONSTRAINED_CATIONS
flags = Chem.ALLOW_INCOMPLETE_OCTETS | Chem.UNCONSTRAINED_CATIONS
if kekulize:
flags |= Chem.KEKULE_ALL
suppl = Chem.ResonanceMolSupplier(mol_copy._mol, flags=flags)
res_mols = [RDKitMol(RWMol(mol)) for mol in suppl]

Expand All @@ -1974,38 +1981,48 @@ def generate_radical_resonance_structures(mol: RDKitMol,
elif charge < 0: # Shouldn't appear, just for bug detection
raise RuntimeError('Encounter charge separation during resonance structure generation.')

# For aromatic molecules:
# The aromaticity flag is incorrectly inherited from the parent molecule
# and can cause issues when kekulizing the children's structures. Resetting all atomic
# aromaticity flags triggers aromaticity perception during sanitization.
# Other approaches were tried, but none of them worked (e.g., mol.ClearComputedProps())
atom.SetIsAromatic(False)

# If a structure cannot be sanitized, remove it
try:
res_mol.Sanitize()
except BaseException:
# Sanitization strategy is inspired by
# https://github.com/rdkit/rdkit/discussions/6358
flags = Chem.SanitizeFlags.SANITIZE_ALL
if kekulize:
flags ^= (Chem.SanitizeFlags.SANITIZE_KEKULIZE | Chem.SanitizeFlags.SANITIZE_SETAROMATICITY)
res_mol.Sanitize(sanitizeOps=flags)
except BaseException as e:
print(e)
# todo: make error type more specific and add a warning message
continue
if kekulize:
res_mol.Kekulize()
_unset_aromatic_flags(res_mol)
cleaned_mols.append(res_mol)

# To remove duplicate resonance structures
if unique and not kekulize:
if unique:
cleaned_mols = get_unique_mols(cleaned_mols,
consider_atommap=consider_atommap)
# Temporary fix to remove highlight flag
# TODO: replace with a better method after knowing the mechanism of highlighting substructures
cleaned_mols = [RDKitMol.FromSmiles(
mol.ToSmiles(removeAtomMap=False,
removeHs=False,
kekule=kekulize,)
)
for mol in cleaned_mols]
for mol in cleaned_mols:
# According to
# https://github.com/rdkit/rdkit/blob/9249ca5cc840fc72ea3bb73c2ff1d71a1fbd3f47/rdkit/Chem/Draw/IPythonConsole.py#L152
# highlight info is stored in __sssAtoms
mol._mol.__setattr__('__sssAtoms', [])
return cleaned_mols


def _unset_aromatic_flags(mol):
    """
    Clear leftover aromatic flags on bonds whose bond type is not aromatic.

    Intended for cleaning up molecules produced by resonance structure
    generation, where a bond may carry an explicit single/double bond type
    while still being flagged as aromatic. For every such bond, the aromatic
    flag is unset on the bond itself and on both of its end atoms.

    Args:
        mol: A molecule object providing ``GetBonds()``. It is modified in place.

    Returns:
        The same ``mol`` object that was passed in.
    """
    for bond in mol.GetBonds():
        # Skip bonds that are genuinely aromatic, or that carry no aromatic flag at all
        if bond.GetBondType() == Chem.BondType.AROMATIC or not bond.GetIsAromatic():
            continue
        # Inconsistent state: non-aromatic bond type but aromatic flag still set
        bond.SetIsAromatic(False)
        begin_atom = bond.GetBeginAtom()
        begin_atom.SetIsAromatic(False)
        end_atom = bond.GetEndAtom()
        end_atom.SetIsAromatic(False)
    return mol


def has_matched_mol(mol: RDKitMol,
mols: List[RDKitMol],
consider_atommap: bool = False,
Expand Down
92 changes: 75 additions & 17 deletions test/test_mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,43 +609,101 @@ def test_generate_radical_resonance_structures(self):
RDKitMol.FromSmiles(smi)
)

# Test case 1: 1-Phenylethyl radical
# Test case 1: benzene
smi = 'c1ccccc1'
mol = RDKitMol.FromSmiles(smi)
# Without kekulization, RDKit returns 1 resonance structure
assert len(generate_radical_resonance_structures(
mol,
unique=False,
kekulize=False,
)) == 1
# With kekulization, RDKit returns 2 resonance structures
assert len(generate_radical_resonance_structures(
mol,
unique=False,
kekulize=True,
)) == 2
# With kekulization, RDKit returns 1 resonance structure
# after uniquification without considering atom map
assert len(generate_radical_resonance_structures(
mol,
unique=True,
consider_atommap=False,
kekulize=True,
)) == 1
# With kekulization, RDKit returns 2 resonance structures
# after uniquification when considering atom map
assert len(generate_radical_resonance_structures(
mol,
unique=True,
consider_atommap=True,
kekulize=True,
)) == 2

# Test case 2: 1-Phenylethyl radical
smi = 'c1ccccc1[CH]C'
# Without filtration, RDKit returns 5 resonance structures
mol = RDKitMol.FromSmiles(smi)
# Without kekulization, RDKit returns 4 resonance structures
# 3 with radical site on the ring and 1 with differently kekulized benzene
assert len(generate_radical_resonance_structures(
mol,
unique=False,
kekulize=False,
)) == 4
# With kekulization, RDKit returns 5 resonance structures
# 3 with radical site on the ring and 2 with differently kekulized benzene
assert len(generate_radical_resonance_structures(
RDKitMol.FromSmiles(smi),
unique=False
mol,
unique=False,
kekulize=True,
)) == 5
# With filtration and not considering atom map, RDKit returns 3 structures
# With filtration, kekulization, and not considering atom map,
# there will be 3 resonance structures, 2 with radical site on the ring
# and 1 with radical site on the alkyl chain
assert len(generate_radical_resonance_structures(
RDKitMol.FromSmiles(smi),
unique=True,
consider_atommap=False
consider_atommap=False,
kekulize=True,
)) == 3
# With filtration and considering atom map, RDKit returns 4 structures
# With filtration and considering atom map, and without kekulization,
# RDKit returns 4 structures, 3 with radical site on the ring
# and 1 with radical site on the alkyl chain
assert len(generate_radical_resonance_structures(
RDKitMol.FromSmiles(smi),
unique=True,
consider_atommap=True
)) == 4
# Test case 2: Phenyl radical
smi = '[c:1]1[c:2]([H:7])[c:3]([H:8])[c:4]([H:9])[c:5]([H:10])[c:6]1[H:11]'
# Without filtration, RDKit returns 3 resonance structures

# Test case 3: Pentadienyl radical (C=C[CH]C=C)
smi = 'C=C[CH]C=C'
# No dependence on kekulization
assert len(generate_radical_resonance_structures(
RDKitMol.FromSmiles(smi),
unique=False
unique=False,
kekulize=True,
)) == len(generate_radical_resonance_structures(
RDKitMol.FromSmiles(smi),
unique=False,
kekulize=False,
)) == 3
# With filtering and considering atom map, RDKit returns 3 resonance structures
# radical site at two ends and the middle
assert len(generate_radical_resonance_structures(
RDKitMol.FromSmiles(smi),
unique=True,
consider_atommap=True,
kekulize=True,
)) == 3
# With filtration and not considering atom map, RDKit returns 2 structures
res_mols = generate_radical_resonance_structures(
# radical site at the end and the middle
assert len(generate_radical_resonance_structures(
RDKitMol.FromSmiles(smi),
unique=True,
consider_atommap=False,
)
assert len(res_mols) == 2
# The first one (itself) should be aromatic and the second should not
for i, value in zip([0, 1], [True, False]):
assert res_mols[i].GetAtomWithIdx(0).GetIsAromatic() == value
kekulize=True,
)) == 2


if __name__ == '__main__':
Expand Down

0 comments on commit dacf738

Please sign in to comment.