Skip to content

Commit

Permalink
Add code and test for checking if mol is symmetric to a substructure
Browse files Browse the repository at this point in the history
  • Loading branch information
jonwzheng committed Jun 4, 2024
1 parent 578f05b commit 2058cdb
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 1 deletion.
68 changes: 68 additions & 0 deletions rdtools/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import traceback

import numpy as np
from collections import defaultdict

from rdkit import Chem
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
Expand Down Expand Up @@ -282,3 +283,70 @@ def is_same_connectivity_conf(

else:
return is_same_connectivity_mol(mol, new_mol)


def is_symmetric_to_substructure(mol: Chem.Mol, substructure: Chem.Mol) -> bool:
    """
    Check whether a mol is symmetric with respect to a provided substructure.

    The molecule is considered symmetric w.r.t. the substructure when, for
    every atom position of the substructure, the atoms matched at that
    position across all substructure matches are either all inside or all
    outside each symmetry class of the molecule.

    Args:
        mol (Chem.Mol): The molecule to check.
        substructure (Chem.Mol): A molecule representing the SMARTS
            substructure to check.

    Returns:
        bool: ``False`` if the substructure is not found in the molecule,
            ``True`` if it is found exactly once, otherwise whether all of
            the matches are equivalent under the molecule's symmetry classes.
    """
    matches = mol.GetSubstructMatches(substructure)

    if not matches:  # Substructure isn't in molecule.
        return False
    if len(matches) == 1:  # A single match is trivially symmetric.
        return True

    # Only compute the symmetry classes once they are actually needed: they
    # require enumerating every self-match of the molecule.
    classes = find_symmetry_classes(mol)
    num_matches = len(matches)

    # Every match holds one atom index per query atom, so transposing gives,
    # for each query-atom position, the atoms matched at that position.
    # Example: matches = ((1, 2), (3, 4)) -> columns = [(1, 3), (2, 4)]
    columns = list(zip(*matches))

    for cla in classes:  # Example: classes = {(2, 4), (1, 3), (0, 5)}; cla = (2, 4)
        members = frozenset(cla)
        for column in columns:
            hits = sum(atom_idx in members for atom_idx in column)
            # hits == num_matches: all matches agree on this class (symmetric).
            # hits == 0: this class is unrelated to this position.
            # Anything in between means the matches are not equivalent.
            if 0 < hits < num_matches:
                return False

    return True

def find_symmetry_classes(mol: Chem.Mol) -> set:
    """
    Find set of symmetry classes for a given mol.

    The molecule is matched against itself without uniquifying the results;
    for every atom index, the atom indices it is mapped onto across all of
    these self-matches are collected into one class.

    Adapted from code by Greg Landrum, 2011:
    https://sourceforge.net/p/rdkit/mailman/rdkit-discuss/thread/CAD4fdRSofifYy_GFeZfxoHd_z4Y=4tVNR3UuBTea3sr81e8UMQ@mail.gmail.com/

    Args:
        mol: Molecule to examine symmetry classes.

    Returns:
        set: A set of tuples, each holding the sorted atom indices of one
            symmetry class.

    NOTE(review): RDKit's ``GetSubstructMatches`` caps the number of returned
    matches (``maxMatches``); for highly symmetric molecules the classes may
    therefore be incomplete -- confirm whether a larger cap is needed.
    """
    equivs = defaultdict(set)
    for match in mol.GetSubstructMatches(mol, uniquify=False):
        for atom_idx, mapped_idx in enumerate(match):
            equivs[atom_idx].add(mapped_idx)

    # Sort each class before turning it into a tuple: set iteration order
    # depends on insertion order, so equal classes could otherwise produce
    # differently ordered tuples and show up more than once in the result.
    return {tuple(sorted(members)) for members in equivs.values()}
18 changes: 17 additions & 1 deletion test/rdtools/test_compare.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import pytest

from rdtools.compare import get_match_and_recover_recipe, get_unique_mols, has_matched_mol, is_same_connectivity_mol
from rdtools.compare import get_match_and_recover_recipe, get_unique_mols, has_matched_mol, is_same_connectivity_mol, is_symmetric_to_substructure
from rdtools.conversion import mol_from_smiles, mol_to_smiles

from rdkit.Chem import MolFromSmarts

@pytest.mark.parametrize(
"smi1, smi2, expected",
Expand Down Expand Up @@ -147,3 +148,18 @@ def test_has_same_connectivity(smi1, smi2, expect_match):
mol1 = mol_from_smiles(smi1)
mol2 = mol_from_smiles(smi2)
assert is_same_connectivity_mol(mol1, mol2) == expect_match


@pytest.mark.parametrize(
    'smi, sma, expect_match',
    [
        # One carbonyl only: a single match.
        ('CC(=O)C', '[CX3]=[OX1]', True),
        # Two carbonyls related by the molecule's symmetry.
        ('CC(=O)C(=O)C', '[CX3]=[OX1]', True),
        # Two carbonyls in different environments (ketone vs. aldehyde).
        ('CC(=O)CC(=O)', '[CX3]=[OX1]', False),
        # No carbonyl at all: the substructure is absent.
        ('C', '[CX3]=[OX1]', False),
        # Four equivalent C-O bonds around a central carbon.
        ('OCC(CO)(CO)CO', '[CX4]-[OX2]', True),
    ],
)
def test_is_symmetric_to_substructure(smi, sma, expect_match):
    """
    Check is_symmetric_to_substructure on a SMILES molecule / SMARTS query pair.
    """
    target = mol_from_smiles(smi)
    query = MolFromSmarts(sma)
    outcome = is_symmetric_to_substructure(target, query)
    assert outcome == expect_match

0 comments on commit 2058cdb

Please sign in to comment.