Skip to content

Commit

Permalink
add sub-functions related to protonation (de/protonate_at_site, check…
Browse files Browse the repository at this point in the history
… is_implicit) and unit test for uncharge_mol
  • Loading branch information
jonwzheng committed Jun 4, 2024
1 parent 8e9f0fd commit 578f05b
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 26 deletions.
99 changes: 73 additions & 26 deletions rdtools/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,17 @@ def fast_sanitize(mol: Chem.RWMol):
)


def is_implicit(mol : Chem.RWMol):
    """
    Infer whether a molecule has implicit hydrogens.

    Args:
        mol: molecule whose atoms are inspected via ``GetAtoms()``.

    Returns:
        bool: ``True`` if at least one atom reports ``GetNumImplicitHs() > 0``,
        ``False`` otherwise (including when the molecule has no atoms).
    """
    # any() stops at the first atom that carries an implicit hydrogen.
    return any(atom.GetNumImplicitHs() > 0 for atom in mol.GetAtoms())


def uncharge_mol(mol : Chem.RWMol,
method = "all"):
Expand Down Expand Up @@ -259,45 +270,86 @@ def uncharge_mol(mol : Chem.RWMol,
# Algorithm adapted from Noel O’Boyle (Vincent Scalfani adapted code for RDKit)
# See https://www.rdkit.org/docs/Cookbook.html#neutralizing-molecules)

# Check if H's are explicit or implicit
implicit_h = False
pattern = Chem.MolFromSmarts("[+1!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")

for atom in mol.GetAtoms():
if (not atom.GetNoImplicit() and atom.GetAtomicNum() != 1) or atom.GetNumImplicitHs() > 0:
implicit_h = True
print(atom.GetNumImplicitHs())
pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
break
implicit_h = is_implicit(mol)
if implicit_h:
pattern = Chem.MolFromSmarts("[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")
else:
pattern = Chem.MolFromSmarts("[+1!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")

at_matches = mol.GetSubstructMatches(pattern)
at_matches_list = [y[0] for y in at_matches]
if len(at_matches_list) > 0:
for at_idx in at_matches_list:
atom = mol.GetAtomWithIdx(at_idx)
chg = atom.GetFormalCharge()
hcount = atom.GetTotalNumHs()
atom.SetFormalCharge(0)
if implicit_h:
atom.SetNumExplicitHs(hcount - chg)
else: # find a neighboring H atom and remove it
for neighbor in atom.GetNeighbors():
if neighbor.GetAtomicNum() == 1:
mol.RemoveAtom(neighbor.GetIdx())
break
atom.UpdatePropertyCache()

if chg > 0:
mol = deprotonate_at_site(mol, at_idx)
elif chg < 0:
mol = protonate_at_site(mol, at_idx)

if get_formal_charge(mol) == 0:
return mol

# TODO: identify if we want the final form to be re-atom mapped, especially if new atoms are added/removed.
# Also whether these should have implicit H, e.g. "CC(=O)[O-]" --> "[C:1]([C:2](=[O:3])[OH:4])([H:5])([H:6])[H:7]"


warnings.warn(f"Unable to uncharge: got {mol_to_smiles(mol)}")
return mol


def protonate_at_site(mol, site):
    '''
    Add a proton to the atom at the provided index of a mol object.

    The formal charge of the atom is raised by one. How the hydrogen is added
    depends on ``is_implicit(mol)``: either the atom's explicit-H count is set
    to its current total hydrogen count plus one, or a separate ``[H]`` atom
    is combined into the molecule and single-bonded to the site.

    Args:
        mol: Mol object
        site: RDKit atom index of the site to be protonated.

    Returns:
        The protonated molecule: ``mol`` itself in the implicit-hydrogen case,
        otherwise a new ``Chem.RWMol`` built from the ``combine_mols`` result.
    '''
    # Atom count before anything is added; used below as the index of the new H.
    # NOTE(review): assumes combine_mols appends the added atom after the
    # existing ones - confirm against combine_mols.
    h_index = mol.GetNumAtoms()

    target = mol.GetAtomWithIdx(site)
    target.SetFormalCharge(target.GetFormalCharge() + 1)

    if is_implicit(mol):
        # Hydrogens are tracked as counts on the atom: bump the count by one.
        target.SetNumExplicitHs(target.GetTotalNumHs(includeNeighbors=True) + 1)
        return mol

    # Hydrogens are real atoms: merge in a lone H and bond it to the site.
    combined = combine_mols(mol, Chem.MolFromSmiles('[H]'))
    combined = Chem.RWMol(combined)  # as it appears to get un-RWmol from combining
    combined.AddBond(site, h_index, order=Chem.rdchem.BondType.SINGLE)
    return combined


def deprotonate_at_site(mol, site):
    '''
    Remove a proton from the atom at the provided index of a mol object.

    The formal charge of the atom is lowered by one. How the hydrogen is
    removed depends on ``is_implicit(mol)``: either the atom's explicit-H
    count is set to its current total hydrogen count minus one, or the first
    neighboring hydrogen atom is deleted from the molecule. If no neighbor is
    a hydrogen in the latter case, only the formal charge changes.

    Args:
        mol: Mol object
        site: RDKit atom index of the site to be deprotonated.

    Returns:
        The same ``mol`` object, modified in place.
    '''
    target = mol.GetAtomWithIdx(site)
    target.SetFormalCharge(target.GetFormalCharge() - 1)

    if is_implicit(mol):
        # Hydrogens are tracked as counts on the atom: drop the count by one.
        target.SetNumExplicitHs(target.GetTotalNumHs(includeNeighbors=True) - 1)
        return mol

    # Hydrogens are real atoms: delete the first one bonded to the site.
    hydrogen = next(
        (nbr for nbr in target.GetNeighbors() if nbr.GetAtomicNum() == 1),
        None,
    )
    if hydrogen is not None:
        mol.RemoveAtom(hydrogen.GetIdx())

    return mol


def get_closed_shell_mol(
mol: Chem.RWMol,
sanitize: bool = True,
Expand Down Expand Up @@ -424,8 +476,3 @@ def set_mol_positions(
raise ValueError(f"Conformer {conf_id} does not exist")
else:
set_conformer_coordinates(conf, coords)


# Module-level statements: they execute every time this module is imported or run.
# Build a molecule from the charged SMILES "CC(=O)[O-]", pass it through
# uncharge_mol with method="nocharge", and print the resulting SMILES.
# NOTE(review): looks like leftover ad-hoc debugging code - confirm whether it
# should live in this module at all.
mol = mol_from_smiles("CC(=O)[O-]")
z = uncharge_mol(mol, method = "nocharge")
print(Chem.MolToSmiles(z))
22 changes: 22 additions & 0 deletions test/rdtools/test_mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
get_atomic_nums,
get_atom_masses,
get_closed_shell_mol,
is_implicit,
uncharge_mol
)
from rdtools.conf import (
embed_multiple_null_confs,
Expand Down Expand Up @@ -175,3 +177,23 @@ def test_get_closed_shell_mol(rad_smi, expect_smi, cheap, atommap):
rad_mol = mol_from_smiles(rad_smi, assign_atom_map=atommap)
cs_mol = get_closed_shell_mol(rad_mol, cheap=cheap)
assert mol_to_smiles(cs_mol) == expect_smi


@pytest.mark.parametrize(
    "ion_smi, expected_smi",
    [
        ("CCCCC(=O)[O-]", "CCCCC(=O)O"),
        ("c1ccccc1[O-]", "Oc1ccccc1"),
        ("CCC[NH3+]", "CCCN"),
        ("[NH3+]CC(=O)[O-]", "NCC(=O)O"),
        ("S(=O)(=O)([O-])[O-]", "O=S(=O)(O)O"),
        ("C", "C"),
    ],
)
@pytest.mark.parametrize("method", ["all", "rdkit", "nocharge"])
def test_uncharge_mol(ion_smi, expected_smi, method):
    """
    Check that ``uncharge_mol`` turns each input SMILES into the expected SMILES.

    Every (ion_smi, expected_smi) pair is run once per ``method`` value; the
    last pair ("C" -> "C") has identical input and expected output.
    """
    neutralized = uncharge_mol(mol_from_smiles(ion_smi), method=method)
    assert mol_to_smiles(neutralized) == expected_smi

0 comments on commit 578f05b

Please sign in to comment.