Skip to content

Commit

Permalink
Merge pull request #212 from nlesc-nano/get_formula
Browse files Browse the repository at this point in the history
MAINT: Backport the PLAMS <= 1.5.1 `Molecule.get_formula` method
  • Loading branch information
BvB93 authored Dec 2, 2021
2 parents 5f490b3 + 72d90c1 commit b78488d
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 14 deletions.
4 changes: 2 additions & 2 deletions CAT/attachment/ligand_anchoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from rdkit import Chem

from ..logger import logger
from ..utils import get_template, AnchorTup, KindEnum
from ..utils import get_template, AnchorTup, KindEnum, get_formula
from ..mol_utils import separate_mod # noqa: F401
from ..workflows import MOL, FORMULA, HDF5_INDEX, OPT
from ..settings_dataframe import SettingsDataFrame
Expand Down Expand Up @@ -114,7 +114,7 @@ def _get_df(
# Create, fill and return the dataframe
df = SettingsDataFrame(-1, index=idx, columns=columns, settings=settings)
df[MOL] = mol_list
df[FORMULA] = [lig.get_formula() for lig in df[MOL]]
df[FORMULA] = [get_formula(lig) for lig in df[MOL]]
df[OPT] = False
return df[~df.index.duplicated(keep='first')] # Remove duplicate indices

Expand Down
3 changes: 2 additions & 1 deletion CAT/data_handling/mol_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

from rdkit import Chem, RDLogger

from ..utils import get_formula
from ..logger import logger
from ..data_handling.validate_mol import validate_mol

Expand Down Expand Up @@ -353,7 +354,7 @@ def set_mol_prop(mol: Molecule, mol_dict: Settings) -> None:
"""Set molecular and atomic properties."""
if mol_dict.is_core:
residue_name = 'COR'
mol.properties.name = mol.get_formula()
mol.properties.name = get_formula(mol)
else:
residue_name = 'LIG'
mol.properties.name = mol_dict.name
Expand Down
4 changes: 2 additions & 2 deletions CAT/multi_ligand.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from scm.plams import Molecule, MoleculeError

from .utils import AnchorTup
from .utils import AnchorTup, get_formula
from .workflows import WorkFlow
from .mol_utils import to_symbol
from .data_handling import mol_to_file
Expand Down Expand Up @@ -95,7 +95,7 @@ def _multi_lig_anchor(qd_series, ligands, path, anchor, allignment) -> np.ndarra
assert atoms
except AssertionError as ex:
raise MoleculeError(f'Failed to identify {to_symbol(atnum)!r} in '
f'{qd.get_formula()!r}') from ex
f'{get_formula(qd)!r}') from ex

coords = Molecule.as_array(None, atom_subset=atoms)
qd.properties.dummies = np.array(coords, ndmin=2, dtype=float)
Expand Down
12 changes: 11 additions & 1 deletion CAT/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from os.path import join, isdir, isfile, exists
from itertools import cycle, chain, repeat
from contextlib import redirect_stdout
from collections import abc
from collections import abc, Counter
from typing import (
Iterable, Union, TypeVar, Mapping, Type, Generator, Iterator, Optional,
Any, NoReturn, Dict, overload, Callable, NamedTuple, Tuple,
Expand Down Expand Up @@ -567,3 +567,13 @@ class AllignmentTup(NamedTuple):

kind: AllignmentEnum
invert: bool


def get_formula(mol: "Molecule") -> str:
    """Return the molecular formula of **mol** with alphabetically sorted symbols.

    Backport of the PLAMS <= 1.5.1 ``Molecule.get_formula`` method.
    The resulting atoms are reported in alphabetical order,
    contrary to the Hill system (that prioritizes ``CH`` pairs) utilized after 1.5.1.

    Parameters
    ----------
    mol : Molecule
        The molecule; it is only iterated over and each yielded atom
        must expose a ``symbol`` attribute.

    Returns
    -------
    str
        The concatenated ``<symbol><count>`` pairs, *e.g.* ``"C1H4O1"``.
        The count is always written out, even when it is 1.
        An empty molecule yields an empty string.

    """
    # Tally how often each atomic symbol occurs in the molecule
    symbol_count = Counter(at.symbol for at in mol)

    # The symbols are unique keys, so sorting the items orders them by symbol
    return "".join(f"{symbol}{count}" for symbol, count in sorted(symbol_count.items()))
3 changes: 2 additions & 1 deletion tests/test_gen_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from assertionlib import assertion

from CAT.gen_job_manager import GenJobManager
from CAT.utils import get_formula

SETTINGS = Settings({'counter_len': 3, 'hashing': 'input', 'remove_empty_directories': True})
PATH = join('tests', 'test_files')
Expand Down Expand Up @@ -50,7 +51,7 @@ def test_load_job() -> None:
assertion.isinstance(job.settings, Settings)
assertion.eq(job.depend, [])
assertion.eq(job._dont_pickle, [])
assertion.eq(job.molecule.get_formula(), 'C78Cd68H182O26Se55')
assertion.eq(get_formula(job.molecule), 'C78Cd68H182O26Se55')


def _test_check_hash() -> None:
Expand Down
3 changes: 2 additions & 1 deletion tests/test_mol_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import scm.plams.interfaces.molecule.rdkit as molkit
from assertionlib import assertion

from CAT.utils import get_formula
from CAT.data_handling.mol_import import (
read_mol_xyz, read_mol_pdb, read_mol_mol, read_mol_smiles, read_mol_plams, read_mol_rdkit,
read_mol_folder, read_mol_txt, get_charge_dict, set_mol_prop, canonicalize_mol
Expand Down Expand Up @@ -92,7 +93,7 @@ def test_read_mol_folder() -> None:
"""Test :func:`CAT.data_handling.validate_input.read_mol_folder`."""
mol_dict = Settings({'mol': PATH, 'path': PATH, 'guess_bonds': True, 'is_core': False})
_mol_list = read_mol_folder(mol_dict)
mol_list = [mol for mol in _mol_list if mol.get_formula() == 'C1H4O1']
mol_list = [mol for mol in _mol_list if get_formula(mol) == 'C1H4O1']

for mol in mol_list:
assertion.isinstance(mol, Molecule)
Expand Down
25 changes: 19 additions & 6 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Tests for :mod:`CAT.utils`."""

import os
from os.path import join
import re
from pathlib import Path

from unittest import mock

from scm.plams import config
from scm.plams import config, Molecule
from scm.plams.interfaces.adfsuite.ams import AMSJob
from scm.plams.interfaces.adfsuite.adf import ADFJob
from scm.plams.interfaces.thirdparty.orca import ORCAJob
Expand All @@ -15,10 +16,16 @@
from assertionlib import assertion

from CAT.utils import (
type_to_string, dict_concatenate, get_template, validate_path, check_sys_var, restart_init
type_to_string,
dict_concatenate,
get_template,
validate_path,
check_sys_var,
restart_init,
get_formula,
)

PATH = join('tests', 'test_files')
PATH = Path('tests') / 'test_files'
FOLDER = 'test_plams_workdir'


Expand Down Expand Up @@ -60,8 +67,8 @@ def test_validate_path() -> None:
assertion.eq(validate_path(''), os.getcwd())
assertion.eq(validate_path('.'), os.getcwd())
assertion.eq(validate_path(PATH), PATH)
assertion.assert_(validate_path, join(PATH, 'bob'), exception=FileNotFoundError)
assertion.assert_(validate_path, join(PATH, 'Methanol.xyz'), exception=NotADirectoryError)
assertion.assert_(validate_path, PATH / 'bob', exception=FileNotFoundError)
assertion.assert_(validate_path, PATH / 'Methanol.xyz', exception=NotADirectoryError)


@mock.patch.dict(
Expand All @@ -80,3 +87,9 @@ def test_restart_init() -> None:

_hash = '0da9b13507022986d26bbc57b4c366cf1ead1fe70ff750e071e79e393b14dfb5'
assertion.contains(manager.hashes, _hash)


def test_get_formula() -> None:
    """Test :func:`CAT.utils.get_formula`."""
    formula = get_formula(Molecule(PATH / "multi_ligand.pdb"))

    # Capture every run of letters (the atomic symbol) that is followed by
    # its atom count; a raw string with the `+` quantifier outside of the
    # character class ensures that only digits are accepted as the count
    matches = re.findall(r"([a-zA-Z]+)[0-9]+", formula)

    # The symbols are expected to be in alphabetical order
    assertion.eq(matches, ["C", "Cd", "F", "H", "O", "Se"])

0 comments on commit b78488d

Please sign in to comment.