From 6a940f1cb6e9a7706902703ebf189863ead5ba85 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 2 Dec 2021 15:55:12 +0100 Subject: [PATCH 1/2] MAINT: Backport the PLAMS <= 1.5.1 `Molecule.get_formula` method --- CAT/attachment/ligand_anchoring.py | 4 ++-- CAT/data_handling/mol_import.py | 3 ++- CAT/multi_ligand.py | 4 ++-- CAT/utils.py | 12 +++++++++++- tests/test_gen_job_manager.py | 3 ++- tests/test_mol_import.py | 3 ++- tests/test_utils.py | 25 +++++++++++++++++++------ 7 files changed, 40 insertions(+), 14 deletions(-) diff --git a/CAT/attachment/ligand_anchoring.py b/CAT/attachment/ligand_anchoring.py index 405b3600..0ab878e8 100644 --- a/CAT/attachment/ligand_anchoring.py +++ b/CAT/attachment/ligand_anchoring.py @@ -34,7 +34,7 @@ from rdkit import Chem from ..logger import logger -from ..utils import get_template, AnchorTup, KindEnum +from ..utils import get_template, AnchorTup, KindEnum, get_formula from ..mol_utils import separate_mod # noqa: F401 from ..workflows import MOL, FORMULA, HDF5_INDEX, OPT from ..settings_dataframe import SettingsDataFrame @@ -114,7 +114,7 @@ def _get_df( # Create, fill and return the dataframe df = SettingsDataFrame(-1, index=idx, columns=columns, settings=settings) df[MOL] = mol_list - df[FORMULA] = [lig.get_formula() for lig in df[MOL]] + df[FORMULA] = [get_formula(lig) for lig in df[MOL]] df[OPT] = False return df[~df.index.duplicated(keep='first')] # Remove duplicate indices diff --git a/CAT/data_handling/mol_import.py b/CAT/data_handling/mol_import.py index d1055c34..78da4295 100644 --- a/CAT/data_handling/mol_import.py +++ b/CAT/data_handling/mol_import.py @@ -51,6 +51,7 @@ from rdkit import Chem, RDLogger +from ..utils import get_formula from ..logger import logger from ..data_handling.validate_mol import validate_mol @@ -353,7 +354,7 @@ def set_mol_prop(mol: Molecule, mol_dict: Settings) -> None: """Set molecular and atomic properties.""" if mol_dict.is_core: residue_name = 'COR' - mol.properties.name = 
mol.get_formula() + mol.properties.name = get_formula(mol) else: residue_name = 'LIG' mol.properties.name = mol_dict.name diff --git a/CAT/multi_ligand.py b/CAT/multi_ligand.py index da6a1283..7790bda3 100755 --- a/CAT/multi_ligand.py +++ b/CAT/multi_ligand.py @@ -8,7 +8,7 @@ from scm.plams import Molecule, MoleculeError -from .utils import AnchorTup +from .utils import AnchorTup, get_formula from .workflows import WorkFlow from .mol_utils import to_symbol from .data_handling import mol_to_file @@ -95,7 +95,7 @@ def _multi_lig_anchor(qd_series, ligands, path, anchor, allignment) -> np.ndarra assert atoms except AssertionError as ex: raise MoleculeError(f'Failed to identify {to_symbol(atnum)!r} in ' - f'{qd.get_formula()!r}') from ex + f'{get_formula(qd)!r}') from ex coords = Molecule.as_array(None, atom_subset=atoms) qd.properties.dummies = np.array(coords, ndmin=2, dtype=float) diff --git a/CAT/utils.py b/CAT/utils.py index cb749c09..ddce6c85 100644 --- a/CAT/utils.py +++ b/CAT/utils.py @@ -39,7 +39,7 @@ from os.path import join, isdir, isfile, exists from itertools import cycle, chain, repeat from contextlib import redirect_stdout -from collections import abc +from collections import abc, Counter from typing import ( Iterable, Union, TypeVar, Mapping, Type, Generator, Iterator, Optional, Any, NoReturn, Dict, overload, Callable, NamedTuple, Tuple, @@ -567,3 +567,13 @@ class AllignmentTup(NamedTuple): kind: AllignmentEnum invert: bool + + +def get_formula(mol: Molecule) -> str: + """Backport of the PLAMS <= 1.5.1 ``Molecule.get_formula`` method. + + The resulting atoms are reported in alphabetical order, + contrary to the Hill system (that prioritizes ``CH`` pairs) utilized after 1.5.1. 
+ """ + dct = Counter(at.symbol for at in mol) + return "".join(f"{at}{i}" for at, i in sorted(dct.items())) diff --git a/tests/test_gen_job_manager.py b/tests/test_gen_job_manager.py index f41aea96..61893532 100644 --- a/tests/test_gen_job_manager.py +++ b/tests/test_gen_job_manager.py @@ -7,6 +7,7 @@ from assertionlib import assertion from CAT.gen_job_manager import GenJobManager +from CAT.utils import get_formula SETTINGS = Settings({'counter_len': 3, 'hashing': 'input', 'remove_empty_directories': True}) PATH = join('tests', 'test_files') @@ -50,7 +51,7 @@ def test_load_job() -> None: assertion.isinstance(job.settings, Settings) assertion.eq(job.depend, []) assertion.eq(job._dont_pickle, []) - assertion.eq(job.molecule.get_formula(), 'C78Cd68H182O26Se55') + assertion.eq(get_formula(job.molecule), 'C78Cd68H182O26Se55') def _test_check_hash() -> None: diff --git a/tests/test_mol_import.py b/tests/test_mol_import.py index 032981a0..78d7a5e5 100644 --- a/tests/test_mol_import.py +++ b/tests/test_mol_import.py @@ -9,6 +9,7 @@ import scm.plams.interfaces.molecule.rdkit as molkit from assertionlib import assertion +from CAT.utils import get_formula from CAT.data_handling.mol_import import ( read_mol_xyz, read_mol_pdb, read_mol_mol, read_mol_smiles, read_mol_plams, read_mol_rdkit, read_mol_folder, read_mol_txt, get_charge_dict, set_mol_prop, canonicalize_mol @@ -92,7 +93,7 @@ def test_read_mol_folder() -> None: """Test :func:`CAT.data_handling.validate_input.read_mol_folder`.""" mol_dict = Settings({'mol': PATH, 'path': PATH, 'guess_bonds': True, 'is_core': False}) _mol_list = read_mol_folder(mol_dict) - mol_list = [mol for mol in _mol_list if mol.get_formula() == 'C1H4O1'] + mol_list = [mol for mol in _mol_list if get_formula(mol) == 'C1H4O1'] for mol in mol_list: assertion.isinstance(mol, Molecule) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4dd3434d..ea835bc4 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,11 +1,12 @@ """Tests for 
:mod:`CAT.utils`.""" import os -from os.path import join +import re +from pathlib import Path from unittest import mock -from scm.plams import config +from scm.plams import config, Molecule from scm.plams.interfaces.adfsuite.ams import AMSJob from scm.plams.interfaces.adfsuite.adf import ADFJob from scm.plams.interfaces.thirdparty.orca import ORCAJob @@ -15,10 +16,16 @@ from assertionlib import assertion from CAT.utils import ( - type_to_string, dict_concatenate, get_template, validate_path, check_sys_var, restart_init + type_to_string, + dict_concatenate, + get_template, + validate_path, + check_sys_var, + restart_init, + get_formula, ) -PATH = join('tests', 'test_files') +PATH = Path('tests') / 'test_files' FOLDER = 'test_plams_workdir' @@ -60,8 +67,8 @@ def test_validate_path() -> None: assertion.eq(validate_path(''), os.getcwd()) assertion.eq(validate_path('.'), os.getcwd()) assertion.eq(validate_path(PATH), PATH) - assertion.assert_(validate_path, join(PATH, 'bob'), exception=FileNotFoundError) - assertion.assert_(validate_path, join(PATH, 'Methanol.xyz'), exception=NotADirectoryError) + assertion.assert_(validate_path, PATH / 'bob', exception=FileNotFoundError) + assertion.assert_(validate_path, PATH / 'Methanol.xyz', exception=NotADirectoryError) @mock.patch.dict( @@ -80,3 +87,9 @@ def test_restart_init() -> None: _hash = '0da9b13507022986d26bbc57b4c366cf1ead1fe70ff750e071e79e393b14dfb5' assertion.contains(manager.hashes, _hash) + + +def test_get_formula() -> None: + formula = get_formula(Molecule(PATH / "multi_ligand.pdb")) + matches = re.findall(r"([a-zA-Z]+)[0-9]+", formula) + assertion.eq(matches, ["C", "Cd", "F", "H", "O", "Se"]) From 72d90c17b72bf883043268892456771a61d57712 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Thu, 2 Dec 2021 16:16:54 +0100 Subject: [PATCH 2/2] BLD: Avoid schema 0.7.5 Xref keleshev/schema#272 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f25017c7..59804b66 100644 
--- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ 'scipy', 'pandas', 'pyyaml>=5.1', - 'schema', + 'schema!=0.7.5', 'AssertionLib>=2.2.3', 'plams>=1.5.1', 'contextlib2>=0.6.0; python_version=="3.6"',