diff --git a/README.md b/README.md index 249f142..b38a354 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,6 @@ [![Conda](https://img.shields.io/conda/v/conda-forge/safe-mol?label=conda&color=success)](https://anaconda.org/conda-forge/safe-mol) [![PyPI - Downloads](https://img.shields.io/pypi/dm/safe-mol)](https://pypi.org/project/safe-mol/) [![Conda](https://img.shields.io/conda/dn/conda-forge/safe-mol)](https://anaconda.org/conda-forge/safe-mol) -[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/safe-mol)](https://pypi.org/project/safe-mol/) [![Code license](https://img.shields.io/badge/Code%20License-Apache_2.0-green.svg)](https://github.com/datamol-io/safe/blob/main/LICENSE) [![Data License](https://img.shields.io/badge/Data%20License-CC%20BY%204.0-red.svg)](https://github.com/datamol-io/safe/blob/main/DATA_LICENSE) [![GitHub Repo stars](https://img.shields.io/github/stars/datamol-io/safe)](https://github.com/datamol-io/safe/stargazers) @@ -60,6 +59,13 @@ The construction of a SAFE strings requires defining a molecular fragmentation a +## News + +#### 2024/01/15 +1. We have updated the model with the version used for the paper. The revision number is `` +2. @IanAWatson has a C++ implementation of SAFE in [LillyMol](https://github.com/IanAWatson/LillyMol/tree/bazel_version_float) that is quite fast and uses a custom fragmentation algorithm. Follow the installation instructions on the repo and check out the docs of the CLI here: [docs/Molecule_Tools/SAFE.md](https://github.com/IanAWatson/LillyMol/blob/bazel_version_float/docs/Molecule_Tools/SAFE.md) + + ### Installation You can install `safe` using pip: diff --git a/safe/__init__.py b/safe/__init__.py index b58b0cf..4bbdbc3 100644 --- a/safe/__init__.py +++ b/safe/__init__.py @@ -1,12 +1,6 @@ -from .converter import encode -from .converter import decode -from .converter import SAFEConverter -from .viz import to_image -from .tokenizer import SAFETokenizer -from .tokenizer import split +from . 
import trainer, utils +from ._exception import SAFEDecodeError, SAFEEncodeError, SAFEFragmentationError +from .converter import SAFEConverter, decode, encode from .sample import SAFEDesign -from ._exception import SAFEDecodeError -from ._exception import SAFEEncodeError -from ._exception import SAFEFragmentationError -from . import trainer -from . import utils +from .tokenizer import SAFETokenizer, split +from .viz import to_image diff --git a/safe/converter.py b/safe/converter.py index b2b2f31..7611168 100644 --- a/safe/converter.py +++ b/safe/converter.py @@ -1,21 +1,15 @@ -from typing import Union -from typing import Optional -from typing import List -from typing import Callable - -import re -import datamol as dm import itertools -import numpy as np - -from contextlib import suppress +import re from collections import Counter +from contextlib import suppress +from typing import Callable, List, Optional, Union +import datamol as dm +import numpy as np from rdkit import Chem from rdkit.Chem import BRICS -from ._exception import SAFEDecodeError -from ._exception import SAFEEncodeError -from ._exception import SAFEFragmentationError + +from ._exception import SAFEDecodeError, SAFEEncodeError, SAFEFragmentationError from .utils import standardize_attach @@ -110,8 +104,8 @@ def _find_branch_number(cls, inp: str): Args: inp: input smiles """ - - matching_groups = re.findall(r"((?<=%)\d{2})|((?