diff --git a/safe/converter.py b/safe/converter.py index 7611168..7bd0ee0 100644 --- a/safe/converter.py +++ b/safe/converter.py @@ -332,7 +332,13 @@ def encoder( val = str(starting_num) if starting_num < 10 else f"%{starting_num}" # we cannot have anything of the form "\([@=-#-$/\]*\d+\)" attach_regexp = re.compile(r"(" + re.escape(attach) + r")") - scaffold_str = attach_regexp.sub(val, scaffold_str) + # check if we have at least 2 matches, if not, we have a dummy + n_matches = len(attach_regexp.findall(scaffold_str)) + scaffold_str = ( + attach_regexp.sub(val, scaffold_str) + if n_matches > 1 + else scaffold_str.replace(attach, "*") + ) starting_num += 1 # now we need to remove all the parenthesis around digit only number wrong_attach = re.compile(r"\(([\%\d]*)\)") diff --git a/tests/test_safe.py b/tests/test_safe.py index fdfc288..b266249 100644 --- a/tests/test_safe.py +++ b/tests/test_safe.py @@ -85,6 +85,7 @@ def test_rdkit_smiles_parser_issues(): "c1cc2c(cc1[C@@H]1CCC[NH2+]1)OCCO2", "[13C]1CCCCC1C[238U]C[NH3+]", "COC[CH2:1][CH2:2]O[CH:2]C[OH:3]", + "C1*CCC1COO", ], ) def test_bracket_smiles_issues(input_sm):