Skip to content

Commit

Permalink
Merge pull request #8 from valence-labs/evaluate
Browse files Browse the repository at this point in the history
Sampling with SAFE
  • Loading branch information
maclandrol authored Aug 29, 2023
2 parents 2ad3241 + 86fab69 commit 8d21a22
Show file tree
Hide file tree
Showing 25 changed files with 13,437 additions and 270 deletions.
12 changes: 10 additions & 2 deletions docs/api/safe.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
members:
- encode
- decode
- SafeConverter
- SAFEConverter
show_root_heading: false


Expand All @@ -18,4 +18,12 @@
members:
- SAFESplitter
- SAFETokenizer
show_root_heading: false
show_root_heading: false



---

## Utils

::: safe.trainer.safe_utils
1 change: 0 additions & 1 deletion docs/api/safe.models.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,4 @@ The input config file for training a `SAFE` model is very similar to the GPT2 co
## Data Utils
::: safe.trainer.data_utils

::: safe.trainer.safe_utils

11,733 changes: 11,732 additions & 1 deletion docs/tutorials/design-with-safe.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion expts/notebook/1.5-play-with-tokenizer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
"from functools import partial\n",
"ALLOWED_DESCRIPTORS = [\"mw\", \"fsp3\", \"n_lipinski_hba\", \"n_lipinski_hbd\", \"n_rings\", \"n_heavy_atoms\", \"n_hetero_atoms\", \"n_rotatable_bonds\", \"tpsa\"]\n",
"def apply_converter(row):\n",
" row[\"inputs\"] = sf.trainer.safe_utils.convert_to_safe(row[\"smiles\"], canonical=False, randomize=True, fraction_hs=0.4)\n",
" row[\"inputs\"] = sf.utils.convert_to_safe(row[\"smiles\"], canonical=False, randomize=True, fraction_hs=0.4)\n",
" descriptors_dict = dm.descriptors.compute_many_descriptors(dm.to_mol(row[\"smiles\"]))\n",
" row[\"descriptors\"] = [descriptors_dict[x] for x in ALLOWED_DESCRIPTORS]\n",
" return row"
Expand Down
2 changes: 1 addition & 1 deletion expts/notebook/1.6-training-toy.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
"from functools import partial\n",
"ALLOWED_DESCRIPTORS = [\"mw\", \"fsp3\", \"n_lipinski_hba\", \"n_lipinski_hbd\", \"n_rings\", \"n_heavy_atoms\", \"n_hetero_atoms\", \"n_rotatable_bonds\", \"tpsa\"]\n",
"def apply_converter(row):\n",
" row[\"inputs\"] = sf.trainer.safe_utils.convert_to_safe(row[\"smiles\"], canonical=False, randomize=True, fraction_hs=0.4)\n",
" row[\"inputs\"] = sf.utils.convert_to_safe(row[\"smiles\"], canonical=False, randomize=True, fraction_hs=0.4)\n",
" descriptors_dict = dm.descriptors.compute_many_descriptors(dm.to_mol(row[\"smiles\"]))\n",
" row[\"descriptors\"] = [descriptors_dict[x] for x in ALLOWED_DESCRIPTORS]\n",
" return row"
Expand Down
1 change: 1 addition & 0 deletions expts/notebook/tmp_data/proc_data/dataset_dict.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"splits": ["train", "test", "validation"]}
Binary file not shown.
19 changes: 19 additions & 0 deletions expts/notebook/tmp_data/proc_data/test/dataset_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"citation": "",
"description": "",
"features": {
"inputs": {
"dtype": "string",
"_type": "Value"
},
"descriptors": {
"feature": {
"dtype": "float64",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}
13 changes: 13 additions & 0 deletions expts/notebook/tmp_data/proc_data/test/state.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "739e961c83556f0c",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}
Binary file not shown.
19 changes: 19 additions & 0 deletions expts/notebook/tmp_data/proc_data/train/dataset_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"citation": "",
"description": "",
"features": {
"inputs": {
"dtype": "string",
"_type": "Value"
},
"descriptors": {
"feature": {
"dtype": "float64",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}
13 changes: 13 additions & 0 deletions expts/notebook/tmp_data/proc_data/train/state.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "8232d33a79926636",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}
Binary file not shown.
19 changes: 19 additions & 0 deletions expts/notebook/tmp_data/proc_data/validation/dataset_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"citation": "",
"description": "",
"features": {
"inputs": {
"dtype": "string",
"_type": "Value"
},
"descriptors": {
"feature": {
"dtype": "float64",
"_type": "Value"
},
"_type": "Sequence"
}
},
"homepage": "",
"license": ""
}
13 changes: 13 additions & 0 deletions expts/notebook/tmp_data/proc_data/validation/state.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"_data_files": [
{
"filename": "data-00000-of-00001.arrow"
}
],
"_fingerprint": "739e961c83556f0c",
"_format_columns": null,
"_format_kwargs": {},
"_format_type": null,
"_output_all_columns": false,
"_split": null
}
10 changes: 7 additions & 3 deletions safe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@

from .converter import encode
from .converter import decode
from .converter import SAFEConverter
from .viz import to_image
from ._exception import SafeDecodeError
from ._exception import SafeEncodeError
from ._exception import SafeFragmentationError
from .tokenizer import SAFETokenizer
from .sample import SAFEDesign
from ._exception import SAFEDecodeError
from ._exception import SAFEEncodeError
from ._exception import SAFEFragmentationError
from . import trainer
from . import utils
6 changes: 3 additions & 3 deletions safe/_exception.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
class SafeDecodeError(Exception):
class SAFEDecodeError(Exception):
"""Raised when a string cannot be decoded with the given encoding."""

pass


class SafeEncodeError(Exception):
class SAFEEncodeError(Exception):
"""Raised when a molecule cannot be encoded using SAFE."""

pass


class SafeFragmentationError(Exception):
class SAFEFragmentationError(Exception):
"""Raised when the slicing algorithm returns empty bonds."""

pass
Loading

0 comments on commit 8d21a22

Please sign in to comment.