diff --git a/README.md b/README.md index 9c80aa5..725b3df 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,25 @@ You can use conda/mamba: mamba install -c conda-forge safe-mol ``` +#### 2024/11/22 +NOTE: Installation might cause issues such as GPUs not being detected (which can be checked with `torch.cuda.is_available()`) and segmentation faults due to a mismatch between the installed CUDA version and the one supported by the driver. In that case, follow these steps: + +Create a new environment using conda: + +```bash +conda create -n env_safe python=3.12 +conda activate env_safe +``` + +Check the NVIDIA driver version (and the highest CUDA version it supports) by running `nvidia-smi`, or the installed CUDA toolkit version by running `nvcc --version`. + +Install PyTorch with a compatible CUDA version (see `https://pytorch.org/get-started/locally/`) and safe-mol: + +```bash +conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia +conda install -c conda-forge safe-mol +``` + ### Datasets and Models | Type | Name | Infos | Size | Comment | diff --git a/safe/tokenizer.py b/safe/tokenizer.py index 5f58009..dce76c4 100644 --- a/safe/tokenizer.py +++ b/safe/tokenizer.py @@ -136,14 +136,34 @@ def bos_token_id(self): @property def pad_token_id(self): - """Get the bos token id""" + """Get the pad token id""" return self.tokenizer.token_to_id(self.tokenizer.pad_token) @property def eos_token_id(self): - """Get the bos token id""" + """Get the eos token id""" return self.tokenizer.token_to_id(self.tokenizer.eos_token) + @property + def unk_token_id(self): + """Get the unk token id""" + return self.tokenizer.token_to_id(self.tokenizer.unk_token) + + @property + def mask_token_id(self): + """Get the mask token id""" + return self.tokenizer.token_to_id(self.tokenizer.mask_token) + + @property + def cls_token_id(self): + """Get the cls token id""" + return self.tokenizer.token_to_id(self.tokenizer.cls_token) + + @property + def sep_token_id(self): + """Get the sep token id""" + return self.tokenizer.token_to_id(self.tokenizer.sep_token) + @classmethod def 
set_special_tokens( cls,