-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
15 lines (15 loc) · 865 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from transformers import AutoTokenizer
# Inspect a code model's tokenizer: print the tokenizer object itself, then
# the BOS token id followed by the ids produced for a handful of
# fill-in-the-middle (FIM) marker strings.
#
# Other checkpoints this script has been pointed at (swap MODEL_NAME to try):
#   "stabilityai/stable-code-instruct-3b"
#   "bigcode/starcoderbase-1b"   (the original call also passed `token=...`)
#
# SECURITY: a literal Hugging Face access token was previously embedded in
# this file. It has been removed from the source; treat it as leaked and
# revoke it. If a token is needed, read it at runtime instead, e.g.
#   AutoTokenizer.from_pretrained(MODEL_NAME, token=os.environ["HF_TOKEN"])
MODEL_NAME = "deepseek-ai/deepseek-coder-1.3b-instruct"

# Marker strings to probe, in the order they are printed.
# NOTE(review): DeepSeek Coder's published FIM sentinels are spelled with
# full-width vertical bars (U+FF5C), whereas these strings use ASCII "|".
# If the intent is to look up the single sentinel ids, confirm the spelling.
FIM_MARKERS = (
    "<|fim▁hole|>",
    "<|fim▁begin|>",
    "<|fim▁end|>",
    "<pad>",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print(tokenizer)

# add_special_tokens=False keeps BOS/EOS out of the result so only the ids of
# the marker text itself are shown.
print(
    tokenizer.bos_token_id,
    *(
        tokenizer.encode(marker, add_special_tokens=False)
        for marker in FIM_MARKERS
    ),
)

# StarCoder-style tokenizers use differently named markers; the equivalent
# probe (with encode()'s default special-token handling) was:
#   print(tokenizer.encode("<fim_suffix>"),
#         tokenizer.encode("<fim_prefix>"),
#         tokenizer.encode("<fim_middle>"),
#         tokenizer.encode("<fim_pad>"))