1 file changed
+1
-1
lines changed
Submodule tokenizers updated 28 files
- .gitmodules+3
- CMakeLists.txt+46-2
- include/pytorch/tokenizers/bpe_tokenizer_base.h+12-9
- include/pytorch/tokenizers/hf_tokenizer.h+2-5
- include/pytorch/tokenizers/pcre2_regex.h+52
- include/pytorch/tokenizers/pre_tokenizer.h+11-8
- include/pytorch/tokenizers/re2_regex.h+44
- include/pytorch/tokenizers/regex.h+48
- include/pytorch/tokenizers/result.h+17
- include/pytorch/tokenizers/std_regex.h+40
- include/pytorch/tokenizers/tiktoken.h+5-15
- include/pytorch/tokenizers/token_decoder.h+2-2
- include/pytorch/tokenizers/tokenizer.h+1-1
- src/bpe_tokenizer_base.cpp+20-27
- src/hf_tokenizer.cpp+4-6
- src/pcre2_regex.cpp+109
- src/pre_tokenizer.cpp+17-15
- src/re2_regex.cpp+36
- src/regex.cpp+73
- src/std_regex.cpp+30
- src/tiktoken.cpp+15-100
- src/token_decoder.cpp+3-4
- targets.bzl+33-13
- test/test_base64.cpp+1-1
- test/test_pre_tokenizer.cpp+1-2
- test/test_regex.cpp+107
- third-party/pcre2+1
- third-party/targets.bzl+74
0 commit comments