Update OpenAI tokenizer to use "cl100k_base" encoding since all models use it now
defog-ai · May 15, 2024 · 14e01c7 · 14e01c7
1 parent 421a4ee
commit 14e01c7
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/query_generators/openai.py b/query_generators/openai.py
@@ -101,7 +101,7 @@ def count_tokens(
         messages: (only for OpenAI chat models) a list of messages to be used as a prompt. Each message is a dict with two keys: role and content
         prompt: (only for text-davinci-003 model) a string to be used as a prompt
         """
-        tokenizer = tiktoken.encoding_for_model(model)
+        tokenizer = tiktoken.get_encoding("cl100k_base")
         num_tokens = 0
         if model != "text-davinci-003":
             for message in messages: