Update Anthropic token counting #85

Open · wants to merge 9 commits into main · Changes from 6 commits
3 changes: 1 addition & 2 deletions tests/test_costs.py
@@ -75,7 +75,6 @@ def test_count_message_tokens(model, expected_output):
         ("gpt-4o", 17),
         ("azure/gpt-4o", 17),
         ("claude-2.1", 4),
-
     ],
 )
 def test_count_message_tokens_with_name(model, expected_output):
@@ -116,7 +115,7 @@ def test_count_message_tokens_invalid_model():
         ("gpt-4-vision-preview", 4),
         ("text-embedding-ada-002", 4),
         ("gpt-4o", 4),
-        ("claude-2.1", 4)
+        ("claude-2.1", 4),
     ],
 )
 def test_count_string_tokens(model, expected_output):
3 changes: 1 addition & 2 deletions tests/test_llama_index_callbacks.py
@@ -1,8 +1,7 @@
 # test_llama_index.py
 import pytest
 from tokencost.callbacks import llama_index
-from llama_index.core.callbacks.schema import CBEventType, EventPayload
-from unittest.mock import MagicMock
+from llama_index.core.callbacks.schema import EventPayload
 
 # Mock the calculate_prompt_cost and calculate_completion_cost functions
 
2 changes: 1 addition & 1 deletion tokencost/__init__.py
@@ -4,6 +4,6 @@
     calculate_completion_cost,
     calculate_prompt_cost,
     calculate_all_costs_and_tokens,
-    calculate_cost_by_tokens
+    calculate_cost_by_tokens,
 )
 from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs
9 changes: 6 additions & 3 deletions tokencost/constants.py
@@ -39,7 +39,9 @@ async def fetch_costs():
         if response.status == 200:
             return await response.json(content_type=None)
         else:
-            raise Exception(f"Failed to fetch token costs, status code: {response.status}")
+            raise Exception(
+                f"Failed to fetch token costs, status code: {response.status}"
+            )
 
 
 async def update_token_costs():
@@ -49,11 +51,12 @@ async def update_token_costs():
         fetched_costs = await fetch_costs()
         # Safely remove 'sample_spec' if it exists
         TOKEN_COSTS.update(fetched_costs)
-        TOKEN_COSTS.pop('sample_spec', None)
+        TOKEN_COSTS.pop("sample_spec", None)
     except Exception as e:
         logger.error(f"Failed to update TOKEN_COSTS: {e}")
         raise
 
+
 with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
     TOKEN_COSTS_STATIC = json.load(f)
 
@@ -63,4 +66,4 @@ async def update_token_costs():
     TOKEN_COSTS = TOKEN_COSTS_STATIC
     asyncio.run(update_token_costs())
 except Exception:
-    logger.error('Failed to update token costs. Using static costs.')
+    logger.error("Failed to update token costs. Using static costs.")
52 changes: 34 additions & 18 deletions tokencost/costs.py
Contributor commented:
Changes make sense, but we're unfortunately failing a bunch of tests:

============================= test session starts ==============================
platform darwin -- Python 3.11.5, pytest-8.3.3, pluggy-1.5.0
rootdir: /Users/reibs/Projects/tokencost
configfile: pyproject.toml
plugins: baserun-0.9.16, anyio-4.6.2.post1, requests-mock-1.12.1
collected 98 items

tests/test_costs.py ................F................F.................. [ 53%]
..................F.........................                             [ 97%]
tests/test_llama_index_callbacks.py ..                                   [100%]

=================================== FAILURES ===================================
___________________ test_count_message_tokens[claude-2.1-4] ____________________

model = 'claude-2.1', expected_output = 4

    @pytest.mark.parametrize(
        "model,expected_output",
        [
            ("gpt-3.5-turbo", 15),
            ("gpt-3.5-turbo-0301", 17),
            ("gpt-3.5-turbo-0613", 15),
            ("gpt-3.5-turbo-16k", 15),
            ("gpt-3.5-turbo-16k-0613", 15),
            ("gpt-3.5-turbo-1106", 15),
            ("gpt-3.5-turbo-instruct", 15),
            ("gpt-4", 15),
            ("gpt-4-0314", 15),
            ("gpt-4-0613", 15),
            ("gpt-4-32k", 15),
            ("gpt-4-32k-0314", 15),
            ("gpt-4-1106-preview", 15),
            ("gpt-4-vision-preview", 15),
            ("gpt-4o", 15),
            ("azure/gpt-4o", 15),
            ("claude-2.1", 4),
        ],
    )
    def test_count_message_tokens(model, expected_output):
        print(model)
>       assert count_message_tokens(MESSAGES, model) == expected_output

tests/test_costs.py:54:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tokencost/costs.py:68: in count_message_tokens
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

messages = [{'content': 'Hello', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1'

    def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
        """
        Return the total number of tokens in a prompt's messages.
        Args:
            messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
                [{ "role": "user", "content": "Hello world"},
                 { "role": "assistant", "content": "How may I assist you today?"}]
            model (str): Name of LLM to choose encoding for.
        Returns:
            Total number of tokens in message.
        """
        model = model.lower()
        model = strip_ft_model_name(model)

        # Anthropic token counting requires a valid API key
        if "claude-" in model:
            logger.warning(
                "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
            )
            if "claude-3-sonnet" in model:
                logger.warning(
                    f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
                )
                # For anthropic<0.39.0 this method is no more supported
                prompt = "".join(message["content"] for message in messages)
                return count_string_tokens(prompt, model)

            ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

            try:
                client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
>               num_tokens = client.beta.messages.count_tokens(
                    model=model,
                    messages=messages,
                ).input_tokens
E               AttributeError: 'Beta' object has no attribute 'messages'

tokencost/costs.py:60: AttributeError
----------------------------- Captured stdout call -----------------------------
claude-2.1
------------------------------ Captured log call -------------------------------
WARNING  tokencost.costs:costs.py:45 Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!
______________ test_count_message_tokens_with_name[claude-2.1-4] _______________

model = 'claude-2.1', expected_output = 4

    @pytest.mark.parametrize(
        "model,expected_output",
        [
            ("gpt-3.5-turbo", 17),
            ("gpt-3.5-turbo-0301", 17),
            ("gpt-3.5-turbo-0613", 17),
            ("gpt-3.5-turbo-1106", 17),
            ("gpt-3.5-turbo-instruct", 17),
            ("gpt-3.5-turbo-16k", 17),
            ("gpt-3.5-turbo-16k-0613", 17),
            ("gpt-4", 17),
            ("gpt-4-0314", 17),
            ("gpt-4-0613", 17),
            ("gpt-4-32k", 17),
            ("gpt-4-32k-0314", 17),
            ("gpt-4-1106-preview", 17),
            ("gpt-4-vision-preview", 17),
            ("gpt-4o", 17),
            ("azure/gpt-4o", 17),
            ("claude-2.1", 4),
        ],
    )
    def test_count_message_tokens_with_name(model, expected_output):
        """Notice: name 'John' appears"""

>       assert count_message_tokens(MESSAGES_WITH_NAME, model) == expected_output

tests/test_costs.py:83:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tokencost/costs.py:68: in count_message_tokens
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

messages = [{'content': 'Hello', 'name': 'John', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1'

    def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
        """
        Return the total number of tokens in a prompt's messages.
        Args:
            messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
                [{ "role": "user", "content": "Hello world"},
                 { "role": "assistant", "content": "How may I assist you today?"}]
            model (str): Name of LLM to choose encoding for.
        Returns:
            Total number of tokens in message.
        """
        model = model.lower()
        model = strip_ft_model_name(model)

        # Anthropic token counting requires a valid API key
        if "claude-" in model:
            logger.warning(
                "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
            )
            if "claude-3-sonnet" in model:
                logger.warning(
                    f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
                )
                # For anthropic<0.39.0 this method is no more supported
                prompt = "".join(message["content"] for message in messages)
                return count_string_tokens(prompt, model)

            ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

            try:
                client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
>               num_tokens = client.beta.messages.count_tokens(
                    model=model,
                    messages=messages,
                ).input_tokens
E               AttributeError: 'Beta' object has no attribute 'messages'

tokencost/costs.py:60: AttributeError
------------------------------ Captured log call -------------------------------
WARNING  tokencost.costs:costs.py:45 Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!
______ test_calculate_prompt_cost[prompt16-claude-2.1-expected_output16] _______

prompt = [{'content': 'Hello', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1', expected_output = Decimal('0.000032')

    @pytest.mark.parametrize(
        "prompt,model,expected_output",
        [
            (MESSAGES, "gpt-3.5-turbo", Decimal("0.0000225")),
            (MESSAGES, "gpt-3.5-turbo-0301", Decimal("0.0000255")),
            (MESSAGES, "gpt-3.5-turbo-0613", Decimal("0.0000225")),
            (MESSAGES, "gpt-3.5-turbo-16k", Decimal("0.000045")),
            (MESSAGES, "gpt-3.5-turbo-16k-0613", Decimal("0.000045")),
            (MESSAGES, "gpt-3.5-turbo-1106", Decimal("0.000015")),
            (MESSAGES, "gpt-3.5-turbo-instruct", Decimal("0.0000225")),
            (MESSAGES, "gpt-4", Decimal("0.00045")),
            (MESSAGES, "gpt-4-0314", Decimal("0.00045")),
            (MESSAGES, "gpt-4-32k", Decimal("0.00090")),
            (MESSAGES, "gpt-4-32k-0314", Decimal("0.00090")),
            (MESSAGES, "gpt-4-0613", Decimal("0.00045")),
            (MESSAGES, "gpt-4-1106-preview", Decimal("0.00015")),
            (MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")),
            (MESSAGES, "gpt-4o", Decimal("0.000075")),
            (MESSAGES, "azure/gpt-4o", Decimal("0.000075")),
            (MESSAGES, "claude-2.1", Decimal("0.000032")),
            (STRING, "text-embedding-ada-002", Decimal("0.0000004")),
        ],
    )
    def test_calculate_prompt_cost(prompt, model, expected_output):
        """Test that the cost calculation is correct."""

>       cost = calculate_prompt_cost(prompt, model)

tests/test_costs.py:165:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tokencost/costs.py:226: in calculate_prompt_cost
    else count_message_tokens(prompt, model)
tokencost/costs.py:68: in count_message_tokens
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

messages = [{'content': 'Hello', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1'

    def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
        """
        Return the total number of tokens in a prompt's messages.
        Args:
            messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
                [{ "role": "user", "content": "Hello world"},
                 { "role": "assistant", "content": "How may I assist you today?"}]
            model (str): Name of LLM to choose encoding for.
        Returns:
            Total number of tokens in message.
        """
        model = model.lower()
        model = strip_ft_model_name(model)

        # Anthropic token counting requires a valid API key
        if "claude-" in model:
            logger.warning(
                "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
            )
            if "claude-3-sonnet" in model:
                logger.warning(
                    f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
                )
                # For anthropic<0.39.0 this method is no more supported
                prompt = "".join(message["content"] for message in messages)
                return count_string_tokens(prompt, model)

            ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

            try:
                client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
>               num_tokens = client.beta.messages.count_tokens(
                    model=model,
                    messages=messages,
                ).input_tokens
E               AttributeError: 'Beta' object has no attribute 'messages'

tokencost/costs.py:60: AttributeError
------------------------------ Captured log call -------------------------------
WARNING  tokencost.costs:costs.py:45 Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!
=========================== short test summary info ============================
FAILED tests/test_costs.py::test_count_message_tokens[claude-2.1-4] - AttributeError: 'Beta' object has no attribute 'messages'
FAILED tests/test_costs.py::test_count_message_tokens_with_name[claude-2.1-4] - AttributeError: 'Beta' object has no attribute 'messages'
FAILED tests/test_costs.py::test_calculate_prompt_cost[prompt16-claude-2.1-expected_output16] - AttributeError: 'Beta' object has no attribute 'messages'
========================= 3 failed, 95 passed in 7.39s =========================
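
The AttributeError is consistent with an older anthropic SDK on the test machine: the diff's own comment notes that the messages-based counting method is tied to the SDK version (anthropic>=0.39.0). A defensive check, a hypothetical helper not part of this PR, could guard on the attribute instead of letting the error escape:

```python
# Hypothetical helper, not part of this PR: detect whether the installed
# anthropic SDK exposes the beta message-counting endpoint before calling it.
import anthropic


def supports_beta_token_counting(client: anthropic.Client) -> bool:
    beta = getattr(client, "beta", None)
    messages = getattr(beta, "messages", None)
    return hasattr(messages, "count_tokens")
```

count_message_tokens could then branch on such a check and fall back to string-based counting for older SDKs.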

Contributor (author) commented:
I updated the code and tests.

I now raise a ValueError for models that do not support token counting, so passing any Claude model to count_string_tokens raises this exception.

The tests are updated with the Claude models commented out to demonstrate this.
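
A minimal sketch of the described behavior (the exact error message is an assumption; the real wording is in the later commits, which are not shown here):

```python
# Sketch of count_string_tokens rejecting Claude models; message wording assumed.
import pytest


def count_string_tokens(prompt: str, model: str) -> int:
    if "claude-" in model.lower():
        raise ValueError(f"Token counting is not supported for {model}.")
    ...  # fall through to tiktoken-based counting for other models


def test_count_string_tokens_claude_raises():
    with pytest.raises(ValueError):
        count_string_tokens("Hello world", "claude-2.1")
```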

@@ -1,8 +1,8 @@
-
 """
 Costs dictionary and utility tool for counting tokens
 """
 
+import os
 import tiktoken
 import anthropic
 from typing import Union, List, Dict
@@ -12,8 +12,6 @@
 
 logger = logging.getLogger(__name__)
 
-# TODO: Add Claude support
-# https://www-files.anthropic.com/production/images/model_pricing_july2023.pdf
 # Note: cl100k is the openai base tokenizer. Nothing to do with Claude. Tiktoken doesn't have claude yet.
 # https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts
 
@@ -42,14 +40,32 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
     model = model.lower()
     model = strip_ft_model_name(model)
 
+    # Anthropic token counting requires a valid API key
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`.
-        For newer models this can only be used as a _very_ rough estimate,
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
-        prompt = "".join(message["content"] for message in messages)
-        return count_string_tokens(prompt, model)
+        logger.warning(
+            "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
+        )
+        if "claude-3-sonnet" in model:
+            logger.warning(
+                f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
+            )
+            # For anthropic<0.39.0 this method is no more supported
+            prompt = "".join(message["content"] for message in messages)
+            return count_string_tokens(prompt, model)
+
+        ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
+
+        try:
+            client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
+            num_tokens = client.beta.messages.count_tokens(
+                model=model,
+                messages=messages,
+            ).input_tokens
+            return num_tokens
+        except TypeError as e:
+            raise e
+        except Exception as e:
+            raise e
 
     try:
         encoding = tiktoken.encoding_for_model(model)
@@ -80,8 +96,9 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         )
         return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
     elif "gpt-4o" in model:
-        print(
-            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.")
+        logger.warning(
+            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13."
+        )
         return count_message_tokens(messages, model="gpt-4o-2024-05-13")
     elif "gpt-4" in model:
         logger.warning(
@@ -121,14 +138,13 @@ def count_string_tokens(prompt: str, model: str) -> int:
     model = model.split("/")[-1]
 
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`.
-        For newer models this can only be used as a _very_ rough estimate,
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
+        logger.warning(
+            "Warning: This is only accurate for older models e.g. `claude-2.1` so please expect a _very_ rough estimate."
+            "Use the `usage` property in the response for exact counts."
+        )
         if "claude-3" in model:
             logger.warning(
-                "Warning: Claude-3 models are not yet supported. Returning num tokens assuming claude-2.1."
+                "Warning: Claude-3 models are unsupported. Returning num tokens assuming claude-2.1."
             )
         client = anthropic.Client()
         token_count = client.count_tokens(prompt)
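
Taken together, the new Claude branch in count_message_tokens can be exercised as below. This is a usage sketch: it assumes a valid ANTHROPIC_API_KEY in the environment and a recent anthropic SDK, and the beta endpoint's counts should be treated as estimates.

```python
# Usage sketch; requires ANTHROPIC_API_KEY and a recent anthropic SDK,
# otherwise the call fails with the AttributeError shown in the review above.
from tokencost.costs import count_message_tokens

messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there!"},
]

# For claude-* models this is routed to client.beta.messages.count_tokens.
print(count_message_tokens(messages, "claude-2.1"))  # 4 in the tests above
```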
60 changes: 40 additions & 20 deletions update_prices.py
@@ -9,7 +9,9 @@
 def diff_dicts(dict1, dict2):
     diff_keys = dict1.keys() ^ dict2.keys()
     differences = {k: (dict1.get(k), dict2.get(k)) for k in diff_keys}
-    differences.update({k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]})
+    differences.update(
+        {k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]}
+    )
 
     if differences:
         print("Differences found:")
@@ -24,56 +26,74 @@ def diff_dicts(dict1, dict2):
     return False
 
 
-with open('tokencost/model_prices.json', 'r') as f:
+with open("tokencost/model_prices.json", "r") as f:
     model_prices = json.load(f)
 
 if diff_dicts(model_prices, tokencost.TOKEN_COSTS):
-    print('Updating model_prices.json')
-    with open('tokencost/model_prices.json', 'w') as f:
+    print("Updating model_prices.json")
+    with open("tokencost/model_prices.json", "w") as f:
         json.dump(tokencost.TOKEN_COSTS, f, indent=4)
 # Load the data
 df = pd.DataFrame(tokencost.TOKEN_COSTS).T
-df.loc[df.index[1:], 'max_input_tokens'] = df['max_input_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
-df.loc[df.index[1:], 'max_tokens'] = df['max_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
+df.loc[df.index[1:], "max_input_tokens"] = (
+    df["max_input_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
+df.loc[df.index[1:], "max_tokens"] = (
+    df["max_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
 
 
 # Updated function to format the cost or handle NaN
 
 
 def format_cost(x):
     if pd.isna(x):
-        return '--'
+        return "--"
     else:
         price_per_million = Decimal(str(x)) * Decimal(str(1_000_000))
         # print(price_per_million)
         normalized = price_per_million.normalize()
-        formatted_price = '{:2f}'.format(normalized)
+        formatted_price = "{:2f}".format(normalized)
 
-        formatted_price = formatted_price.rstrip('0').rstrip('.') if '.' in formatted_price else formatted_price + '.00'
+        formatted_price = (
+            formatted_price.rstrip("0").rstrip(".")
+            if "." in formatted_price
+            else formatted_price + ".00"
+        )
 
         return f"${formatted_price}"
 
 
 # Apply the formatting function using DataFrame.apply and lambda
-df[['input_cost_per_token', 'output_cost_per_token']] = df[[
-    'input_cost_per_token', 'output_cost_per_token']].apply(lambda x: x.map(format_cost))
+df[["input_cost_per_token", "output_cost_per_token"]] = df[
+    ["input_cost_per_token", "output_cost_per_token"]
+].apply(lambda x: x.map(format_cost))
 
 
 column_mapping = {
-    'input_cost_per_token': 'Prompt Cost (USD) per 1M tokens',
-    'output_cost_per_token': 'Completion Cost (USD) per 1M tokens',
-    'max_input_tokens': 'Max Prompt Tokens',
-    'max_output_tokens': 'Max Output Tokens',
-    'model_name': 'Model Name'
+    "input_cost_per_token": "Prompt Cost (USD) per 1M tokens",
+    "output_cost_per_token": "Completion Cost (USD) per 1M tokens",
+    "max_input_tokens": "Max Prompt Tokens",
+    "max_output_tokens": "Max Output Tokens",
+    "model_name": "Model Name",
 }
 
 # Assuming the keys of the JSON data represent the model names and have been set as the index
-df['Model Name'] = df.index
+df["Model Name"] = df.index
 
 # Apply the column renaming
 df.rename(columns=column_mapping, inplace=True)
 
 # Write the DataFrame with the correct column names as markdown to a file
-with open('pricing_table.md', 'w') as f:
-    f.write(df[['Model Name', 'Prompt Cost (USD) per 1M tokens', 'Completion Cost (USD) per 1M tokens',
-                'Max Prompt Tokens', 'Max Output Tokens']].to_markdown(index=False))
+with open("pricing_table.md", "w") as f:
+    f.write(
+        df[
+            [
+                "Model Name",
+                "Prompt Cost (USD) per 1M tokens",
+                "Completion Cost (USD) per 1M tokens",
+                "Max Prompt Tokens",
+                "Max Output Tokens",
+            ]
+        ].to_markdown(index=False)
+    )
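
As a quick sanity check of the reformatted format_cost, the Decimal math converts a per-token price to a per-million price and trims trailing zeros. A standalone run (pandas NaN branch omitted):

```python
# Standalone check of the format_cost formatting path (NaN branch omitted).
from decimal import Decimal


def format_cost(x):
    price_per_million = Decimal(str(x)) * Decimal(str(1_000_000))
    normalized = price_per_million.normalize()
    formatted_price = "{:2f}".format(normalized)
    formatted_price = (
        formatted_price.rstrip("0").rstrip(".")
        if "." in formatted_price
        else formatted_price + ".00"
    )
    return f"${formatted_price}"


print(format_cost(0.0000025))  # -> $2.5, i.e. $2.50 per 1M tokens
```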