Update Anthropic token counting #85

Open · wants to merge 9 commits into main · Changes from 6 commits
3 changes: 1 addition & 2 deletions tests/test_costs.py
@@ -75,7 +75,6 @@ def test_count_message_tokens(model, expected_output):
         ("gpt-4o", 17),
         ("azure/gpt-4o", 17),
         ("claude-2.1", 4),
-
     ],
 )
 def test_count_message_tokens_with_name(model, expected_output):
@@ -116,7 +115,7 @@ def test_count_message_tokens_invalid_model():
         ("gpt-4-vision-preview", 4),
         ("text-embedding-ada-002", 4),
         ("gpt-4o", 4),
-        ("claude-2.1", 4)
+        ("claude-2.1", 4),
     ],
 )
 def test_count_string_tokens(model, expected_output):
3 changes: 1 addition & 2 deletions tests/test_llama_index_callbacks.py
@@ -1,8 +1,7 @@
 # test_llama_index.py
 import pytest
 from tokencost.callbacks import llama_index
-from llama_index.core.callbacks.schema import CBEventType, EventPayload
-from unittest.mock import MagicMock
+from llama_index.core.callbacks.schema import EventPayload
 
 # Mock the calculate_prompt_cost and calculate_completion_cost functions
 
2 changes: 1 addition & 1 deletion tokencost/__init__.py
@@ -4,6 +4,6 @@
     calculate_completion_cost,
     calculate_prompt_cost,
     calculate_all_costs_and_tokens,
-    calculate_cost_by_tokens
+    calculate_cost_by_tokens,
 )
 from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs
9 changes: 6 additions & 3 deletions tokencost/constants.py
@@ -39,7 +39,9 @@ async def fetch_costs():
         if response.status == 200:
             return await response.json(content_type=None)
         else:
-            raise Exception(f"Failed to fetch token costs, status code: {response.status}")
+            raise Exception(
+                f"Failed to fetch token costs, status code: {response.status}"
+            )
 
 
 async def update_token_costs():
@@ -49,11 +51,12 @@ async def update_token_costs():
         fetched_costs = await fetch_costs()
         # Safely remove 'sample_spec' if it exists
         TOKEN_COSTS.update(fetched_costs)
-        TOKEN_COSTS.pop('sample_spec', None)
+        TOKEN_COSTS.pop("sample_spec", None)
     except Exception as e:
         logger.error(f"Failed to update TOKEN_COSTS: {e}")
         raise
 
+
 with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
     TOKEN_COSTS_STATIC = json.load(f)
 
@@ -63,4 +66,4 @@ async def update_token_costs():
     TOKEN_COSTS = TOKEN_COSTS_STATIC
     asyncio.run(update_token_costs())
 except Exception:
-    logger.error('Failed to update token costs. Using static costs.')
+    logger.error("Failed to update token costs. Using static costs.")
52 changes: 34 additions & 18 deletions tokencost/costs.py
Contributor commented:
Changes make sense, but we're unfortunately failing a bunch of tests:

============================= test session starts ==============================
platform darwin -- Python 3.11.5, pytest-8.3.3, pluggy-1.5.0
rootdir: /Users/reibs/Projects/tokencost
configfile: pyproject.toml
plugins: baserun-0.9.16, anyio-4.6.2.post1, requests-mock-1.12.1
collected 98 items

tests/test_costs.py ................F................F.................. [ 53%]
..................F.........................                             [ 97%]
tests/test_llama_index_callbacks.py ..                                   [100%]

=================================== FAILURES ===================================
___________________ test_count_message_tokens[claude-2.1-4] ____________________

model = 'claude-2.1', expected_output = 4

    @pytest.mark.parametrize(
        "model,expected_output",
        [
            ("gpt-3.5-turbo", 15),
            ("gpt-3.5-turbo-0301", 17),
            ("gpt-3.5-turbo-0613", 15),
            ("gpt-3.5-turbo-16k", 15),
            ("gpt-3.5-turbo-16k-0613", 15),
            ("gpt-3.5-turbo-1106", 15),
            ("gpt-3.5-turbo-instruct", 15),
            ("gpt-4", 15),
            ("gpt-4-0314", 15),
            ("gpt-4-0613", 15),
            ("gpt-4-32k", 15),
            ("gpt-4-32k-0314", 15),
            ("gpt-4-1106-preview", 15),
            ("gpt-4-vision-preview", 15),
            ("gpt-4o", 15),
            ("azure/gpt-4o", 15),
            ("claude-2.1", 4),
        ],
    )
    def test_count_message_tokens(model, expected_output):
        print(model)
>       assert count_message_tokens(MESSAGES, model) == expected_output

tests/test_costs.py:54:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tokencost/costs.py:68: in count_message_tokens
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

messages = [{'content': 'Hello', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1'

    def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
        """
        Return the total number of tokens in a prompt's messages.
        Args:
            messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
                [{ "role": "user", "content": "Hello world"},
                 { "role": "assistant", "content": "How may I assist you today?"}]
            model (str): Name of LLM to choose encoding for.
        Returns:
            Total number of tokens in message.
        """
        model = model.lower()
        model = strip_ft_model_name(model)

        # Anthropic token counting requires a valid API key
        if "claude-" in model:
            logger.warning(
                "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
            )
            if "claude-3-sonnet" in model:
                logger.warning(
                    f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
                )
                # For anthropic<0.39.0 this method is no more supported
                prompt = "".join(message["content"] for message in messages)
                return count_string_tokens(prompt, model)

            ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

            try:
                client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
>               num_tokens = client.beta.messages.count_tokens(
                    model=model,
                    messages=messages,
                ).input_tokens
E               AttributeError: 'Beta' object has no attribute 'messages'

tokencost/costs.py:60: AttributeError
----------------------------- Captured stdout call -----------------------------
claude-2.1
------------------------------ Captured log call -------------------------------
WARNING  tokencost.costs:costs.py:45 Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!
______________ test_count_message_tokens_with_name[claude-2.1-4] _______________

model = 'claude-2.1', expected_output = 4

    @pytest.mark.parametrize(
        "model,expected_output",
        [
            ("gpt-3.5-turbo", 17),
            ("gpt-3.5-turbo-0301", 17),
            ("gpt-3.5-turbo-0613", 17),
            ("gpt-3.5-turbo-1106", 17),
            ("gpt-3.5-turbo-instruct", 17),
            ("gpt-3.5-turbo-16k", 17),
            ("gpt-3.5-turbo-16k-0613", 17),
            ("gpt-4", 17),
            ("gpt-4-0314", 17),
            ("gpt-4-0613", 17),
            ("gpt-4-32k", 17),
            ("gpt-4-32k-0314", 17),
            ("gpt-4-1106-preview", 17),
            ("gpt-4-vision-preview", 17),
            ("gpt-4o", 17),
            ("azure/gpt-4o", 17),
            ("claude-2.1", 4),
        ],
    )
    def test_count_message_tokens_with_name(model, expected_output):
        """Notice: name 'John' appears"""

>       assert count_message_tokens(MESSAGES_WITH_NAME, model) == expected_output

tests/test_costs.py:83:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tokencost/costs.py:68: in count_message_tokens
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

messages = [{'content': 'Hello', 'name': 'John', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1'

    def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
        """
        Return the total number of tokens in a prompt's messages.
        Args:
            messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
                [{ "role": "user", "content": "Hello world"},
                 { "role": "assistant", "content": "How may I assist you today?"}]
            model (str): Name of LLM to choose encoding for.
        Returns:
            Total number of tokens in message.
        """
        model = model.lower()
        model = strip_ft_model_name(model)

        # Anthropic token counting requires a valid API key
        if "claude-" in model:
            logger.warning(
                "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
            )
            if "claude-3-sonnet" in model:
                logger.warning(
                    f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
                )
                # For anthropic<0.39.0 this method is no more supported
                prompt = "".join(message["content"] for message in messages)
                return count_string_tokens(prompt, model)

            ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

            try:
                client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
>               num_tokens = client.beta.messages.count_tokens(
                    model=model,
                    messages=messages,
                ).input_tokens
E               AttributeError: 'Beta' object has no attribute 'messages'

tokencost/costs.py:60: AttributeError
------------------------------ Captured log call -------------------------------
WARNING  tokencost.costs:costs.py:45 Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!
______ test_calculate_prompt_cost[prompt16-claude-2.1-expected_output16] _______

prompt = [{'content': 'Hello', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1', expected_output = Decimal('0.000032')

    @pytest.mark.parametrize(
        "prompt,model,expected_output",
        [
            (MESSAGES, "gpt-3.5-turbo", Decimal("0.0000225")),
            (MESSAGES, "gpt-3.5-turbo-0301", Decimal("0.0000255")),
            (MESSAGES, "gpt-3.5-turbo-0613", Decimal("0.0000225")),
            (MESSAGES, "gpt-3.5-turbo-16k", Decimal("0.000045")),
            (MESSAGES, "gpt-3.5-turbo-16k-0613", Decimal("0.000045")),
            (MESSAGES, "gpt-3.5-turbo-1106", Decimal("0.000015")),
            (MESSAGES, "gpt-3.5-turbo-instruct", Decimal("0.0000225")),
            (MESSAGES, "gpt-4", Decimal("0.00045")),
            (MESSAGES, "gpt-4-0314", Decimal("0.00045")),
            (MESSAGES, "gpt-4-32k", Decimal("0.00090")),
            (MESSAGES, "gpt-4-32k-0314", Decimal("0.00090")),
            (MESSAGES, "gpt-4-0613", Decimal("0.00045")),
            (MESSAGES, "gpt-4-1106-preview", Decimal("0.00015")),
            (MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")),
            (MESSAGES, "gpt-4o", Decimal("0.000075")),
            (MESSAGES, "azure/gpt-4o", Decimal("0.000075")),
            (MESSAGES, "claude-2.1", Decimal("0.000032")),
            (STRING, "text-embedding-ada-002", Decimal("0.0000004")),
        ],
    )
    def test_calculate_prompt_cost(prompt, model, expected_output):
        """Test that the cost calculation is correct."""

>       cost = calculate_prompt_cost(prompt, model)

tests/test_costs.py:165:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
tokencost/costs.py:226: in calculate_prompt_cost
    else count_message_tokens(prompt, model)
tokencost/costs.py:68: in count_message_tokens
    raise e
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

messages = [{'content': 'Hello', 'role': 'user'}, {'content': 'Hi there!', 'role': 'assistant'}]
model = 'claude-2.1'

    def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
        """
        Return the total number of tokens in a prompt's messages.
        Args:
            messages (List[Dict[str, str]]): Message format for prompt requests. e.g.:
                [{ "role": "user", "content": "Hello world"},
                 { "role": "assistant", "content": "How may I assist you today?"}]
            model (str): Name of LLM to choose encoding for.
        Returns:
            Total number of tokens in message.
        """
        model = model.lower()
        model = strip_ft_model_name(model)

        # Anthropic token counting requires a valid API key
        if "claude-" in model:
            logger.warning(
                "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
            )
            if "claude-3-sonnet" in model:
                logger.warning(
                    f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
                )
                # For anthropic<0.39.0 this method is no more supported
                prompt = "".join(message["content"] for message in messages)
                return count_string_tokens(prompt, model)

            ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

            try:
                client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
>               num_tokens = client.beta.messages.count_tokens(
                    model=model,
                    messages=messages,
                ).input_tokens
E               AttributeError: 'Beta' object has no attribute 'messages'

tokencost/costs.py:60: AttributeError
------------------------------ Captured log call -------------------------------
WARNING  tokencost.costs:costs.py:45 Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!
=========================== short test summary info ============================
FAILED tests/test_costs.py::test_count_message_tokens[claude-2.1-4] - AttributeError: 'Beta' object has no attribute 'messages'
FAILED tests/test_costs.py::test_count_message_tokens_with_name[claude-2.1-4] - AttributeError: 'Beta' object has no attribute 'messages'
FAILED tests/test_costs.py::test_calculate_prompt_cost[prompt16-claude-2.1-expected_output16] - AttributeError: 'Beta' object has no attribute 'messages'
========================= 3 failed, 95 passed in 7.39s =========================
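
The AttributeError is consistent with an older anthropic SDK on the test machine: the diff's own comment notes that the messages-based counting method is tied to the SDK version (anthropic>=0.39.0). A defensive check, a hypothetical helper not part of this PR, could guard on the attribute instead of letting the error escape:

```python
# Hypothetical helper, not part of this PR: detect whether the installed
# anthropic SDK exposes the beta message-counting endpoint before calling it.
import anthropic


def supports_beta_token_counting(client: anthropic.Client) -> bool:
    beta = getattr(client, "beta", None)
    messages = getattr(beta, "messages", None)
    return hasattr(messages, "count_tokens")
```

count_message_tokens could then branch on such a check and fall back to string-based counting for older SDKs.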

Contributor (author) commented:
I updated the code and tests.

I now raise a ValueError for models that do not support token counting, so passing any Claude model to count_string_tokens raises this exception.

The tests are updated with the Claude models commented out to demonstrate this.
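
A minimal sketch of the described behavior (the exact error message is an assumption; the real wording is in the later commits, which are not shown here):

```python
# Sketch of count_string_tokens rejecting Claude models; message wording assumed.
import pytest


def count_string_tokens(prompt: str, model: str) -> int:
    if "claude-" in model.lower():
        raise ValueError(f"Token counting is not supported for {model}.")
    ...  # fall through to tiktoken-based counting for other models


def test_count_string_tokens_claude_raises():
    with pytest.raises(ValueError):
        count_string_tokens("Hello world", "claude-2.1")
```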

@@ -1,8 +1,8 @@
-
 """
 Costs dictionary and utility tool for counting tokens
 """
 
+import os
 import tiktoken
 import anthropic
 from typing import Union, List, Dict
@@ -12,8 +12,6 @@
 
 logger = logging.getLogger(__name__)
 
-# TODO: Add Claude support
-# https://www-files.anthropic.com/production/images/model_pricing_july2023.pdf
 # Note: cl100k is the openai base tokenizer. Nothing to do with Claude. Tiktoken doesn't have claude yet.
 # https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts
 
@@ -42,14 +40,32 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
     model = model.lower()
     model = strip_ft_model_name(model)
 
+    # Anthropic token counting requires a valid API key
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`.
-        For newer models this can only be used as a _very_ rough estimate,
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
-        prompt = "".join(message["content"] for message in messages)
-        return count_string_tokens(prompt, model)
+        logger.warning(
+            "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
+        )
+        if "claude-3-sonnet" in model:
+            logger.warning(
+                f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
+            )
+            # For anthropic<0.39.0 this method is no more supported
+            prompt = "".join(message["content"] for message in messages)
+            return count_string_tokens(prompt, model)
+
+        ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
+
+        try:
+            client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
+            num_tokens = client.beta.messages.count_tokens(
+                model=model,
+                messages=messages,
+            ).input_tokens
+            return num_tokens
+        except TypeError as e:
+            raise e
+        except Exception as e:
+            raise e
 
     try:
         encoding = tiktoken.encoding_for_model(model)
@@ -80,8 +96,9 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         )
         return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
     elif "gpt-4o" in model:
-        print(
-            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.")
+        logger.warning(
+            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13."
+        )
         return count_message_tokens(messages, model="gpt-4o-2024-05-13")
     elif "gpt-4" in model:
         logger.warning(
@@ -121,14 +138,13 @@ def count_string_tokens(prompt: str, model: str) -> int:
     model = model.split("/")[-1]
 
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`.
-        For newer models this can only be used as a _very_ rough estimate,
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
+        logger.warning(
+            "Warning: This is only accurate for older models e.g. `claude-2.1` so please expect a _very_ rough estimate."
+            "Use the `usage` property in the response for exact counts."
+        )
         if "claude-3" in model:
             logger.warning(
-                "Warning: Claude-3 models are not yet supported. Returning num tokens assuming claude-2.1."
+                "Warning: Claude-3 models are unsupported. Returning num tokens assuming claude-2.1."
             )
         client = anthropic.Client()
         token_count = client.count_tokens(prompt)
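
Taken together, the new Claude branch in count_message_tokens can be exercised as below. This is a usage sketch: it assumes a valid ANTHROPIC_API_KEY in the environment and a recent anthropic SDK, and the beta endpoint's counts should be treated as estimates.

```python
# Usage sketch; requires ANTHROPIC_API_KEY and a recent anthropic SDK,
# otherwise the call fails with the AttributeError shown in the review above.
from tokencost.costs import count_message_tokens

messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there!"},
]

# For claude-* models this is routed to client.beta.messages.count_tokens.
print(count_message_tokens(messages, "claude-2.1"))  # 4 in the tests above
```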
60 changes: 40 additions & 20 deletions update_prices.py
@@ -9,7 +9,9 @@
 def diff_dicts(dict1, dict2):
     diff_keys = dict1.keys() ^ dict2.keys()
     differences = {k: (dict1.get(k), dict2.get(k)) for k in diff_keys}
-    differences.update({k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]})
+    differences.update(
+        {k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]}
+    )
 
     if differences:
         print("Differences found:")
@@ -24,56 +26,74 @@ def diff_dicts(dict1, dict2):
     return False
 
 
-with open('tokencost/model_prices.json', 'r') as f:
+with open("tokencost/model_prices.json", "r") as f:
     model_prices = json.load(f)
 
 if diff_dicts(model_prices, tokencost.TOKEN_COSTS):
-    print('Updating model_prices.json')
-    with open('tokencost/model_prices.json', 'w') as f:
+    print("Updating model_prices.json")
+    with open("tokencost/model_prices.json", "w") as f:
         json.dump(tokencost.TOKEN_COSTS, f, indent=4)
 # Load the data
 df = pd.DataFrame(tokencost.TOKEN_COSTS).T
-df.loc[df.index[1:], 'max_input_tokens'] = df['max_input_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
-df.loc[df.index[1:], 'max_tokens'] = df['max_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
+df.loc[df.index[1:], "max_input_tokens"] = (
+    df["max_input_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
+df.loc[df.index[1:], "max_tokens"] = (
+    df["max_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
 
 
 # Updated function to format the cost or handle NaN
 
 
 def format_cost(x):
     if pd.isna(x):
-        return '--'
+        return "--"
     else:
         price_per_million = Decimal(str(x)) * Decimal(str(1_000_000))
         # print(price_per_million)
         normalized = price_per_million.normalize()
-        formatted_price = '{:2f}'.format(normalized)
+        formatted_price = "{:2f}".format(normalized)
 
-        formatted_price = formatted_price.rstrip('0').rstrip('.') if '.' in formatted_price else formatted_price + '.00'
+        formatted_price = (
+            formatted_price.rstrip("0").rstrip(".")
+            if "." in formatted_price
+            else formatted_price + ".00"
+        )
 
         return f"${formatted_price}"
 
 
 # Apply the formatting function using DataFrame.apply and lambda
-df[['input_cost_per_token', 'output_cost_per_token']] = df[[
-    'input_cost_per_token', 'output_cost_per_token']].apply(lambda x: x.map(format_cost))
+df[["input_cost_per_token", "output_cost_per_token"]] = df[
+    ["input_cost_per_token", "output_cost_per_token"]
+].apply(lambda x: x.map(format_cost))
 
 
 column_mapping = {
-    'input_cost_per_token': 'Prompt Cost (USD) per 1M tokens',
-    'output_cost_per_token': 'Completion Cost (USD) per 1M tokens',
-    'max_input_tokens': 'Max Prompt Tokens',
-    'max_output_tokens': 'Max Output Tokens',
-    'model_name': 'Model Name'
+    "input_cost_per_token": "Prompt Cost (USD) per 1M tokens",
+    "output_cost_per_token": "Completion Cost (USD) per 1M tokens",
+    "max_input_tokens": "Max Prompt Tokens",
+    "max_output_tokens": "Max Output Tokens",
+    "model_name": "Model Name",
 }
 
 # Assuming the keys of the JSON data represent the model names and have been set as the index
-df['Model Name'] = df.index
+df["Model Name"] = df.index
 
 # Apply the column renaming
 df.rename(columns=column_mapping, inplace=True)
 
 # Write the DataFrame with the correct column names as markdown to a file
-with open('pricing_table.md', 'w') as f:
-    f.write(df[['Model Name', 'Prompt Cost (USD) per 1M tokens', 'Completion Cost (USD) per 1M tokens',
-                'Max Prompt Tokens', 'Max Output Tokens']].to_markdown(index=False))
+with open("pricing_table.md", "w") as f:
+    f.write(
+        df[
+            [
+                "Model Name",
+                "Prompt Cost (USD) per 1M tokens",
+                "Completion Cost (USD) per 1M tokens",
+                "Max Prompt Tokens",
+                "Max Output Tokens",
+            ]
+        ].to_markdown(index=False)
+    )
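
As a quick sanity check of the reformatted format_cost, the Decimal math converts a per-token price to a per-million price and trims trailing zeros. A standalone run (pandas NaN branch omitted):

```python
# Standalone check of the format_cost formatting path (NaN branch omitted).
from decimal import Decimal


def format_cost(x):
    price_per_million = Decimal(str(x)) * Decimal(str(1_000_000))
    normalized = price_per_million.normalize()
    formatted_price = "{:2f}".format(normalized)
    formatted_price = (
        formatted_price.rstrip("0").rstrip(".")
        if "." in formatted_price
        else formatted_price + ".00"
    )
    return f"${formatted_price}"


print(format_cost(0.0000025))  # -> $2.5, i.e. $2.50 per 1M tokens
```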