Skip to content

Commit

Permalink
Merge pull request #30 from AgentOps-AI/29-update-table-to-use-usdm-t…
Browse files Browse the repository at this point in the history
…okens

updated table
  • Loading branch information
areibman committed Mar 26, 2024
2 parents 6c5144d + ebcc4d2 commit 5e34862
Show file tree
Hide file tree
Showing 6 changed files with 1,706 additions and 280 deletions.
246 changes: 246 additions & 0 deletions models_pricing_table.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_costs.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def test_calculate_invalid_input_types():
with pytest.raises(KeyError):
calculate_completion_cost(STRING, model="invalid_model")

with pytest.raises(TypeError):
with pytest.raises(KeyError):
# Message objects not allowed, must be list of message objects.
calculate_prompt_cost(MESSAGES[0], model="invalid_model")

Expand Down
2 changes: 1 addition & 1 deletion tokencost/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
calculate_completion_cost,
calculate_prompt_cost,
)
from .constants import TOKEN_COSTS
# Export both the live (fetched) cost table and the static fallback.
# NOTE: fixes a duplicate-name typo — the second import was TOKEN_COSTS_STATIC
# twice; constants.py defines TOKEN_COSTS (live, fetched at import) and
# TOKEN_COSTS_STATIC (bundled model_prices.json fallback).
from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS
46 changes: 36 additions & 10 deletions tokencost/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import json
from urllib.request import urlopen
import aiohttp
import asyncio
import logging

"""
Prompt (aka context) tokens are based on number of words + other chars (eg spaces and punctuation) in input.
Expand All @@ -20,16 +22,40 @@
# Each completion token costs __ USD per token.
# Max prompt limit of each model is __ tokens.

# Fetch the latest prices using urllib.request
PRICES_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"


async def fetch_costs():
    """Fetch the latest token costs from the LiteLLM cost tracker asynchronously.
    Returns:
        dict: The token costs for each model.
    Raises:
        Exception: If the request fails.
    """
    # One short-lived session per call; the price file is fetched rarely
    # (module import), so connection reuse is not a concern here.
    async with aiohttp.ClientSession() as session:
        async with session.get(PRICES_URL) as response:
            # Guard clause: anything other than 200 is treated as a failure.
            if response.status != 200:
                raise Exception(f"Failed to fetch token costs, status code: {response.status}")
            # content_type=None skips the MIME check — GitHub raw serves
            # JSON as text/plain, which would otherwise raise.
            return await response.json(content_type=None)


async def update_token_costs():
    """Update the TOKEN_COSTS dictionary with the latest costs from the LiteLLM cost tracker asynchronously.

    Rebinds the module-level TOKEN_COSTS on success; on failure the error is
    logged and TOKEN_COSTS is left unchanged (the module-level fallback then
    applies the static table).
    """
    global TOKEN_COSTS
    try:
        TOKEN_COSTS = await fetch_costs()
        # Use logging rather than print(): this runs at import time and the
        # rest of the module already reports via logging (see the error paths).
        logging.info("TOKEN_COSTS updated successfully.")
    except Exception as e:
        # Best-effort refresh: swallow the error after logging so importing
        # the package never fails just because the network is unavailable.
        logging.error(f"Failed to update TOKEN_COSTS: {e}")

with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
TOKEN_COSTS_STATIC = json.load(f)


# Ensure TOKEN_COSTS is up to date when the module is loaded
try:
with urlopen(PRICES_URL) as response:
if response.status == 200:
TOKEN_COSTS = json.loads(response.read())
else:
raise Exception("Failed to fetch token costs, status code: {}".format(response.status))
asyncio.run(update_token_costs())
except Exception:
# If fetching fails, use the local model_prices.json as a fallback
with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
TOKEN_COSTS = json.load(f)
logging.error('Failed to update token costs. Using static costs.')
TOKEN_COSTS = TOKEN_COSTS_STATIC
1 change: 1 addition & 0 deletions tokencost/costs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def strip_ft_model_name(model: str) -> str:
model = "ft:gpt-3.5-turbo"
return model


def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
"""
Return the total number of tokens in a prompt's messages.
Expand Down
Loading

0 comments on commit 5e34862

Please sign in to comment.