diff --git a/model_cost_data/model_prices_and_context_window.json b/model_cost_data/model_prices_and_context_window.json index 55a50e04..5689201e 100644 --- a/model_cost_data/model_prices_and_context_window.json +++ b/model_cost_data/model_prices_and_context_window.json @@ -14,7 +14,8 @@ "supports_audio_output": true, "supports_prompt_caching": true, "supports_response_schema": true, - "supports_system_messages": true + "supports_system_messages": true, + "deprecation_date": "date when the model becomes deprecated in the format YYYY-MM-DD" }, "omni-moderation-latest": { "max_tokens": 32768, @@ -193,14 +194,44 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000012, - "cache_read_input_token_cost": 0.0000015, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 0.00000055, "litellm_provider": "openai", "mode": "chat", "supports_vision": true, "supports_prompt_caching": true }, + "o3-mini": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 0.00000055, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_vision": false, + "supports_prompt_caching": true, + "supports_response_schema": true + }, + "o3-mini-2025-01-31": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 0.00000055, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": false, + "supports_vision": false, + "supports_prompt_caching": true, + "supports_response_schema": true + }, "o1-mini-2024-09-12": { 
"max_tokens": 65536, "max_input_tokens": 128000, @@ -441,7 +472,8 @@ "mode": "chat", "supports_function_calling": true, "supports_prompt_caching": true, - "supports_system_messages": true + "supports_system_messages": true, + "deprecation_date": "2025-06-06" }, "gpt-4-32k": { "max_tokens": 4096, @@ -540,7 +572,8 @@ "mode": "chat", "supports_vision": true, "supports_prompt_caching": true, - "supports_system_messages": true + "supports_system_messages": true, + "deprecation_date": "2024-12-06" }, "gpt-4-1106-vision-preview": { "max_tokens": 4096, @@ -552,7 +585,8 @@ "mode": "chat", "supports_vision": true, "supports_prompt_caching": true, - "supports_system_messages": true + "supports_system_messages": true, + "deprecation_date": "2024-12-06" }, "gpt-3.5-turbo": { "max_tokens": 4097, @@ -887,7 +921,7 @@ }, "whisper-1": { "mode": "audio_transcription", - "input_cost_per_second": 0, + "input_cost_per_second": 0.0001, "output_cost_per_second": 0.0001, "litellm_provider": "openai" }, @@ -901,6 +935,30 @@ "input_cost_per_character": 0.000030, "litellm_provider": "openai" }, + "azure/o3-mini": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 0.00000055, + "litellm_provider": "azure", + "mode": "chat", + "supports_vision": false, + "supports_prompt_caching": true + }, + "azure/o3-mini-2025-01-31": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 0.00000055, + "litellm_provider": "azure", + "mode": "chat", + "supports_vision": false, + "supports_prompt_caching": true + }, "azure/tts-1": { "mode": "audio_speech", "input_cost_per_character": 0.000015, @@ -913,10 +971,34 @@ }, "azure/whisper-1": { "mode": "audio_transcription", - "input_cost_per_second": 0, + "input_cost_per_second": 
0.0001, "output_cost_per_second": 0.0001, "litellm_provider": "azure" }, + "azure/deepseek-r1": { + "max_tokens": 8192, + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0, + "input_cost_per_token_cache_hit": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "chat", + "supports_prompt_caching": true + }, + "azure/o3-mini": { + "max_tokens": 100000, + "max_input_tokens": 200000, + "max_output_tokens": 100000, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 0.00000055, + "litellm_provider": "azure", + "mode": "chat", + "supports_vision": false, + "supports_prompt_caching": true, + "supports_response_schema": true + }, "azure/o1-mini": { "max_tokens": 65536, "max_input_tokens": 128000, @@ -1007,6 +1089,7 @@ "max_output_tokens": 16384, "input_cost_per_token": 0.00000275, "output_cost_per_token": 0.000011, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -1047,6 +1130,7 @@ "max_output_tokens": 16384, "input_cost_per_token": 0.0000025, "output_cost_per_token": 0.000010, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -1223,7 +1307,8 @@ "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "deprecation_date": "2025-03-31" }, "azure/gpt-35-turbo-0613": { "max_tokens": 4097, @@ -1234,7 +1319,8 @@ "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "deprecation_date": "2025-02-13" }, "azure/gpt-35-turbo-0301": { "max_tokens": 4097, @@ -1245,7 +1331,8 @@ "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, - 
"supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "deprecation_date": "2025-02-13" }, "azure/gpt-35-turbo-0125": { "max_tokens": 4096, @@ -1256,7 +1343,8 @@ "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, - "supports_parallel_function_calling": true + "supports_parallel_function_calling": true, + "deprecation_date": "2025-03-31" }, "azure/gpt-35-turbo-16k": { "max_tokens": 4096, @@ -2043,6 +2131,84 @@ "supports_function_calling": true, "supports_vision": true }, + "xai/grok-2-vision-1212": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000002, + "input_cost_per_image": 0.000002, + "output_cost_per_token": 0.00001, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "xai/grok-2-vision-latest": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000002, + "input_cost_per_image": 0.000002, + "output_cost_per_token": 0.00001, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "xai/grok-2-vision": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000002, + "input_cost_per_image": 0.000002, + "output_cost_per_token": 0.00001, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "xai/grok-vision-beta": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000005, + "input_cost_per_image": 0.000005, + "output_cost_per_token": 0.000015, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true + }, + "xai/grok-2-1212": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true + }, + "xai/grok-2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true + }, + "xai/grok-2-latest": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, + "litellm_provider": "xai", + "mode": "chat", + "supports_function_calling": true + }, "deepseek/deepseek-coder": { "max_tokens": 4096, "max_input_tokens": 128000, @@ -2057,6 +2223,19 @@ "supports_tool_choice": true, "supports_prompt_caching": true }, + "groq/deepseek-r1-distill-llama-70b": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.00000075, + "output_cost_per_token": 0.00000099, + "litellm_provider": "groq", + "mode": "chat", + "supports_system_messages": false, + "supports_function_calling": false, + "supports_response_schema": false, + "supports_tool_choice": false + }, "groq/llama-3.3-70b-versatile": { "max_tokens": 8192, "max_input_tokens": 128000, @@ -2354,7 +2533,8 @@ "tool_use_system_prompt_tokens": 264, "supports_assistant_prefill": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "deprecation_date": "2025-03-01" }, "claude-3-5-haiku-20241022": { "max_tokens": 8192, @@ -2370,7 +2550,8 @@ "tool_use_system_prompt_tokens": 264, "supports_assistant_prefill": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "deprecation_date": "2025-10-01" }, "claude-3-opus-20240229": { "max_tokens": 4096, @@ -2387,7 +2568,8 @@ "tool_use_system_prompt_tokens": 395, 
"supports_assistant_prefill": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "deprecation_date": "2025-03-01" }, "claude-3-sonnet-20240229": { "max_tokens": 4096, @@ -2402,7 +2584,8 @@ "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "deprecation_date": "2025-07-21" }, "claude-3-5-sonnet-20240620": { "max_tokens": 8192, @@ -2419,7 +2602,8 @@ "tool_use_system_prompt_tokens": 159, "supports_assistant_prefill": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "deprecation_date": "2025-06-01" }, "claude-3-5-sonnet-20241022": { "max_tokens": 8192, @@ -2437,7 +2621,8 @@ "supports_assistant_prefill": true, "supports_pdf_input": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "deprecation_date": "2025-10-01" }, "text-bison": { "max_tokens": 2048, @@ -2547,7 +2732,8 @@ "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "deprecation_date": "2025-04-09" }, "chat-bison-32k": { "max_tokens": 8192, @@ -2788,7 +2974,8 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "deprecation_date": "2025-04-09" }, "gemini-1.0-ultra": { "max_tokens": 8192, @@ -2833,7 +3020,8 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - 
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "deprecation_date": "2025-04-09" }, "gemini-1.5-pro": { "max_tokens": 8192, @@ -2915,7 +3103,8 @@ "supports_function_calling": true, "supports_tool_choice": true, "supports_response_schema": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "deprecation_date": "2025-05-24" }, "gemini-1.5-pro-preview-0514": { "max_tokens": 8192, @@ -3120,7 +3309,8 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "deprecation_date": "2025-05-24" }, "gemini-1.5-flash-preview-0514": { "max_tokens": 8192, @@ -3188,8 +3378,9 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "input_cost_per_image": 0.0025, "litellm_provider": "vertex_ai-vision-models", "mode": "chat", "supports_function_calling": true, @@ -3203,8 +3394,9 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "input_cost_per_image": 0.0025, "litellm_provider": "vertex_ai-vision-models", "mode": "chat", "supports_function_calling": true, @@ -3218,13 +3410,15 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 
0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "input_cost_per_image": 0.0025, "litellm_provider": "vertex_ai-vision-models", "mode": "chat", "supports_function_calling": true, "supports_vision": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", + "deprecation_date": "2025-04-09" }, "medlm-medium": { "max_tokens": 8192, @@ -3312,6 +3506,39 @@ "supports_audio_output": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" }, + "gemini-2.0-flash-thinking-exp-01-21": { + "max_tokens": 65536, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_token": 0, + "input_cost_per_character": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_character": 0, + "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_character_above_128k_tokens": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": false, + "supports_vision": true, + "supports_response_schema": false, + "supports_audio_output": false, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" + }, "gemini/gemini-2.0-flash-exp": { "max_tokens": 
8192, "max_input_tokens": 1048576, @@ -3691,6 +3918,16 @@ "mode": "chat", "supports_function_calling": true }, + "vertex_ai/codestral@2405": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000006, + "litellm_provider": "vertex_ai-mistral_models", + "mode": "chat", + "supports_function_calling": true + }, "vertex_ai/imagegeneration@006": { "output_cost_per_image": 0.020, "litellm_provider": "vertex_ai-image-models", @@ -3930,7 +4167,8 @@ "supports_prompt_caching": true, "tpm": 4000000, "rpm": 2000, - "source": "https://ai.google.dev/pricing" + "source": "https://ai.google.dev/pricing", + "deprecation_date": "2025-05-24" }, "gemini/gemini-1.5-flash": { "max_tokens": 8192, @@ -4206,7 +4444,8 @@ "supports_prompt_caching": true, "tpm": 4000000, "rpm": 1000, - "source": "https://ai.google.dev/pricing" + "source": "https://ai.google.dev/pricing", + "deprecation_date": "2025-05-24" }, "gemini/gemini-1.5-pro-exp-0801": { "max_tokens": 8192, @@ -7342,7 +7581,8 @@ "input_cost_per_token": 0.000005, "output_cost_per_token": 0.000005, "litellm_provider": "perplexity", - "mode": "chat" + "mode": "chat", + "deprecation_date": "2025-02-22" }, "perplexity/llama-3.1-sonar-large-128k-online": { "max_tokens": 127072, @@ -7351,7 +7591,8 @@ "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000001, "litellm_provider": "perplexity", - "mode": "chat" + "mode": "chat", + "deprecation_date": "2025-02-22" }, "perplexity/llama-3.1-sonar-large-128k-chat": { "max_tokens": 131072, @@ -7360,7 +7601,8 @@ "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000001, "litellm_provider": "perplexity", - "mode": "chat" + "mode": "chat", + "deprecation_date": "2025-02-22" }, "perplexity/llama-3.1-sonar-small-128k-chat": { "max_tokens": 131072, @@ -7369,7 +7611,8 @@ "input_cost_per_token": 0.0000002, "output_cost_per_token": 0.0000002, "litellm_provider": "perplexity", - "mode": 
"chat" + "mode": "chat", + "deprecation_date": "2025-02-22" }, "perplexity/llama-3.1-sonar-small-128k-online": { "max_tokens": 127072, @@ -7378,7 +7621,8 @@ "input_cost_per_token": 0.0000002, "output_cost_per_token": 0.0000002, "litellm_provider": "perplexity", - "mode": "chat" + "mode": "chat", + "deprecation_date": "2025-02-22" }, "perplexity/pplx-7b-chat": { "max_tokens": 8192,