From 1c2ebafd626a166bd10a00ca30887991c3da48bf Mon Sep 17 00:00:00 2001 From: reibs Date: Sun, 17 Mar 2024 23:59:26 -0700 Subject: [PATCH 01/10] updated tablke --- models_pricing_table.md | 246 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 models_pricing_table.md diff --git a/models_pricing_table.md b/models_pricing_table.md new file mode 100644 index 0000000..e527d6e --- /dev/null +++ b/models_pricing_table.md @@ -0,0 +1,246 @@ +| Model Name | Input Cost per Million Tokens (USD) | Output Cost per Million Tokens (USD) | Max Tokens | +|:----------------------------------------------------------------------|--------------------------------------:|---------------------------------------:|-------------:| +| gpt-4 | 30 | 60 | 8192 | +| gpt-4-0314 | 30 | 60 | 8192 | +| gpt-4-0613 | 30 | 60 | 8192 | +| gpt-4-32k | 60 | 120 | 32768 | +| gpt-4-32k-0314 | 60 | 120 | 32768 | +| gpt-4-32k-0613 | 60 | 120 | 32768 | +| gpt-4-1106-preview | 10 | 30 | 128000 | +| gpt-4-0125-preview | 10 | 30 | 128000 | +| gpt-4-vision-preview | 10 | 30 | 128000 | +| gpt-4-1106-vision-preview | 10 | 30 | 128000 | +| gpt-3.5-turbo | 1.5 | 2 | 4097 | +| gpt-3.5-turbo-0301 | 1.5 | 2 | 4097 | +| gpt-3.5-turbo-0613 | 1.5 | 2 | 4097 | +| gpt-3.5-turbo-1106 | 1 | 2 | 16385 | +| gpt-3.5-turbo-16k | 3 | 4 | 16385 | +| gpt-3.5-turbo-16k-0613 | 3 | 4 | 16385 | +| ft:gpt-3.5-turbo | 3 | 6 | 4097 | +| text-embedding-3-large | 0.13 | 0 | 8191 | +| text-embedding-3-small | 0.02 | 0 | 8191 | +| text-embedding-ada-002 | 0.1 | 0 | 8191 | +| text-embedding-ada-002-v2 | 0.1 | 0 | 8191 | +| azure/gpt-4-1106-preview | 10 | 30 | 128000 | +| azure/gpt-4-0613 | 30 | 60 | 8192 | +| azure/gpt-4-32k-0613 | 60 | 120 | 32768 | +| azure/gpt-4-32k | 60 | 120 | 32768 | +| azure/gpt-4 | 30 | 60 | 8192 | +| azure/gpt-4-turbo | 10 | 30 | 128000 | +| azure/gpt-4-turbo-vision-preview | 10 | 30 | 128000 | +| azure/gpt-35-turbo-16k-0613 | 3 | 4 | 16385 | +| azure/gpt-35-turbo-1106 | 1.5 | 2 
| 16384 | +| azure/gpt-35-turbo-16k | 3 | 4 | 16385 | +| azure/gpt-35-turbo | 1.5 | 2 | 4097 | +| azure/ada | 0.1 | 0 | 8191 | +| azure/text-embedding-ada-002 | 0.1 | 0 | 8191 | +| babbage-002 | 0.4 | 0.4 | 16384 | +| davinci-002 | 2 | 2 | 16384 | +| gpt-3.5-turbo-instruct | 1.5 | 2 | 8192 | +| claude-instant-1 | 1.63 | 5.51 | 100000 | +| mistral/mistral-tiny | 0.15 | 0.46 | 8192 | +| mistral/mistral-small | 0.66 | 1.97 | 8192 | +| mistral/mistral-medium | 2.73 | 8.2 | 8192 | +| mistral/mistral-embed | 0.111 | 0 | 8192 | +| claude-instant-1.2 | 0.163 | 0.551 | 100000 | +| claude-2 | 8 | 24 | 100000 | +| claude-2.1 | 8 | 24 | 200000 | +| text-bison | 0.125 | 0.125 | 8192 | +| text-bison@001 | 0.125 | 0.125 | 8192 | +| text-unicorn | 10 | 28 | 8192 | +| text-unicorn@001 | 10 | 28 | 8192 | +| chat-bison | 0.125 | 0.125 | 4096 | +| chat-bison@001 | 0.125 | 0.125 | 4096 | +| chat-bison@002 | 0.125 | 0.125 | 4096 | +| chat-bison-32k | 0.125 | 0.125 | 32000 | +| code-bison | 0.125 | 0.125 | 6144 | +| code-bison@001 | 0.125 | 0.125 | 6144 | +| code-gecko@001 | 0.125 | 0.125 | 2048 | +| code-gecko@002 | 0.125 | 0.125 | 2048 | +| code-gecko | 0.125 | 0.125 | 2048 | +| codechat-bison | 0.125 | 0.125 | 6144 | +| codechat-bison@001 | 0.125 | 0.125 | 6144 | +| codechat-bison-32k | 0.125 | 0.125 | 32000 | +| gemini-pro | 0.25 | 0.5 | 30720 | +| gemini-pro-vision | 0.25 | 0.5 | 30720 | +| palm/chat-bison | 0.125 | 0.125 | 4096 | +| palm/chat-bison-001 | 0.125 | 0.125 | 4096 | +| palm/text-bison | 0.125 | 0.125 | 8196 | +| palm/text-bison-001 | 0.125 | 0.125 | 8196 | +| palm/text-bison-safety-off | 0.125 | 0.125 | 8196 | +| palm/text-bison-safety-recitation-off | 0.125 | 0.125 | 8196 | +| gemini/gemini-pro | 0 | 0 | 30720 | +| gemini/gemini-pro-vision | 0 | 0 | 30720 | +| command-nightly | 15 | 15 | 4096 | +| command | 15 | 15 | 4096 | +| command-light | 15 | 15 | 4096 | +| command-medium-beta | 15 | 15 | 4096 | +| command-xlarge-beta | 15 | 15 | 4096 | +| 
openrouter/openai/gpt-3.5-turbo | 1.5 | 2 | 4095 | +| openrouter/openai/gpt-3.5-turbo-16k | 3 | 4 | 16383 | +| openrouter/openai/gpt-4 | 30 | 60 | 8192 | +| openrouter/anthropic/claude-instant-v1 | 1.63 | 5.51 | 100000 | +| openrouter/anthropic/claude-2 | 11.02 | 32.68 | 100000 | +| openrouter/google/palm-2-chat-bison | 0.5 | 0.5 | 8000 | +| openrouter/google/palm-2-codechat-bison | 0.5 | 0.5 | 8000 | +| openrouter/meta-llama/llama-2-13b-chat | 0.2 | 0.2 | 4096 | +| openrouter/meta-llama/llama-2-70b-chat | 1.5 | 1.5 | 4096 | +| openrouter/meta-llama/codellama-34b-instruct | 0.5 | 0.5 | 8096 | +| openrouter/nousresearch/nous-hermes-llama2-13b | 0.2 | 0.2 | 4096 | +| openrouter/mancer/weaver | 5.625 | 5.625 | 8000 | +| openrouter/gryphe/mythomax-l2-13b | 1.875 | 1.875 | 8192 | +| openrouter/jondurbin/airoboros-l2-70b-2.1 | 13.875 | 13.875 | 4096 | +| openrouter/undi95/remm-slerp-l2-13b | 1.875 | 1.875 | 6144 | +| openrouter/pygmalionai/mythalion-13b | 1.875 | 1.875 | 4096 | +| openrouter/mistralai/mistral-7b-instruct | 0 | 0 | 4096 | +| j2-ultra | 15 | 15 | 8192 | +| j2-mid | 10 | 10 | 8192 | +| j2-light | 3 | 3 | 8192 | +| dolphin | 20 | 20 | 4096 | +| chatdolphin | 20 | 20 | 4096 | +| luminous-base | 30 | 33 | 2048 | +| luminous-base-control | 37.5 | 41.25 | 2048 | +| luminous-extended | 45 | 49.5 | 2048 | +| luminous-extended-control | 56.25 | 61.875 | 2048 | +| luminous-supreme | 175 | 192.5 | 2048 | +| luminous-supreme-control | 218.75 | 240.625 | 2048 | +| ai21.j2-mid-v1 | 12.5 | 12.5 | 8191 | +| ai21.j2-ultra-v1 | 18.8 | 18.8 | 8191 | +| amazon.titan-text-lite-v1 | 0.3 | 0.4 | 8000 | +| amazon.titan-text-express-v1 | 1.3 | 1.7 | 8000 | +| amazon.titan-embed-text-v1 | 0.1 | 0 | 8192 | +| anthropic.claude-v1 | 8 | 24 | 100000 | +| bedrock/us-east-1/anthropic.claude-v1 | 8 | 24 | 100000 | +| bedrock/us-west-2/anthropic.claude-v1 | 8 | 24 | 100000 | +| bedrock/ap-northeast-1/anthropic.claude-v1 | 8 | 24 | 100000 | +| 
bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| bedrock/eu-central-1/anthropic.claude-v1 | 8 | 24 | 100000 | +| bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| bedrock/us-east-1/1-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| bedrock/us-east-1/6-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| bedrock/us-west-2/1-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| bedrock/us-west-2/6-month-commitment/anthropic.claude-v1 | 0 | 0 | 100000 | +| anthropic.claude-v2 | 8 | 24 | 100000 | +| bedrock/us-east-1/anthropic.claude-v2 | 8 | 24 | 100000 | +| bedrock/us-west-2/anthropic.claude-v2 | 8 | 24 | 100000 | +| bedrock/ap-northeast-1/anthropic.claude-v2 | 8 | 24 | 100000 | +| bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| bedrock/eu-central-1/anthropic.claude-v2 | 8 | 24 | 100000 | +| bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| bedrock/us-east-1/1-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| bedrock/us-east-1/6-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| bedrock/us-west-2/1-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| bedrock/us-west-2/6-month-commitment/anthropic.claude-v2 | 0 | 0 | 100000 | +| anthropic.claude-v2:1 | 8 | 24 | 200000 | +| bedrock/us-east-1/anthropic.claude-v2:1 | 8 | 24 | 100000 | +| bedrock/us-west-2/anthropic.claude-v2:1 | 8 | 24 | 100000 | +| bedrock/ap-northeast-1/anthropic.claude-v2:1 | 8 | 24 | 100000 | +| bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| 
bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| bedrock/eu-central-1/anthropic.claude-v2:1 | 8 | 24 | 100000 | +| bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1 | 0 | 0 | 100000 | +| anthropic.claude-instant-v1 | 1.63 | 5.51 | 100000 | +| bedrock/us-east-1/anthropic.claude-instant-v1 | 0.8 | 2.4 | 100000 | +| bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| bedrock/us-west-2/anthropic.claude-instant-v1 | 0.8 | 2.4 | 100000 | +| bedrock/ap-northeast-1/anthropic.claude-instant-v1 | 2.23 | 7.55 | 100000 | +| bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| bedrock/eu-central-1/anthropic.claude-instant-v1 | 2.48 | 8.38 | 100000 | +| bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1 | 0 | 0 | 100000 | +| cohere.command-text-v14 | 1.5 | 2 | 4096 | +| bedrock/*/1-month-commitment/cohere.command-text-v14 | 0 | 0 | 4096 | +| bedrock/*/6-month-commitment/cohere.command-text-v14 | 0 | 0 | 4096 | +| cohere.command-light-text-v14 | 0.3 | 0.6 | 4000 | +| 
bedrock/*/1-month-commitment/cohere.command-light-text-v14 | 0 | 0 | 4096 | +| bedrock/*/6-month-commitment/cohere.command-light-text-v14 | 0 | 0 | 4096 | +| cohere.embed-english-v3 | 0.1 | 0 | 512 | +| cohere.embed-multilingual-v3 | 0.1 | 0 | 512 | +| meta.llama2-13b-chat-v1 | 0.75 | 1 | 4096 | +| meta.llama2-70b-chat-v1 | 1.95 | 2.56 | 4096 | +| sagemaker/meta-textgeneration-llama-2-7b | 0 | 0 | 4096 | +| sagemaker/meta-textgeneration-llama-2-7b-f | 0 | 0 | 4096 | +| sagemaker/meta-textgeneration-llama-2-13b | 0 | 0 | 4096 | +| sagemaker/meta-textgeneration-llama-2-13b-f | 0 | 0 | 4096 | +| sagemaker/meta-textgeneration-llama-2-70b | 0 | 0 | 4096 | +| sagemaker/meta-textgeneration-llama-2-70b-b-f | 0 | 0 | 4096 | +| together-ai-7.1b-20b | 0.4 | 0.4 | 1000 | +| ollama/llama2 | 0 | 0 | 4096 | +| ollama/llama2:13b | 0 | 0 | 4096 | +| ollama/llama2:70b | 0 | 0 | 4096 | +| ollama/llama2-uncensored | 0 | 0 | 4096 | +| ollama/mistral | 0 | 0 | 8192 | +| ollama/codellama | 0 | 0 | 4096 | +| ollama/orca-mini | 0 | 0 | 4096 | +| ollama/vicuna | 0 | 0 | 2048 | +| deepinfra/lizpreciatior/lzlv_70b_fp16_hf | 0.7 | 0.9 | 4096 | +| deepinfra/Gryphe/MythoMax-L2-13b | 0.22 | 0.22 | 4096 | +| deepinfra/mistralai/Mistral-7B-Instruct-v0.1 | 0.13 | 0.13 | 32768 | +| deepinfra/meta-llama/Llama-2-70b-chat-hf | 0.7 | 0.9 | 4096 | +| deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b | 0.27 | 0.27 | 32768 | +| deepinfra/codellama/CodeLlama-34b-Instruct-hf | 0.6 | 0.6 | 4096 | +| deepinfra/deepinfra/mixtral | 0.27 | 0.27 | 4096 | +| deepinfra/Phind/Phind-CodeLlama-34B-v2 | 0.6 | 0.6 | 4096 | +| deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1 | 0.27 | 0.27 | 32768 | +| deepinfra/deepinfra/airoboros-70b | 0.7 | 0.9 | 4096 | +| deepinfra/01-ai/Yi-34B-Chat | 0.6 | 0.6 | 4096 | +| deepinfra/01-ai/Yi-6B-200K | 0.13 | 0.13 | 4096 | +| deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1 | 0.7 | 0.9 | 4096 | +| deepinfra/meta-llama/Llama-2-13b-chat-hf | 0.22 | 0.22 | 4096 | +| 
deepinfra/amazon/MistralLite | 0.2 | 0.2 | 32768 | +| deepinfra/meta-llama/Llama-2-7b-chat-hf | 0.13 | 0.13 | 4096 | +| deepinfra/01-ai/Yi-34B-200K | 0.6 | 0.6 | 4096 | +| deepinfra/openchat/openchat_3.5 | 0.13 | 0.13 | 4096 | +| perplexity/pplx-7b-chat | 0 | 0 | 8192 | +| perplexity/pplx-70b-chat | 0 | 0 | 4096 | +| perplexity/pplx-7b-online | 0 | 500 | 4096 | +| perplexity/pplx-70b-online | 0 | 500 | 4096 | +| perplexity/llama-2-13b-chat | 0 | 0 | 4096 | +| perplexity/llama-2-70b-chat | 0 | 0 | 4096 | +| perplexity/mistral-7b-instruct | 0 | 0 | 4096 | +| perplexity/replit-code-v1.5-3b | 0 | 0 | 4096 | +| anyscale/mistralai/Mistral-7B-Instruct-v0.1 | 0.15 | 0.15 | 16384 | +| anyscale/HuggingFaceH4/zephyr-7b-beta | 0.15 | 0.15 | 16384 | +| anyscale/meta-llama/Llama-2-7b-chat-hf | 0.15 | 0.15 | 4096 | +| anyscale/meta-llama/Llama-2-13b-chat-hf | 0.25 | 0.25 | 4096 | +| anyscale/meta-llama/Llama-2-70b-chat-hf | 1 | 1 | 4096 | +| anyscale/codellama/CodeLlama-34b-Instruct-hf | 1 | 1 | 16384 | +| cloudflare/@cf/meta/llama-2-7b-chat-fp16 | 1.923 | 1.923 | 3072 | +| cloudflare/@cf/meta/llama-2-7b-chat-int8 | 1.923 | 1.923 | 2048 | +| cloudflare/@cf/mistral/mistral-7b-instruct-v0.1 | 1.923 | 1.923 | 8192 | +| cloudflare/@hf/thebloke/codellama-7b-instruct-awq | 1.923 | 1.923 | 4096 | +| voyage/voyage-01 | 0.1 | 0 | 4096 | +| voyage/voyage-lite-01 | 0.1 | 0 | 4096 | +| together-ai-up-to-3b | 0.1 | 0.1 | 0 | +| together-ai-3.1b-7b | 0.2 | 0.2 | 0 | +| together-ai-20.1b-40b | 0.8 | 0.8 | 0 | +| together-ai-40.1b-70b | 0.9 | 0.9 | 0 | +| azure/standard/1024-x-1024/dall-e-3 | 0 | 0 | 0 | +| azure/hd/1024-x-1024/dall-e-3 | 0 | 0 | 0 | +| azure/standard/1024-x-1792/dall-e-3 | 0 | 0 | 0 | +| azure/standard/1792-x-1024/dall-e-3 | 0 | 0 | 0 | +| azure/hd/1024-x-1792/dall-e-3 | 0 | 0 | 0 | +| azure/hd/1792-x-1024/dall-e-3 | 0 | 0 | 0 | +| azure/standard/1024-x-1024/dall-e-2 | 0 | 0 | 0 | +| 256-x-256/dall-e-2 | 0 | 0 | 0 | +| 512-x-512/dall-e-2 | 0 | 0 | 0 | +| 1024-x-1024/dall-e-2 | 
0 | 0 | 0 | +| hd/1024-x-1792/dall-e-3 | 0 | 0 | 0 | +| hd/1792-x-1024/dall-e-3 | 0 | 0 | 0 | +| hd/1024-x-1024/dall-e-3 | 0 | 0 | 0 | +| standard/1024-x-1792/dall-e-3 | 0 | 0 | 0 | +| standard/1792-x-1024/dall-e-3 | 0 | 0 | 0 | +| standard/1024-x-1024/dall-e-3 | 0 | 0 | 0 | \ No newline at end of file From 5f7464ceae7c340f4e4884fec7e395b8c1aa2853 Mon Sep 17 00:00:00 2001 From: reibs Date: Mon, 25 Mar 2024 12:34:55 -0700 Subject: [PATCH 02/10] log statement --- tokencost/constants.py | 1 + tokencost/model_prices.json | 1689 +++++++++++++++++++++++++++++------ 2 files changed, 1422 insertions(+), 268 deletions(-) diff --git a/tokencost/constants.py b/tokencost/constants.py index e47d14b..8229a79 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -31,5 +31,6 @@ raise Exception("Failed to fetch token costs, status code: {}".format(response.status)) except Exception: # If fetching fails, use the local model_prices.json as a fallback + print('Unable to fetch token costs, using local model_prices.json as fallback. 
Prices may have changed since the last update.') with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: TOKEN_COSTS = json.load(f) diff --git a/tokencost/model_prices.json b/tokencost/model_prices.json index 96caeb7..e75bc19 100644 --- a/tokencost/model_prices.json +++ b/tokencost/model_prices.json @@ -1,91 +1,120 @@ { "gpt-4": { - "max_tokens": 8192, + "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "mode": "chat" + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true + }, + "gpt-4-turbo-preview": { + "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "gpt-4-0314": { - "max_tokens": 8192, + "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-0613": { - "max_tokens": 8192, + "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "mode": "chat" + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true }, "gpt-4-32k": { - "max_tokens": 32768, + "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-32k-0314": { - "max_tokens": 32768, + "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-32k-0613": { - "max_tokens": 32768, + "max_tokens": 4096, 
"max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-1106-preview": { - "max_tokens": 128000, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, - "mode": "chat" + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "gpt-4-0125-preview": { - "max_tokens": 128000, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, - "mode": "chat" + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "gpt-4-vision-preview": { - "max_tokens": 128000, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-1106-vision-preview": { - "max_tokens": 128000, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, + "litellm_provider": "openai", "mode": "chat" }, "gpt-3.5-turbo": { "max_tokens": 4097, - "max_input_tokens": 4097, + "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, - "mode": "chat" + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, @@ -93,6 +122,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, + "litellm_provider": "openai", "mode": "chat" }, "gpt-3.5-turbo-0613": { @@ -101,7 +131,9 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 
0.000002, - "mode": "chat" + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, @@ -109,7 +141,21 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.0000010, "output_cost_per_token": 0.0000020, - "mode": "chat" + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "gpt-3.5-turbo-0125": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "openai", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, @@ -117,6 +163,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, + "litellm_provider": "openai", "mode": "chat" }, "gpt-3.5-turbo-16k-0613": { @@ -125,6 +172,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, + "litellm_provider": "openai", "mode": "chat" }, "ft:gpt-3.5-turbo": { @@ -133,210 +181,371 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, + "litellm_provider": "openai", "mode": "chat" }, "text-embedding-3-large": { "max_tokens": 8191, + "max_input_tokens": 8191, "input_cost_per_token": 0.00000013, "output_cost_per_token": 0.000000, + "litellm_provider": "openai", "mode": "embedding" }, "text-embedding-3-small": { "max_tokens": 8191, + "max_input_tokens": 8191, "input_cost_per_token": 0.00000002, "output_cost_per_token": 0.000000, + "litellm_provider": "openai", "mode": "embedding" }, "text-embedding-ada-002": { "max_tokens": 8191, + "max_input_tokens": 8191, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "openai", "mode": "embedding" }, "text-embedding-ada-002-v2": { "max_tokens": 
8191, + "max_input_tokens": 8191, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "openai", "mode": "embedding" }, + "text-moderation-stable": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "moderations" + }, + "text-moderation-007": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "moderations" + }, + "text-moderation-latest": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 0, + "input_cost_per_token": 0.000000, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "moderations" + }, "256-x-256/dall-e-2": { "mode": "image_generation", "input_cost_per_pixel": 0.00000024414, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" }, "512-x-512/dall-e-2": { "mode": "image_generation", "input_cost_per_pixel": 0.0000000686, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" }, "1024-x-1024/dall-e-2": { "mode": "image_generation", "input_cost_per_pixel": 0.000000019, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" }, "hd/1024-x-1792/dall-e-3": { "mode": "image_generation", "input_cost_per_pixel": 0.00000006539, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" }, "hd/1792-x-1024/dall-e-3": { "mode": "image_generation", "input_cost_per_pixel": 0.00000006539, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" }, "hd/1024-x-1024/dall-e-3": { "mode": "image_generation", "input_cost_per_pixel": 0.00000007629, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + 
"litellm_provider": "openai" }, "standard/1024-x-1792/dall-e-3": { "mode": "image_generation", "input_cost_per_pixel": 0.00000004359, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" }, "standard/1792-x-1024/dall-e-3": { "mode": "image_generation", "input_cost_per_pixel": 0.00000004359, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" }, "standard/1024-x-1024/dall-e-3": { "mode": "image_generation", "input_cost_per_pixel": 0.0000000381469, - "output_cost_per_pixel": 0.0 + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "whisper-1": { + "mode": "audio_transcription", + "input_cost_per_second": 0, + "output_cost_per_second": 0.0001, + "litellm_provider": "openai" + }, + "azure/whisper-1": { + "mode": "audio_transcription", + "input_cost_per_second": 0, + "output_cost_per_second": 0.0001, + "litellm_provider": "azure" + }, + "azure/gpt-4-0125-preview": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "azure/gpt-4-1106-preview": { - "max_tokens": 128000, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, - "mode": "chat" + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "azure/gpt-4-0613": { - "max_tokens": 8192, + "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "mode": "chat" + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true }, "azure/gpt-4-32k-0613": { - "max_tokens": 32768, + "max_tokens": 4096, "max_input_tokens": 32768, 
"max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, + "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-4-32k": { - "max_tokens": 32768, + "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, + "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-4": { - "max_tokens": 8192, + "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "mode": "chat" + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true }, "azure/gpt-4-turbo": { - "max_tokens": 128000, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, - "mode": "chat" + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "azure/gpt-4-turbo-vision-preview": { - "max_tokens": 128000, + "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, + "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-35-turbo-16k-0613": { - "max_tokens": 16385, + "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, - "mode": "chat" + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true }, "azure/gpt-35-turbo-1106": { - "max_tokens": 16384, + "max_tokens": 4096, "max_input_tokens": 16384, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, - "mode": "chat" + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "azure/gpt-35-turbo-0125": { + "max_tokens": 4096, + "max_input_tokens": 
16384, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "azure/gpt-35-turbo-16k": { - "max_tokens": 16385, + "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, + "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-35-turbo": { - "max_tokens": 4097, + "max_tokens": 4096, "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, - "mode": "chat" + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/gpt-3.5-turbo-instruct-0914": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "azure/gpt-35-turbo-instruct": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + }, + "azure/mistral-large-latest": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true + }, + "azure/mistral-large-2402": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true }, "azure/ada": { "max_tokens": 8191, + "max_input_tokens": 8191, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "azure", "mode": "embedding" }, "azure/text-embedding-ada-002": { "max_tokens": 8191, 
+ "max_input_tokens": 8191, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, + "azure/text-embedding-3-large": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", "mode": "embedding" }, + "azure/text-embedding-3-small": { + "max_tokens": 8191, + "max_input_tokens": 8191, + "input_cost_per_token": 0.00000002, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, "azure/standard/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 0.0000000381469, "output_cost_per_token": 0.0, + "litellm_provider": "azure", "mode": "image_generation" }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 0.00000007629, "output_cost_per_token": 0.0, + "litellm_provider": "azure", "mode": "image_generation" }, "azure/standard/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 0.00000004359, "output_cost_per_token": 0.0, + "litellm_provider": "azure", "mode": "image_generation" }, "azure/standard/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 0.00000004359, "output_cost_per_token": 0.0, + "litellm_provider": "azure", "mode": "image_generation" }, "azure/hd/1024-x-1792/dall-e-3": { "input_cost_per_pixel": 0.00000006539, "output_cost_per_token": 0.0, + "litellm_provider": "azure", "mode": "image_generation" }, "azure/hd/1792-x-1024/dall-e-3": { "input_cost_per_pixel": 0.00000006539, "output_cost_per_token": 0.0, + "litellm_provider": "azure", "mode": "image_generation" }, "azure/standard/1024-x-1024/dall-e-2": { "input_cost_per_pixel": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "azure", "mode": "image_generation" }, "babbage-002": { @@ -345,6 +554,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000004, + "litellm_provider": "text-completion-openai", "mode": "completion" }, "davinci-002": { @@ -353,273 
+563,696 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", "mode": "completion" }, "gpt-3.5-turbo-instruct": { - "max_tokens": 8192, + "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", "mode": "completion" }, + "gpt-3.5-turbo-instruct-0914": { + "max_tokens": 4097, + "max_input_tokens": 8192, + "max_output_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai", + "mode": "completion" + + }, "claude-instant-1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551, + "litellm_provider": "anthropic", "mode": "chat" }, "mistral/mistral-tiny": { - "max_tokens": 8192, + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, "input_cost_per_token": 0.00000015, "output_cost_per_token": 0.00000046, + "litellm_provider": "mistral", "mode": "chat" }, "mistral/mistral-small": { - "max_tokens": 8192, - "input_cost_per_token": 0.00000066, - "output_cost_per_token": 0.00000197, + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-small-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "mistral", "mode": "chat" }, "mistral/mistral-medium": { - "max_tokens": 8192, - "input_cost_per_token": 0.00000273, - "output_cost_per_token": 0.00000820, + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + 
"input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-medium-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, + "litellm_provider": "mistral", "mode": "chat" }, + "mistral/mistral-medium-2312": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-large-latest": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true + }, + "mistral/mistral-large-2402": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "mistral", + "mode": "chat", + "supports_function_calling": true + }, "mistral/mistral-embed": { "max_tokens": 8192, + "max_input_tokens": 8192, "input_cost_per_token": 0.000000111, + "litellm_provider": "mistral", "mode": "embedding" }, + "groq/llama2-70b-4096": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000080, + "litellm_provider": "groq", + "mode": "chat" + }, + "groq/mixtral-8x7b-32768": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, + "litellm_provider": "groq", + "mode": "chat" + }, + "groq/gemma-7b-it": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000010, + "output_cost_per_token": 
0.00000010, + "litellm_provider": "groq", + "mode": "chat" + }, "claude-instant-1.2": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000000163, "output_cost_per_token": 0.000000551, + "litellm_provider": "anthropic", "mode": "chat" }, "claude-2": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "anthropic", "mode": "chat" }, "claude-2.1": { - "max_tokens": 200000, + "max_tokens": 8191, + "max_input_tokens": 200000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-3-haiku-20240307": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-3-opus-20240229": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "anthropic", + "mode": "chat" + }, + "claude-3-sonnet-20240229": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "anthropic", "mode": "chat" }, "text-bison": { - "max_tokens": 8192, + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-bison@001": { - "max_tokens": 8192, + "max_tokens": 1024, + 
"max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-unicorn": { - "max_tokens": 8192, + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.000028, - "mode": "completion" + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-unicorn@001": { - "max_tokens": 8192, + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.000028, - "mode": "completion" + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "chat-bison": { "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "chat-bison@001": { "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "chat-bison@002": { "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + 
"litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "chat-bison-32k": { - "max_tokens": 32000, + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "code-bison": { - "max_tokens": 6144, + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "vertex_ai-code-text-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "code-bison@001": { - "max_tokens": 6144, + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "code-gecko@001": { - "max_tokens": 2048, + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "code-gecko@002": { - "max_tokens": 2048, + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "vertex_ai-code-text-models", + "mode": 
"completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "code-gecko": { - "max_tokens": 2048, + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "codechat-bison": { - "max_tokens": 6144, + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "codechat-bison@001": { - "max_tokens": 6144, + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "codechat-bison-32k": { - "max_tokens": 32000, + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini-pro": { - "max_tokens": 30720, - "max_output_tokens": 2048, + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005, - "mode": "chat" + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + 
"supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-pro": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro-preview-0215": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini-pro-vision": { - "max_tokens": 30720, + "max_tokens": 2048, + "max_input_tokens": 16384, "max_output_tokens": 2048, + "max_images_per_prompt": 16, + "max_videos_per_prompt": 1, + "max_video_length": 2, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005, - "mode": "chat" + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-pro-vision": { + "max_tokens": 2048, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_images_per_prompt": 16, + "max_videos_per_prompt": 1, + "max_video_length": 2, + 
"input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-pro-vision-001": { + "max_tokens": 2048, + "max_input_tokens": 16384, + "max_output_tokens": 2048, + "max_images_per_prompt": 16, + "max_videos_per_prompt": 1, + "max_video_length": 2, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.5-pro-vision": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "max_images_per_prompt": 16, + "max_videos_per_prompt": 1, + "max_video_length": 2, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko-multilingual": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko-multilingual@001": { + 
"max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "textembedding-gecko@003": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "palm/chat-bison": { "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "palm", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "palm/chat-bison-001": { "max_tokens": 4096, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "chat" + "litellm_provider": "palm", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "palm/text-bison": { - "max_tokens": 8196, + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "palm", + "mode": 
"completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "palm/text-bison-001": { - "max_tokens": 8196, + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "palm", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "palm/text-bison-safety-off": { - "max_tokens": 8196, + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "palm", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "palm/text-bison-safety-recitation-off": { - "max_tokens": 8196, + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "mode": "completion" + "litellm_provider": "palm", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini/gemini-pro": { - "max_tokens": 30720, - "max_output_tokens": 2048, + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, - "mode": "chat" + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini/gemini-1.5-pro": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini/gemini-pro-vision": { - "max_tokens": 30720, + "max_tokens": 2048, + "max_input_tokens": 30720, "max_output_tokens": 2048, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, - "mode": "chat" + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, - "command-nightly": { - "max_tokens": 4096, + "gemini/gemini-1.5-pro-vision": { + "max_tokens": 8192, + "max_input_tokens": 1000000, + "max_output_tokens": 8192, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_function_calling": true, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "command-r": { + "max_tokens": 4096, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000050, + "output_cost_per_token": 0.0000015, + "litellm_provider": "cohere_chat", + "mode": "chat", + "supports_function_calling": true + }, + "command-light": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015, - "mode": "completion" + "litellm_provider": "cohere_chat", + "mode": "chat" }, - "command": { - "max_tokens": 4096, + "command-nightly": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015, + "litellm_provider": "cohere", "mode": "completion" }, - "command-light": { - "max_tokens": 4096, + "command": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015, + "litellm_provider": "cohere", "mode": "completion" }, "command-medium-beta": { - "max_tokens": 4096, + 
"max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015, + "litellm_provider": "cohere", "mode": "completion" }, "command-xlarge-beta": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015, + "litellm_provider": "cohere", "mode": "completion" }, + "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000, + "output_cost_per_token": 0.0000, + "litellm_provider": "replicate", + "mode": "chat" + }, "openrouter/openai/gpt-3.5-turbo": { "max_tokens": 4095, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/openai/gpt-3.5-turbo-16k": { "max_tokens": 16383, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/openai/gpt-4": { "max_tokens": 8192, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/anthropic/claude-instant-v1": { @@ -627,6 +1260,7 @@ "max_output_tokens": 8191, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/anthropic/claude-2": { @@ -634,933 +1268,1452 @@ "max_output_tokens": 8191, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/google/palm-2-chat-bison": { "max_tokens": 8000, "input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/google/palm-2-codechat-bison": { "max_tokens": 8000, 
"input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/meta-llama/llama-2-13b-chat": { "max_tokens": 4096, "input_cost_per_token": 0.0000002, "output_cost_per_token": 0.0000002, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/meta-llama/llama-2-70b-chat": { "max_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.0000015, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/meta-llama/codellama-34b-instruct": { "max_tokens": 8096, "input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/nousresearch/nous-hermes-llama2-13b": { "max_tokens": 4096, "input_cost_per_token": 0.0000002, "output_cost_per_token": 0.0000002, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/mancer/weaver": { "max_tokens": 8000, "input_cost_per_token": 0.000005625, "output_cost_per_token": 0.000005625, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/gryphe/mythomax-l2-13b": { "max_tokens": 8192, "input_cost_per_token": 0.000001875, "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/jondurbin/airoboros-l2-70b-2.1": { "max_tokens": 4096, "input_cost_per_token": 0.000013875, "output_cost_per_token": 0.000013875, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/undi95/remm-slerp-l2-13b": { "max_tokens": 6144, "input_cost_per_token": 0.000001875, "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/pygmalionai/mythalion-13b": { "max_tokens": 4096, "input_cost_per_token": 0.000001875, "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter", "mode": "chat" }, "openrouter/mistralai/mistral-7b-instruct": { - "max_tokens": 4096, + "max_tokens": 8192, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 
0.00000013, + "litellm_provider": "openrouter", + "mode": "chat" + }, + "openrouter/mistralai/mistral-7b-instruct:free": { + "max_tokens": 8192, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "openrouter", "mode": "chat" }, "j2-ultra": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015, + "litellm_provider": "ai21", "mode": "completion" }, "j2-mid": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00001, + "litellm_provider": "ai21", "mode": "completion" }, "j2-light": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000003, + "litellm_provider": "ai21", "mode": "completion" }, "dolphin": { - "max_tokens": 4096, - "input_cost_per_token": 0.00002, - "output_cost_per_token": 0.00002, + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "nlp_cloud", "mode": "completion" }, "chatdolphin": { - "max_tokens": 4096, - "input_cost_per_token": 0.00002, - "output_cost_per_token": 0.00002, + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "nlp_cloud", "mode": "chat" }, "luminous-base": { "max_tokens": 2048, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.000033, + "litellm_provider": "aleph_alpha", "mode": "completion" }, "luminous-base-control": { "max_tokens": 2048, "input_cost_per_token": 0.0000375, "output_cost_per_token": 0.00004125, + "litellm_provider": "aleph_alpha", "mode": "chat" }, "luminous-extended": { "max_tokens": 2048, "input_cost_per_token": 0.000045, "output_cost_per_token": 0.0000495, + 
"litellm_provider": "aleph_alpha", "mode": "completion" }, "luminous-extended-control": { "max_tokens": 2048, "input_cost_per_token": 0.00005625, "output_cost_per_token": 0.000061875, + "litellm_provider": "aleph_alpha", "mode": "chat" }, "luminous-supreme": { "max_tokens": 2048, "input_cost_per_token": 0.000175, "output_cost_per_token": 0.0001925, + "litellm_provider": "aleph_alpha", "mode": "completion" }, "luminous-supreme-control": { "max_tokens": 2048, "input_cost_per_token": 0.00021875, "output_cost_per_token": 0.000240625, + "litellm_provider": "aleph_alpha", "mode": "chat" }, "ai21.j2-mid-v1": { "max_tokens": 8191, + "max_input_tokens": 8191, + "max_output_tokens": 8191, "input_cost_per_token": 0.0000125, "output_cost_per_token": 0.0000125, + "litellm_provider": "bedrock", "mode": "chat" }, "ai21.j2-ultra-v1": { "max_tokens": 8191, + "max_input_tokens": 8191, + "max_output_tokens": 8191, "input_cost_per_token": 0.0000188, "output_cost_per_token": 0.0000188, + "litellm_provider": "bedrock", "mode": "chat" }, "amazon.titan-text-lite-v1": { - "max_tokens": 8000, + "max_tokens": 4000, + "max_input_tokens": 42000, + "max_output_tokens": 4000, "input_cost_per_token": 0.0000003, "output_cost_per_token": 0.0000004, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "amazon.titan-text-express-v1": { + "max_tokens": 8000, + "max_input_tokens": 42000, + "max_output_tokens": 8000, + "input_cost_per_token": 0.0000013, + "output_cost_per_token": 0.0000017, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "amazon.titan-embed-text-v1": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "output_vector_size": 1536, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "mistral.mistral-7b-instruct-v0:2": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000002, + "litellm_provider": 
"bedrock", + "mode": "chat" + }, + "mistral.mixtral-8x7b-instruct": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000045, + "output_cost_per_token": 0.0000007, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/mistral.mixtral-8x7b-instruct": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000045, + "output_cost_per_token": 0.0000007, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/mistral.mistral-7b-instruct": { + "max_tokens": 8191, + "max_input_tokens": 32000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000002, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "anthropic.claude-3-sonnet-20240229-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "bedrock", "mode": "chat" }, - "amazon.titan-text-express-v1": { - "max_tokens": 8000, - "input_cost_per_token": 0.0000013, - "output_cost_per_token": 0.0000017, + "anthropic.claude-3-haiku-20240307-v1:0": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "bedrock", "mode": "chat" }, - "amazon.titan-embed-text-v1": { - "max_tokens": 8192, - "output_vector_size": 1536, - "input_cost_per_token": 0.0000001, - "output_cost_per_token": 0.0, - "mode": "embedding" - }, "anthropic.claude-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/anthropic.claude-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, 
"max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/anthropic.claude-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/anthropic.claude-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.0455, "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.02527, "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/anthropic.claude-v1": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.0415, "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, - "max_output_tokens": 8191, + 
"max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.02305, "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.0175, "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.00972, "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.0175, "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.00972, "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", "mode": "chat" }, "anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + 
"litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.0455, "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.02527, "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.0415, "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - 
"max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.02305, "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.0175, "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.00972, "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.0175, "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2": { - "max_tokens": 100000, - "max_output_tokens": 8191, + "max_tokens": 8191, + "max_input_tokens": 100000, + "max_output_tokens": 8191, "input_cost_per_second": 0.00972, "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", "mode": "chat" }, "anthropic.claude-v2:1": { - "max_tokens": 200000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": 
"chat" }, "bedrock/us-west-2/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.0455, "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.02527, "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.0415, "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.02305, "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", "mode": "chat" }, 
"bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.0175, "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.00972, "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.0175, "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.00972, "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", "mode": "chat" }, "anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.0000008, "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.011, "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", "mode": "chat" }, 
"bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.00611, "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.011, "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.00611, "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/us-west-2/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.0000008, "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.00000223, "output_cost_per_token": 0.00000755, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.01475, "output_cost_per_second": 0.01475, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.008194, "output_cost_per_second": 0.008194, + 
"litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.00000248, "output_cost_per_token": 0.00000838, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.01635, "output_cost_per_second": 0.01635, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { - "max_tokens": 100000, + "max_tokens": 8191, + "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_second": 0.009083, "output_cost_per_second": 0.009083, + "litellm_provider": "bedrock", "mode": "chat" }, "cohere.command-text-v14": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.0000020, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/*/1-month-commitment/cohere.command-text-v14": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_second": 0.011, "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/*/6-month-commitment/cohere.command-text-v14": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_second": 0.0066027, "output_cost_per_second": 0.0066027, + "litellm_provider": "bedrock", "mode": "chat" }, "cohere.command-light-text-v14": { - "max_tokens": 4000, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000003, "output_cost_per_token": 0.0000006, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { "max_tokens": 
4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_second": 0.001902, "output_cost_per_second": 0.001902, + "litellm_provider": "bedrock", "mode": "chat" }, "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_second": 0.0011416, "output_cost_per_second": 0.0011416, + "litellm_provider": "bedrock", "mode": "chat" }, "cohere.embed-english-v3": { "max_tokens": 512, + "max_input_tokens": 512, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", "mode": "embedding" }, "cohere.embed-multilingual-v3": { "max_tokens": 512, + "max_input_tokens": 512, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", "mode": "embedding" }, "meta.llama2-13b-chat-v1": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000075, "output_cost_per_token": 0.000001, + "litellm_provider": "bedrock", "mode": "chat" }, "meta.llama2-70b-chat-v1": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000195, "output_cost_per_token": 0.00000256, + "litellm_provider": "bedrock", "mode": "chat" }, + "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.018, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { + 
"max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.072, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.04, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.08, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, "sagemaker/meta-textgeneration-llama-2-7b": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000, "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", "mode": "completion" }, "sagemaker/meta-textgeneration-llama-2-7b-f": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000, "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", "mode": "chat" }, "sagemaker/meta-textgeneration-llama-2-13b": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000, "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", "mode": "completion" }, "sagemaker/meta-textgeneration-llama-2-13b-f": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000, "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", "mode": "chat" }, "sagemaker/meta-textgeneration-llama-2-70b": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000, "output_cost_per_token": 0.000, + "litellm_provider": "sagemaker", "mode": "completion" }, "sagemaker/meta-textgeneration-llama-2-70b-b-f": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000, "output_cost_per_token": 0.000, + "litellm_provider": 
"sagemaker", "mode": "chat" }, "together-ai-up-to-3b": { "input_cost_per_token": 0.0000001, - "output_cost_per_token": 0.0000001 + "output_cost_per_token": 0.0000001, + "litellm_provider": "together_ai" }, "together-ai-3.1b-7b": { "input_cost_per_token": 0.0000002, - "output_cost_per_token": 0.0000002 + "output_cost_per_token": 0.0000002, + "litellm_provider": "together_ai" }, "together-ai-7.1b-20b": { "max_tokens": 1000, "input_cost_per_token": 0.0000004, - "output_cost_per_token": 0.0000004 + "output_cost_per_token": 0.0000004, + "litellm_provider": "together_ai" }, "together-ai-20.1b-40b": { "input_cost_per_token": 0.0000008, - "output_cost_per_token": 0.0000008 + "output_cost_per_token": 0.0000008, + "litellm_provider": "together_ai" }, "together-ai-40.1b-70b": { "input_cost_per_token": 0.0000009, - "output_cost_per_token": 0.0000009 + "output_cost_per_token": 0.0000009, + "litellm_provider": "together_ai" + }, + "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "input_cost_per_token": 0.0000006, + "output_cost_per_token": 0.0000006, + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "together_ai/mistralai/Mistral-7B-Instruct-v0.1": { + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true + }, + "together_ai/togethercomputer/CodeLlama-34b-Instruct": { + "litellm_provider": "together_ai", + "supports_function_calling": true, + "supports_parallel_function_calling": true }, "ollama/llama2": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", "mode": "completion" }, "ollama/llama2:13b": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", 
"mode": "completion" }, "ollama/llama2:70b": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", "mode": "completion" }, "ollama/llama2-uncensored": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", "mode": "completion" }, "ollama/mistral": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", "mode": "completion" }, "ollama/codellama": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", "mode": "completion" }, "ollama/orca-mini": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", "mode": "completion" }, "ollama/vicuna": { "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, + "litellm_provider": "ollama", "mode": "completion" }, "deepinfra/lizpreciatior/lzlv_70b_fp16_hf": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000070, "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/Gryphe/MythoMax-L2-13b": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000022, "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { - "max_tokens": 32768, + "max_tokens": 8191, + 
"max_input_tokens": 32768, + "max_output_tokens": 8191, "input_cost_per_token": 0.00000013, "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/meta-llama/Llama-2-70b-chat-hf": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000070, "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b": { - "max_tokens": 32768, + "max_tokens": 8191, + "max_input_tokens": 32768, + "max_output_tokens": 8191, "input_cost_per_token": 0.00000027, "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/codellama/CodeLlama-34b-Instruct-hf": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000060, "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/deepinfra/mixtral": { "max_tokens": 4096, + "max_input_tokens": 32000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000027, "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", "mode": "completion" }, "deepinfra/Phind/Phind-CodeLlama-34B-v2": { "max_tokens": 4096, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000060, "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { - "max_tokens": 32768, + "max_tokens": 8191, + "max_input_tokens": 32768, + "max_output_tokens": 8191, "input_cost_per_token": 0.00000027, "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/deepinfra/airoboros-70b": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000070, "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", "mode": "chat" }, 
"deepinfra/01-ai/Yi-34B-Chat": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000060, "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/01-ai/Yi-6B-200K": { "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000013, "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", "mode": "completion" }, "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000070, "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/meta-llama/Llama-2-13b-chat-hf": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000022, "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/amazon/MistralLite": { - "max_tokens": 32768, + "max_tokens": 8191, + "max_input_tokens": 32768, + "max_output_tokens": 8191, "input_cost_per_token": 0.00000020, "output_cost_per_token": 0.00000020, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/meta-llama/Llama-2-7b-chat-hf": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000013, "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/01-ai/Yi-34B-200K": { "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000060, "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", "mode": "completion" }, "deepinfra/openchat/openchat_3.5": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000013, "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", "mode": "chat" }, - 
"perplexity/pplx-7b-chat": { - "max_tokens": 8192, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "perplexity/codellama-34b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000140, + "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-chat": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + }, + "perplexity/codellama-70b-instruct": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-7b-online": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + }, + "perplexity/pplx-7b-chat": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-online": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + }, + "perplexity/pplx-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-13b-chat": { - "max_tokens": 4096, + }, + "perplexity/pplx-7b-online": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "output_cost_per_token": 0.00000028, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-70b-chat": { - "max_tokens": 4096, + }, + "perplexity/pplx-70b-online": { + 
"max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "output_cost_per_token": 0.00000280, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/mistral-7b-instruct": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + }, + "perplexity/llama-2-70b-chat": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/replit-code-v1.5-3b": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + }, + "perplexity/mistral-7b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", "mode": "chat" - }, + }, + "perplexity/mixtral-8x7b-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-small-chat": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-small-online": { + "max_tokens": 12000, + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "input_cost_per_token": 0, + "output_cost_per_token": 0.00000028, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-medium-chat": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.0000006, 
+ "output_cost_per_token": 0.0000018, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/sonar-medium-online": { + "max_tokens": 12000, + "max_input_tokens": 12000, + "max_output_tokens": 12000, + "input_cost_per_token": 0, + "output_cost_per_token": 0.0000018, + "input_cost_per_request": 0.005, + "litellm_provider": "perplexity", + "mode": "chat" + }, "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { - "max_tokens": 16384, + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, "input_cost_per_token": 0.00000015, "output_cost_per_token": 0.00000015, - "mode": "chat" + "litellm_provider": "anyscale", + "mode": "chat", + "supports_function_calling": true + }, + "anyscale/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", + "mode": "chat", + "supports_function_calling": true }, "anyscale/HuggingFaceH4/zephyr-7b-beta": { - "max_tokens": 16384, + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 16384, "input_cost_per_token": 0.00000015, "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", "mode": "chat" }, "anyscale/meta-llama/Llama-2-7b-chat-hf": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000015, "output_cost_per_token": 0.00000015, + "litellm_provider": "anyscale", "mode": "chat" }, "anyscale/meta-llama/Llama-2-13b-chat-hf": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.00000025, + "litellm_provider": "anyscale", "mode": "chat" }, "anyscale/meta-llama/Llama-2-70b-chat-hf": { - "max_tokens": 4096, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000001, 
"output_cost_per_token": 0.000001, + "litellm_provider": "anyscale", "mode": "chat" }, "anyscale/codellama/CodeLlama-34b-Instruct-hf": { - "max_tokens": 16384, + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000001, + "litellm_provider": "anyscale", "mode": "chat" }, "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { "max_tokens": 3072, + "max_input_tokens": 3072, + "max_output_tokens": 3072, "input_cost_per_token": 0.000001923, "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", "mode": "chat" }, "cloudflare/@cf/meta/llama-2-7b-chat-int8": { "max_tokens": 2048, + "max_input_tokens": 2048, + "max_output_tokens": 2048, "input_cost_per_token": 0.000001923, "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", "mode": "chat" }, "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, "input_cost_per_token": 0.000001923, "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", "mode": "chat" }, "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, "input_cost_per_token": 0.000001923, "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", "mode": "chat" }, "voyage/voyage-01": { "max_tokens": 4096, + "max_input_tokens": 4096, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-lite-01": { "max_tokens": 4096, + "max_input_tokens": 4096, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", "mode": "embedding" } -} +} \ No newline at end of file From 45daf11cee735f59cd1bfb37582bbd9fee870294 Mon Sep 17 00:00:00 2001 From: reibs Date: Mon, 25 Mar 2024 15:58:42 -0700 Subject: [PATCH 03/10] added fetch cost function --- 
tokencost/constants.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tokencost/constants.py b/tokencost/constants.py index 8229a79..8049047 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -23,14 +23,19 @@ # Fetch the latest prices using urllib.request PRICES_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" -try: - with urlopen(PRICES_URL) as response: - if response.status == 200: - TOKEN_COSTS = json.loads(response.read()) - else: - raise Exception("Failed to fetch token costs, status code: {}".format(response.status)) -except Exception: - # If fetching fails, use the local model_prices.json as a fallback - print('Unable to fetch token costs, using local model_prices.json as fallback. Prices may have changed since the last update.') - with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: - TOKEN_COSTS = json.load(f) + +def fetch_costs(): + try: + with urlopen(PRICES_URL) as response: + if response.status == 200: + return json.loads(response.read()) + else: + raise Exception("Failed to fetch token costs, status code: {}".format(response.status)) + except Exception: + # If fetching fails, use the local model_prices.json as a fallback + print('Unable to fetch token costs, using local model_prices.json as fallback. 
Prices may have changed since the last update.') + with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: + return json.load(f) + + +TOKEN_COSTS = fetch_costs() From ed479c3c93f4b5e46df624a44c5e19d93abe84b0 Mon Sep 17 00:00:00 2001 From: reibs Date: Mon, 25 Mar 2024 16:56:07 -0700 Subject: [PATCH 04/10] update tests --- tests/test_costs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_costs.py b/tests/test_costs.py index f417430..650422d 100644 --- a/tests/test_costs.py +++ b/tests/test_costs.py @@ -205,7 +205,7 @@ def test_calculate_invalid_input_types(): with pytest.raises(KeyError): calculate_completion_cost(STRING, model="invalid_model") - with pytest.raises(TypeError): + with pytest.raises(KeyError): # Message objects not allowed, must be list of message objects. calculate_prompt_cost(MESSAGES[0], model="invalid_model") From ee406c4f1ae9549fa2277d9b70c456040a709082 Mon Sep 17 00:00:00 2001 From: reibs Date: Tue, 26 Mar 2024 15:38:22 -0700 Subject: [PATCH 05/10] added static costs --- tokencost/__init__.py | 2 +- tokencost/constants.py | 25 ++++++++++++++----------- tokencost/costs.py | 11 ++++++----- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/tokencost/__init__.py b/tokencost/__init__.py index ceb87e7..5ad7f99 100644 --- a/tokencost/__init__.py +++ b/tokencost/__init__.py @@ -4,4 +4,4 @@ calculate_completion_cost, calculate_prompt_cost, ) -from .constants import TOKEN_COSTS +from .constants import TOKEN_COSTS, TOKEN_COSTS_STATIC diff --git a/tokencost/constants.py b/tokencost/constants.py index 8049047..2fef1e3 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -25,17 +25,20 @@ def fetch_costs(): - try: - with urlopen(PRICES_URL) as response: - if response.status == 200: - return json.loads(response.read()) - else: - raise Exception("Failed to fetch token costs, status code: {}".format(response.status)) - except Exception: - # If fetching fails, use the 
local model_prices.json as a fallback - print('Unable to fetch token costs, using local model_prices.json as fallback. Prices may have changed since the last update.') - with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: - return json.load(f) + """Fetch the latest token costs from the LiteLLM cost tracker. + Returns: + dict: The token costs for each model. + Raises: + Exception: If the request fails. + """ + with urlopen(PRICES_URL) as response: + if response.status == 200: + return json.loads(response.read()) + else: + raise Exception("Failed to fetch token costs, status code: {}".format(response.status)) TOKEN_COSTS = fetch_costs() + +with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: + TOKEN_COSTS_STATIC = json.load(f) diff --git a/tokencost/costs.py b/tokencost/costs.py index f3d8c1c..81dc0db 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -3,7 +3,7 @@ """ import tiktoken from typing import Union, List, Dict -from .constants import TOKEN_COSTS +from .constants import TOKEN_COSTS_STATIC from decimal import Decimal @@ -22,6 +22,7 @@ def strip_ft_model_name(model: str) -> str: model = "ft:gpt-3.5-turbo" return model + def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: """ Return the total number of tokens in a prompt's messages. @@ -114,14 +115,14 @@ def calculate_cost_by_tokens(num_tokens: int, model: str, token_type: str) -> De Decimal: The calculated cost in USD. """ model = model.lower() - if model not in TOKEN_COSTS: + if model not in TOKEN_COSTS_STATIC: raise KeyError( f"""Model {model} is not implemented. 
Double-check your spelling, or submit an issue/PR""" ) cost_per_token_key = 'input_cost_per_token' if token_type == 'input' else 'output_cost_per_token' - cost_per_token = TOKEN_COSTS[model][cost_per_token_key] + cost_per_token = TOKEN_COSTS_STATIC[model][cost_per_token_key] return Decimal(str(cost_per_token)) * Decimal(num_tokens) @@ -149,7 +150,7 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal """ model = model.lower() model = strip_ft_model_name(model) - if model not in TOKEN_COSTS: + if model not in TOKEN_COSTS_STATIC: raise KeyError( f"""Model {model} is not implemented. Double-check your spelling, or submit an issue/PR""" @@ -186,7 +187,7 @@ def calculate_completion_cost(completion: str, model: str) -> Decimal: Decimal('0.000014') """ model = strip_ft_model_name(model) - if model not in TOKEN_COSTS: + if model not in TOKEN_COSTS_STATIC: raise KeyError( f"""Model {model} is not implemented. Double-check your spelling, or submit an issue/PR""" From 86c449607897500f12ad5421c8578ec24f84f5cb Mon Sep 17 00:00:00 2001 From: reibs Date: Tue, 26 Mar 2024 15:40:27 -0700 Subject: [PATCH 06/10] revert var --- tokencost/costs.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tokencost/costs.py b/tokencost/costs.py index 81dc0db..7b4e7e3 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -3,7 +3,7 @@ """ import tiktoken from typing import Union, List, Dict -from .constants import TOKEN_COSTS_STATIC +from .constants import TOKEN_COSTS from decimal import Decimal @@ -115,14 +115,14 @@ def calculate_cost_by_tokens(num_tokens: int, model: str, token_type: str) -> De Decimal: The calculated cost in USD. """ model = model.lower() - if model not in TOKEN_COSTS_STATIC: + if model not in TOKEN_COSTS: raise KeyError( f"""Model {model} is not implemented. 
Double-check your spelling, or submit an issue/PR""" ) cost_per_token_key = 'input_cost_per_token' if token_type == 'input' else 'output_cost_per_token' - cost_per_token = TOKEN_COSTS_STATIC[model][cost_per_token_key] + cost_per_token = TOKEN_COSTS[model][cost_per_token_key] return Decimal(str(cost_per_token)) * Decimal(num_tokens) @@ -150,7 +150,7 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal """ model = model.lower() model = strip_ft_model_name(model) - if model not in TOKEN_COSTS_STATIC: + if model not in TOKEN_COSTS: raise KeyError( f"""Model {model} is not implemented. Double-check your spelling, or submit an issue/PR""" @@ -187,7 +187,7 @@ def calculate_completion_cost(completion: str, model: str) -> Decimal: Decimal('0.000014') """ model = strip_ft_model_name(model) - if model not in TOKEN_COSTS_STATIC: + if model not in TOKEN_COSTS: raise KeyError( f"""Model {model} is not implemented. Double-check your spelling, or submit an issue/PR""" From e7368e62de926713d6ac564cfc1994cdb2590077 Mon Sep 17 00:00:00 2001 From: reibs Date: Tue, 26 Mar 2024 15:59:46 -0700 Subject: [PATCH 07/10] async updater function added --- tokencost/constants.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/tokencost/constants.py b/tokencost/constants.py index 2fef1e3..66491d3 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -1,6 +1,7 @@ import os import json -from urllib.request import urlopen +import aiohttp +import asyncio """ Prompt (aka context) tokens are based on number of words + other chars (eg spaces and punctuation) in input. @@ -20,25 +21,38 @@ # Each completion token costs __ USD per token. # Max prompt limit of each model is __ tokens. 
-# Fetch the latest prices using urllib.request PRICES_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" -def fetch_costs(): - """Fetch the latest token costs from the LiteLLM cost tracker. +async def fetch_costs(): + """Fetch the latest token costs from the LiteLLM cost tracker asynchronously. Returns: dict: The token costs for each model. Raises: Exception: If the request fails. """ - with urlopen(PRICES_URL) as response: - if response.status == 200: - return json.loads(response.read()) - else: - raise Exception("Failed to fetch token costs, status code: {}".format(response.status)) + async with aiohttp.ClientSession() as session: + async with session.get(PRICES_URL) as response: + if response.status == 200: + return await response.json(content_type=None) + else: + raise Exception(f"Failed to fetch token costs, status code: {response.status}") +TOKEN_COSTS = asyncio.run(fetch_costs()) -TOKEN_COSTS = fetch_costs() + +async def update_token_costs(): + """Update the TOKEN_COSTS dictionary with the latest costs from the LiteLLM cost tracker asynchronously.""" + global TOKEN_COSTS + try: + TOKEN_COSTS = await fetch_costs() + print("TOKEN_COSTS updated successfully.") + except Exception as e: + print(f"Failed to update TOKEN_COSTS: {e}") + + +# Ensure TOKEN_COSTS is up to date when the module is loaded +asyncio.run(update_token_costs()) with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: TOKEN_COSTS_STATIC = json.load(f) From 248760f9d68cfb3e23e6dd3002ead4e35b0cd94e Mon Sep 17 00:00:00 2001 From: reibs Date: Tue, 26 Mar 2024 16:05:37 -0700 Subject: [PATCH 08/10] update static def --- tokencost/constants.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tokencost/constants.py b/tokencost/constants.py index 66491d3..f80d350 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -38,8 +38,6 @@ async def fetch_costs(): else: raise Exception(f"Failed to 
fetch token costs, status code: {response.status}") -TOKEN_COSTS = asyncio.run(fetch_costs()) - async def update_token_costs(): """Update the TOKEN_COSTS dictionary with the latest costs from the LiteLLM cost tracker asynchronously.""" @@ -50,9 +48,10 @@ async def update_token_costs(): except Exception as e: print(f"Failed to update TOKEN_COSTS: {e}") +with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: + TOKEN_COSTS_STATIC = json.load(f) + +TOKEN_COSTS = TOKEN_COSTS_STATIC # Ensure TOKEN_COSTS is up to date when the module is loaded asyncio.run(update_token_costs()) - -with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: - TOKEN_COSTS_STATIC = json.load(f) From c529dc5877ac0f98e37da584f7c74eab950953b1 Mon Sep 17 00:00:00 2001 From: reibs Date: Tue, 26 Mar 2024 16:09:23 -0700 Subject: [PATCH 09/10] added clearer load on start --- tokencost/constants.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tokencost/constants.py b/tokencost/constants.py index f80d350..e38abbd 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -51,7 +51,10 @@ async def update_token_costs(): with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: TOKEN_COSTS_STATIC = json.load(f) -TOKEN_COSTS = TOKEN_COSTS_STATIC # Ensure TOKEN_COSTS is up to date when the module is loaded -asyncio.run(update_token_costs()) +try: + asyncio.run(update_token_costs()) +except Exception: + print('Failed to update token costs. 
Using static costs.') + TOKEN_COSTS = TOKEN_COSTS_STATIC From ebcc4d20ebcd6be43c6055270a1254f244eb9fcf Mon Sep 17 00:00:00 2001 From: reibs Date: Tue, 26 Mar 2024 16:26:59 -0700 Subject: [PATCH 10/10] added exception logger --- tokencost/constants.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tokencost/constants.py b/tokencost/constants.py index e38abbd..144a584 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -2,6 +2,7 @@ import json import aiohttp import asyncio +import logging """ Prompt (aka context) tokens are based on number of words + other chars (eg spaces and punctuation) in input. @@ -46,7 +47,7 @@ async def update_token_costs(): TOKEN_COSTS = await fetch_costs() print("TOKEN_COSTS updated successfully.") except Exception as e: - print(f"Failed to update TOKEN_COSTS: {e}") + logging.error(f"Failed to update TOKEN_COSTS: {e}") with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: TOKEN_COSTS_STATIC = json.load(f) @@ -56,5 +57,5 @@ async def update_token_costs(): try: asyncio.run(update_token_costs()) except Exception: - print('Failed to update token costs. Using static costs.') + logging.error('Failed to update token costs. Using static costs.') TOKEN_COSTS = TOKEN_COSTS_STATIC