From 2de2b7988c0dc51d39951e1e1aead8f6d48f0d1b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 09:29:12 -0800 Subject: [PATCH 01/17] fix(health.md): add rerank model health check information --- docs/my-website/docs/proxy/health.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md index ffc66dde811c..585e2ff7505c 100644 --- a/docs/my-website/docs/proxy/health.md +++ b/docs/my-website/docs/proxy/health.md @@ -121,6 +121,20 @@ model_list: mode: audio_speech ``` +### Rerank Models + +To run rerank health checks, specify the mode as "rerank" in your config for the relevant model. + +```yaml +model_list: + - model_name: rerank-english-v3.0 + litellm_params: + model: cohere/rerank-english-v3.0 + api_key: os.environ/COHERE_API_KEY + model_info: + mode: rerank +``` + ### Batch Models (Azure Only) For Azure models deployed as 'batch' models, set `mode: batch`. From abaea848f4686686fb9012cb491ecb30d85c4770 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 09:48:40 -0800 Subject: [PATCH 02/17] build(model_prices_and_context_window.json): add gemini 2.0 for google ai studio - pricing + commercial rate limits --- ...odel_prices_and_context_window_backup.json | 34 +++++++++++++++++++ model_prices_and_context_window.json | 34 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index a607bfbc6ffc..92f3e1c5bd0e 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3098,6 +3098,40 @@ "supports_response_schema": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" }, + "gemini/gemini-2.0-flash-exp": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_token": 0, + "input_cost_per_character": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_character": 0, + "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_character_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" + }, "vertex_ai/claude-3-sonnet": { "max_tokens": 4096, "max_input_tokens": 200000, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index a607bfbc6ffc..92f3e1c5bd0e 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -3098,6 +3098,40 @@ "supports_response_schema": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" }, + "gemini/gemini-2.0-flash-exp": { + "max_tokens": 8192, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_images_per_prompt": 3000, + "max_videos_per_prompt": 10, + "max_video_length": 1, + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_pdf_size_mb": 30, + "input_cost_per_image": 0, + "input_cost_per_video_per_second": 0, + "input_cost_per_audio_per_second": 0, + "input_cost_per_token": 0, + "input_cost_per_character": 0, + "input_cost_per_token_above_128k_tokens": 0, + "input_cost_per_character_above_128k_tokens": 0, + "input_cost_per_image_above_128k_tokens": 0, + "input_cost_per_video_per_second_above_128k_tokens": 0, + "input_cost_per_audio_per_second_above_128k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_character": 0, + "output_cost_per_token_above_128k_tokens": 0, + "output_cost_per_character_above_128k_tokens": 0, + "litellm_provider": "gemini", + "mode": "chat", + "supports_system_messages": true, + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "tpm": 4000000, + "rpm": 10, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" + }, "vertex_ai/claude-3-sonnet": { "max_tokens": 4096, "max_input_tokens": 200000, From e6508ad66591b5a2f08306f4e255c65c015aca1e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 09:50:34 -0800 Subject: [PATCH 03/17] build(model_prices_and_context_window.json): add gemini-2.0 supports audio output = true --- litellm/model_prices_and_context_window_backup.json | 2 ++ model_prices_and_context_window.json | 2 ++ 2 files changed, 4 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 92f3e1c5bd0e..d1be19cc1776 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -3096,6 +3096,7 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "supports_audio_output": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" }, "gemini/gemini-2.0-flash-exp": { @@ -3128,6 +3129,7 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "supports_audio_output": true, "tpm": 4000000, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 92f3e1c5bd0e..d1be19cc1776 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -3096,6 +3096,7 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "supports_audio_output": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" }, "gemini/gemini-2.0-flash-exp": { @@ -3128,6 +3129,7 @@ "supports_function_calling": true, "supports_vision": true, "supports_response_schema": true, + "supports_audio_output": true, "tpm": 4000000, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash" From 6ab132351f15e0fae505368b02a4d2386b682430 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 09:54:34 -0800 Subject: [PATCH 04/17] docs(team_model_add.md): clarify allowing teams to add models is an enterprise feature --- docs/my-website/docs/proxy/team_model_add.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/team_model_add.md b/docs/my-website/docs/proxy/team_model_add.md index bb57801624a9..a8a6878fd590 100644 --- a/docs/my-website/docs/proxy/team_model_add.md +++ b/docs/my-website/docs/proxy/team_model_add.md @@ -1,4 +1,13 @@ -# Allow Teams to Add Models +# ✨ Allow Teams to Add Models + +:::info + +This is an Enterprise feature. +[Enterprise Pricing](https://www.litellm.ai/#pricing) + +[Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) + +::: Allow team to add a their own models/key for that project - so any OpenAI call they make uses their OpenAI key. From 4a470ef12214c74e4bfbb9e781830410613bad2e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 10:12:13 -0800 Subject: [PATCH 05/17] fix(o1_transformation.py): add support for 'n', 'response_format' and 'stop' params for o1 and 'stream_options' param for o1-mini --- litellm/llms/openai/chat/o1_transformation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py index 115bb29b1da0..5a30c20d277b 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o1_transformation.py @@ -44,15 +44,15 @@ def get_supported_openai_params(self, model: str) -> list: "function_call", "functions", "top_p", - "n", "presence_penalty", "frequency_penalty", "top_logprobs", - "response_format", - "stop", - "stream_options", ] + if "o1-mini" not in model: + non_supported_params.append("stream") + non_supported_params.append("stream_options") + return [ param for param in all_openai_params if param not in non_supported_params ] From 01c3340d4809dc66c4cf4a7403a1126abab1072a Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 11:24:34 -0800 Subject: [PATCH 06/17] build(model_prices_and_context_window.json): add 'supports_system_message' to supporting openai models needed as o1-preview, and o1-mini models don't support 'system message --- ...odel_prices_and_context_window_backup.json | 138 ++++++++++++------ model_prices_and_context_window.json | 138 ++++++++++++------ 2 files changed, 184 insertions(+), 92 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index d1be19cc1776..27d750a6c56e 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -13,7 +13,8 @@ "supports_audio_input": true, "supports_audio_output": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "supports_system_messages": true }, "gpt-4": { "max_tokens": 4096, @@ -24,7 +25,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o": { "max_tokens": 16384, @@ -39,7 +41,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-audio-preview": { "max_tokens": 16384, @@ -54,7 +57,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-audio-preview-2024-10-01": { "max_tokens": 16384, @@ -69,7 +73,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini-audio-preview-2024-12-17": { "max_tokens": 16384, @@ -84,7 +89,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini": { "max_tokens": 16384, @@ -99,7 +105,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -114,7 +121,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "o1": { "max_tokens": 100000, @@ -128,7 +136,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "o1-mini": { "max_tokens": 65536, @@ -198,7 +207,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "chatgpt-4o-latest": { "max_tokens": 4096, @@ -211,7 +221,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-2024-05-13": { "max_tokens": 4096, @@ -224,7 +235,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -239,7 +251,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -254,7 +267,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-realtime-preview-2024-10-01": { "max_tokens": 4096, @@ -271,7 +285,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-realtime-preview": { "max_tokens": 4096, @@ -287,7 +302,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-realtime-preview-2024-12-17": { "max_tokens": 4096, @@ -303,7 +319,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini-realtime-preview": { "max_tokens": 4096, @@ -320,7 +337,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini-realtime-preview-2024-12-17": { "max_tokens": 4096, @@ -337,7 +355,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4-turbo-preview": { "max_tokens": 4096, @@ -349,7 +368,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-0314": { "max_tokens": 4096, @@ -359,7 +379,8 @@ "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-0613": { "max_tokens": 4096, @@ -370,7 +391,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-32k": { "max_tokens": 4096, @@ -380,7 +402,8 @@ "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-32k-0314": { "max_tokens": 4096, @@ -390,7 +413,8 @@ "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-32k-0613": { "max_tokens": 4096, @@ -400,7 +424,8 @@ "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-turbo": { "max_tokens": 4096, @@ -413,7 +438,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-turbo-2024-04-09": { "max_tokens": 4096, @@ -426,7 +452,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-1106-preview": { "max_tokens": 4096, @@ -438,7 +465,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-0125-preview": { "max_tokens": 4096, @@ -450,7 +478,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-vision-preview": { "max_tokens": 4096, @@ -461,7 +490,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-1106-vision-preview": { "max_tokens": 4096, @@ -472,7 +502,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo": { "max_tokens": 4097, @@ -483,7 +514,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, @@ -493,7 +525,8 @@ "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, @@ -504,7 +537,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, @@ -516,7 +550,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-0125": { "max_tokens": 16385, @@ -528,7 +563,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, @@ -538,7 +574,8 @@ "output_cost_per_token": 0.000004, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, @@ -548,7 +585,8 @@ "output_cost_per_token": 0.000004, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "ft:gpt-3.5-turbo": { "max_tokens": 4096, @@ -557,7 +595,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-3.5-turbo-0125": { "max_tokens": 4096, @@ -566,7 +605,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-3.5-turbo-1106": { "max_tokens": 4096, @@ -575,7 +615,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-3.5-turbo-0613": { "max_tokens": 4096, @@ -584,7 +625,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-4-0613": { "max_tokens": 4096, @@ -595,7 +637,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing" + "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing", + "supports_system_messages": true }, "ft:gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -608,7 +651,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_response_schema": true, - "supports_vision": true + "supports_vision": true, + "supports_system_messages": true }, "ft:gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -623,7 +667,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "ft:gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -638,7 +683,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "ft:davinci-002": { "max_tokens": 16384, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index d1be19cc1776..27d750a6c56e 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -13,7 +13,8 @@ "supports_audio_input": true, "supports_audio_output": true, "supports_prompt_caching": true, - "supports_response_schema": true + "supports_response_schema": true, + "supports_system_messages": true }, "gpt-4": { "max_tokens": 4096, @@ -24,7 +25,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o": { "max_tokens": 16384, @@ -39,7 +41,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-audio-preview": { "max_tokens": 16384, @@ -54,7 +57,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-audio-preview-2024-10-01": { "max_tokens": 16384, @@ -69,7 +73,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini-audio-preview-2024-12-17": { "max_tokens": 16384, @@ -84,7 +89,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini": { "max_tokens": 16384, @@ -99,7 +105,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -114,7 +121,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "o1": { "max_tokens": 100000, @@ -128,7 +136,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "o1-mini": { "max_tokens": 65536, @@ -198,7 +207,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": false, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "chatgpt-4o-latest": { "max_tokens": 4096, @@ -211,7 +221,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-2024-05-13": { "max_tokens": 4096, @@ -224,7 +235,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -239,7 +251,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -254,7 +267,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4o-realtime-preview-2024-10-01": { "max_tokens": 4096, @@ -271,7 +285,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-realtime-preview": { "max_tokens": 4096, @@ -287,7 +302,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-realtime-preview-2024-12-17": { "max_tokens": 4096, @@ -303,7 +319,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini-realtime-preview": { "max_tokens": 4096, @@ -320,7 +337,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4o-mini-realtime-preview-2024-12-17": { "max_tokens": 4096, @@ -337,7 +355,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_audio_input": true, - "supports_audio_output": true + "supports_audio_output": true, + "supports_system_messages": true }, "gpt-4-turbo-preview": { "max_tokens": 4096, @@ -349,7 +368,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-0314": { "max_tokens": 4096, @@ -359,7 +379,8 @@ "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-0613": { "max_tokens": 4096, @@ -370,7 +391,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-32k": { "max_tokens": 4096, @@ -380,7 +402,8 @@ "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-32k-0314": { "max_tokens": 4096, @@ -390,7 +413,8 @@ "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-32k-0613": { "max_tokens": 4096, @@ -400,7 +424,8 @@ "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-turbo": { "max_tokens": 4096, @@ -413,7 +438,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-turbo-2024-04-09": { "max_tokens": 4096, @@ -426,7 +452,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-1106-preview": { "max_tokens": 4096, @@ -438,7 +465,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-0125-preview": { "max_tokens": 4096, @@ -450,7 +478,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-vision-preview": { "max_tokens": 4096, @@ -461,7 +490,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-4-1106-vision-preview": { "max_tokens": 4096, @@ -472,7 +502,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo": { "max_tokens": 4097, @@ -483,7 +514,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, @@ -493,7 +525,8 @@ "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, @@ -504,7 +537,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, @@ -516,7 +550,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-0125": { "max_tokens": 16385, @@ -528,7 +563,8 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, @@ -538,7 +574,8 @@ "output_cost_per_token": 0.000004, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, @@ -548,7 +585,8 @@ "output_cost_per_token": 0.000004, "litellm_provider": "openai", "mode": "chat", - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "ft:gpt-3.5-turbo": { "max_tokens": 4096, @@ -557,7 +595,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-3.5-turbo-0125": { "max_tokens": 4096, @@ -566,7 +605,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-3.5-turbo-1106": { "max_tokens": 4096, @@ -575,7 +615,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-3.5-turbo-0613": { "max_tokens": 4096, @@ -584,7 +625,8 @@ "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000006, "litellm_provider": "openai", - "mode": "chat" + "mode": "chat", + "supports_system_messages": true }, "ft:gpt-4-0613": { "max_tokens": 4096, @@ -595,7 +637,8 @@ "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, - "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing" + "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing", + "supports_system_messages": true }, "ft:gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -608,7 +651,8 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_response_schema": true, - "supports_vision": true + "supports_vision": true, + "supports_system_messages": true }, "ft:gpt-4o-2024-11-20": { "max_tokens": 16384, @@ -623,7 +667,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "ft:gpt-4o-mini-2024-07-18": { "max_tokens": 16384, @@ -638,7 +683,8 @@ "supports_parallel_function_calling": true, "supports_response_schema": true, "supports_vision": true, - "supports_prompt_caching": true + "supports_prompt_caching": true, + "supports_system_messages": true }, "ft:davinci-002": { "max_tokens": 16384, From e6e368675b1b1826323e01f410397a817fcc8230 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 13:33:25 -0800 Subject: [PATCH 07/17] fix(o1_transformation.py): translate system message based on if o1 model supports it --- .../llms/openai/chat/gpt_transformation.py | 2 +- litellm/llms/openai/chat/o1_transformation.py | 7 +++--- litellm/llms/openai/openai.py | 6 +++-- litellm/utils.py | 16 +++++--------- tests/llm_translation/test_openai_o1.py | 22 ++++++++++++++----- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/litellm/llms/openai/chat/gpt_transformation.py b/litellm/llms/openai/chat/gpt_transformation.py index c6e63edb8c58..01bd720ba461 100644 --- a/litellm/llms/openai/chat/gpt_transformation.py +++ b/litellm/llms/openai/chat/gpt_transformation.py @@ -164,7 +164,7 @@ def map_openai_params( ) def _transform_messages( - self, messages: List[AllMessageValues] + self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: return messages diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py index 5a30c20d277b..f0ec262763e2 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o1_transformation.py @@ -16,6 +16,7 @@ import litellm from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage +from litellm.utils import supports_system_messages from .gpt_transformation import OpenAIGPTConfig @@ -95,16 +96,16 @@ def is_model_o1_reasoning_model(self, model: str) -> bool: return False def _transform_messages( - self, messages: List[AllMessageValues] + self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: """ Handles limitations of O-1 model family. - modalities: image => drop param (if user opts in to dropping param) - role: system ==> translate to role 'user' """ - + _supports_system_messages = supports_system_messages(model, "openai") for i, message in enumerate(messages): - if message["role"] == "system": + if message["role"] == "system" and not _supports_system_messages: new_message = ChatCompletionUserMessage( content=message["content"], role="user" ) diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index ffac461f385c..62b193e2dbd8 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -198,7 +198,7 @@ def _map_openai_params( return optional_params def _transform_messages( - self, messages: List[AllMessageValues] + self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: return messages @@ -456,7 +456,9 @@ def completion( # type: ignore # noqa: PLR0915 if isinstance(provider_config, OpenAIGPTConfig) or isinstance( provider_config, OpenAIConfig ): - messages = provider_config._transform_messages(messages) + messages = provider_config._transform_messages( + messages=messages, model=model + ) for _ in range( 2 diff --git a/litellm/utils.py b/litellm/utils.py index 8baafe21ed7b..7a3fa7eaffcf 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1645,17 +1645,11 @@ def supports_system_messages(model: str, custom_llm_provider: Optional[str]) -> Raises: Exception: If the given model is not found in model_prices_and_context_window.json. """ - try: - model_info = litellm.get_model_info( - model=model, custom_llm_provider=custom_llm_provider - ) - if model_info.get("supports_system_messages", False) is True: - return True - return False - except Exception: - raise Exception( - f"Model not supports system messages. You passed model={model}, custom_llm_provider={custom_llm_provider}." - ) + return _supports_factory( + model=model, + custom_llm_provider=custom_llm_provider, + key="supports_system_messages", + ) def supports_response_schema(model: str, custom_llm_provider: Optional[str]) -> bool: diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py index 2bb82c6a28da..32948a6042cb 100644 --- a/tests/llm_translation/test_openai_o1.py +++ b/tests/llm_translation/test_openai_o1.py @@ -17,14 +17,19 @@ from litellm import Choices, Message, ModelResponse +@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"]) @pytest.mark.asyncio -async def test_o1_handle_system_role(): +async def test_o1_handle_system_role(model): """ Tests that: - max_tokens is translated to 'max_completion_tokens' - role 'system' is translated to 'user' """ from openai import AsyncOpenAI + from litellm.utils import supports_system_messages + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") litellm.set_verbose = True @@ -35,9 +40,9 @@ async def test_o1_handle_system_role(): ) as mock_client: try: await litellm.acompletion( - model="o1-preview", + model=model, max_tokens=10, - messages=[{"role": "system", "content": "Hello!"}], + messages=[{"role": "system", "content": "Be a good bot!"}], client=client, ) except Exception as e: @@ -48,9 +53,16 @@ async def test_o1_handle_system_role(): print("request_body: ", request_body) - assert request_body["model"] == "o1-preview" + assert request_body["model"] == model assert request_body["max_completion_tokens"] == 10 - assert request_body["messages"] == [{"role": "user", "content": "Hello!"}] + if supports_system_messages(model, "openai"): + assert request_body["messages"] == [ + {"role": "system", "content": "Be a good bot!"} + ] + else: + assert request_body["messages"] == [ + {"role": "user", "content": "Be a good bot!"} + ] @pytest.mark.asyncio From 9a0d6db9377ee6cafd4df176f7ff871e12ef408c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 13:57:16 -0800 Subject: [PATCH 08/17] fix(o1_transformation.py): return 'stream' param support if o1-mini/o1-preview o1 currently doesn't support streaming, but the other model versions do Fixes https://github.com/BerriAI/litellm/issues/7292 --- litellm/llms/openai/chat/o1_transformation.py | 4 +- tests/llm_translation/test_openai_o1.py | 55 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py index f0ec262763e2..8572ef54f25b 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o1_transformation.py @@ -50,7 +50,9 @@ def get_supported_openai_params(self, model: str) -> list: "top_logprobs", ] - if "o1-mini" not in model: + supported_streaming_models = ["o1-preview", "o1-mini"] + + if model not in supported_streaming_models: non_supported_params.append("stream") non_supported_params.append("stream_options") diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py index 32948a6042cb..9f46003461c2 100644 --- a/tests/llm_translation/test_openai_o1.py +++ b/tests/llm_translation/test_openai_o1.py @@ -65,6 +65,61 @@ async def test_o1_handle_system_role(model): ] +@pytest.mark.parametrize( + "model, expected_streaming_support", + [("o1-preview", True), ("o1-mini", True), ("o1", False)], +) +@pytest.mark.asyncio +async def test_o1_handle_streaming_optional_params(model, expected_streaming_support): + """ + Tests that: + - max_tokens is translated to 'max_completion_tokens' + - role 'system' is translated to 'user' + """ + from openai import AsyncOpenAI + from litellm.utils import ProviderConfigManager + from litellm.types.utils import LlmProviders + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + config = ProviderConfigManager.get_provider_chat_config( + model=model, provider=LlmProviders.OPENAI + ) + + supported_params = config.get_supported_openai_params(model=model) + + assert expected_streaming_support == ("stream" in supported_params) + + +# @pytest.mark.parametrize( +# "model", +# ["o1"], # "o1-preview", "o1-mini", +# ) +# @pytest.mark.asyncio +# async def test_o1_handle_streaming_e2e(model): +# """ +# Tests that: +# - max_tokens is translated to 'max_completion_tokens' +# - role 'system' is translated to 'user' +# """ +# from openai import AsyncOpenAI +# from litellm.utils import ProviderConfigManager +# from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper +# from litellm.types.utils import LlmProviders + +# resp = litellm.completion( +# model=model, +# messages=[{"role": "user", "content": "Hello!"}], +# stream=True, +# ) +# assert isinstance(resp, CustomStreamWrapper) +# for chunk in resp: +# print("chunk: ", chunk) + +# assert True + + @pytest.mark.asyncio @pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"]) async def test_o1_max_completion_tokens(model: str): From 0715ccc59f026dacf96054a13e87ee77803f1dd1 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 14:31:52 -0800 Subject: [PATCH 09/17] fix(o1_transformation.py): return tool calling/response_format in supported params if model map says so Fixes https://github.com/BerriAI/litellm/issues/7292 --- .../llms/ollama/completion/transformation.py | 6 +- litellm/llms/openai/chat/o1_transformation.py | 26 +- ...odel_prices_and_context_window_backup.json | 14 +- litellm/types/utils.py | 15 +- litellm/utils.py | 256 ++++++++---------- model_prices_and_context_window.json | 14 +- tests/llm_translation/test_openai_o1.py | 29 ++ 7 files changed, 188 insertions(+), 172 deletions(-) diff --git a/litellm/llms/ollama/completion/transformation.py b/litellm/llms/ollama/completion/transformation.py index 3ba3d29587f3..d9cdff20d42b 100644 --- a/litellm/llms/ollama/completion/transformation.py +++ b/litellm/llms/ollama/completion/transformation.py @@ -23,6 +23,7 @@ from litellm.types.utils import ( GenericStreamingChunk, ModelInfo, + ModelInfoBase, ModelResponse, ProviderField, StreamingChoices, @@ -198,7 +199,7 @@ def _get_max_tokens(self, ollama_model_info: dict) -> Optional[int]: return v return None - def get_model_info(self, model: str) -> ModelInfo: + def get_model_info(self, model: str) -> ModelInfoBase: """ curl http://localhost:11434/api/show -d '{ "name": "mistral" @@ -222,11 +223,10 @@ def get_model_info(self, model: str) -> ModelInfo: _max_tokens: Optional[int] = self._get_max_tokens(model_info) - return ModelInfo( + return ModelInfoBase( key=model, litellm_provider="ollama", mode="chat", - supported_openai_params=self.get_supported_openai_params(model=model), supports_function_calling=self._supports_function_calling(model_info), input_cost_per_token=0.0, output_cost_per_token=0.0, diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py index 8572ef54f25b..97899d67fe04 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o1_transformation.py @@ -16,7 +16,11 @@ import litellm from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage -from litellm.utils import supports_system_messages +from litellm.utils import ( + supports_function_calling, + supports_response_schema, + supports_system_messages, +) from .gpt_transformation import OpenAIGPTConfig @@ -39,11 +43,6 @@ def get_supported_openai_params(self, model: str) -> list: all_openai_params = super().get_supported_openai_params(model=model) non_supported_params = [ "logprobs", - "tools", - "tool_choice", - "parallel_tool_calls", - "function_call", - "functions", "top_p", "presence_penalty", "frequency_penalty", @@ -51,14 +50,27 @@ def get_supported_openai_params(self, model: str) -> list: ] supported_streaming_models = ["o1-preview", "o1-mini"] + _supports_function_calling = supports_function_calling(model, "openai") + _supports_response_schema = supports_response_schema(model, "openai") if model not in supported_streaming_models: non_supported_params.append("stream") non_supported_params.append("stream_options") - return [ + if not _supports_function_calling: + non_supported_params.append("tools") + non_supported_params.append("tool_choice") + non_supported_params.append("parallel_tool_calls") + non_supported_params.append("function_call") + non_supported_params.append("functions") + + if not _supports_response_schema: + non_supported_params.append("response_format") + + returned_params = [ param for param in all_openai_params if param not in non_supported_params ] + return returned_params def map_openai_params( self, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 27d750a6c56e..984e5d940da2 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -137,7 +137,8 @@ "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_response_schema": true }, "o1-mini": { "max_tokens": 65536, @@ -148,8 +149,6 @@ "cache_read_input_token_cost": 0.0000015, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -162,8 +161,6 @@ "cache_read_input_token_cost": 0.0000015, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -176,8 +173,6 @@ "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -190,8 +185,6 @@ "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -208,7 +201,8 @@ "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_response_schema": true }, "chatgpt-4o-latest": { "max_tokens": 4096, diff --git a/litellm/types/utils.py b/litellm/types/utils.py index d4b6c789a409..ca28b15b71aa 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -74,11 +74,7 @@ class ProviderField(TypedDict): field_value: str -class ModelInfo(TypedDict, total=False): - """ - Model info for a given model, this is information found in litellm.model_prices_and_context_window.json - """ - +class ModelInfoBase(TypedDict, total=False): key: Required[str] # the key in litellm.model_cost which is returned max_tokens: Required[Optional[int]] @@ -119,7 +115,6 @@ class ModelInfo(TypedDict, total=False): "completion", "embedding", "image_generation", "chat", "audio_transcription" ] ] - supported_openai_params: Required[Optional[List[str]]] supports_system_messages: Optional[bool] supports_response_schema: Optional[bool] supports_vision: Optional[bool] @@ -133,6 +128,14 @@ class ModelInfo(TypedDict, total=False): rpm: Optional[int] +class ModelInfo(ModelInfoBase, total=False): + """ + Model info for a given model, this is information found in litellm.model_prices_and_context_window.json + """ + + supported_openai_params: Required[Optional[List[str]]] + + class GenericStreamingChunk(TypedDict, total=False): text: Required[str] tool_use: Optional[ChatCompletionToolCallChunk] diff --git a/litellm/utils.py b/litellm/utils.py index 7a3fa7eaffcf..a0666267c959 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -132,6 +132,7 @@ LlmProviders, Message, ModelInfo, + ModelInfoBase, ModelResponse, ModelResponseStream, ProviderField, @@ -1678,25 +1679,11 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) -> if custom_llm_provider in PROVIDERS_GLOBALLY_SUPPORT_RESPONSE_SCHEMA: return True - try: - ## GET MODEL INFO - model_info = litellm.get_model_info( - model=model, custom_llm_provider=custom_llm_provider - ) - - if model_info.get("supports_response_schema", False) is True: - return True - except Exception: - ## check if provider supports response schema globally - supported_params = get_supported_openai_params( - model=model, - custom_llm_provider=custom_llm_provider, - request_type="chat_completion", - ) - if supported_params is not None and "response_schema" in supported_params: - return True - - return False + return _supports_factory( + model=model, + custom_llm_provider=custom_llm_provider, + key="supports_response_schema", + ) def supports_function_calling( @@ -1715,23 +1702,11 @@ def supports_function_calling( Raises: Exception: If the given model is not found or there's an error in retrieval. """ - try: - model, custom_llm_provider, _, _ = litellm.get_llm_provider( - model=model, custom_llm_provider=custom_llm_provider - ) - - ## CHECK IF MODEL SUPPORTS FUNCTION CALLING ## - model_info = litellm.get_model_info( - model=model, custom_llm_provider=custom_llm_provider - ) - - if model_info.get("supports_function_calling", False) is True: - return True - return False - except Exception as e: - raise Exception( - f"Model not found or error in checking function calling support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" - ) + return _supports_factory( + model=model, + custom_llm_provider=custom_llm_provider, + key="supports_function_calling", + ) def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str) -> bool: @@ -1753,7 +1728,7 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str) model=model, custom_llm_provider=custom_llm_provider ) - model_info = litellm.get_model_info( + model_info = _get_model_info_helper( model=model, custom_llm_provider=custom_llm_provider ) @@ -4190,99 +4165,35 @@ def _get_potential_model_names( ) -def get_model_info( # noqa: PLR0915 - model: str, custom_llm_provider: Optional[str] = None -) -> ModelInfo: - """ - Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model. - - Parameters: - - model (str): The name of the model. - - custom_llm_provider (str | null): the provider used for the model. If provided, used to check if the litellm model info is for that provider. - - Returns: - dict: A dictionary containing the following information: - key: Required[str] # the key in litellm.model_cost which is returned - max_tokens: Required[Optional[int]] - max_input_tokens: Required[Optional[int]] - max_output_tokens: Required[Optional[int]] - input_cost_per_token: Required[float] - input_cost_per_character: Optional[float] # only for vertex ai models - input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models - input_cost_per_character_above_128k_tokens: Optional[ - float - ] # only for vertex ai models - input_cost_per_query: Optional[float] # only for rerank models - input_cost_per_image: Optional[float] # only for vertex ai models - input_cost_per_audio_token: Optional[float] - input_cost_per_audio_per_second: Optional[float] # only for vertex ai models - input_cost_per_video_per_second: Optional[float] # only for vertex ai models - output_cost_per_token: Required[float] - output_cost_per_audio_token: Optional[float] - output_cost_per_character: Optional[float] # only for vertex ai models - output_cost_per_token_above_128k_tokens: Optional[ - float - ] # only for vertex ai models - output_cost_per_character_above_128k_tokens: Optional[ - float - ] # only for vertex ai models - output_cost_per_image: Optional[float] - output_vector_size: Optional[int] - output_cost_per_video_per_second: Optional[float] # only for vertex ai models - output_cost_per_audio_per_second: Optional[float] # only for vertex ai models - litellm_provider: Required[str] - mode: Required[ - Literal[ - "completion", "embedding", "image_generation", "chat", "audio_transcription" - ] - ] - supported_openai_params: Required[Optional[List[str]]] - supports_system_messages: Optional[bool] - supports_response_schema: Optional[bool] - supports_vision: Optional[bool] - supports_function_calling: Optional[bool] - supports_prompt_caching: Optional[bool] - supports_audio_input: Optional[bool] - supports_audio_output: Optional[bool] - supports_pdf_input: Optional[bool] - Raises: - Exception: If the model is not mapped yet. - - Example: - >>> get_model_info("gpt-4") - { - "max_tokens": 8192, - "input_cost_per_token": 0.00003, - "output_cost_per_token": 0.00006, - "litellm_provider": "openai", - "mode": "chat", - "supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"] - } - """ - supported_openai_params: Union[List[str], None] = [] - - def _get_max_position_embeddings(model_name): - # Construct the URL for the config.json file - config_url = f"https://huggingface.co/{model_name}/raw/main/config.json" +def _get_max_position_embeddings(model_name: str) -> Optional[int]: + # Construct the URL for the config.json file + config_url = f"https://huggingface.co/{model_name}/raw/main/config.json" - try: - # Make the HTTP request to get the raw JSON file - response = litellm.module_level_client.get(config_url) - response.raise_for_status() # Raise an exception for bad responses (4xx or 5xx) + try: + # Make the HTTP request to get the raw JSON file + response = litellm.module_level_client.get(config_url) + response.raise_for_status() # Raise an exception for bad responses (4xx or 5xx) - # Parse the JSON response - config_json = response.json() + # Parse the JSON response + config_json = response.json() - # Extract and return the max_position_embeddings - max_position_embeddings = config_json.get("max_position_embeddings") + # Extract and return the max_position_embeddings + max_position_embeddings = config_json.get("max_position_embeddings") - if max_position_embeddings is not None: - return max_position_embeddings - else: - return None - except Exception: + if max_position_embeddings is not None: + return max_position_embeddings + else: return None + except Exception: + return None + +def _get_model_info_helper( # noqa: PLR0915 + model: str, custom_llm_provider: Optional[str] = None +) -> ModelInfoBase: + """ + Helper for 'get_model_info'. Separated out to avoid infinite loop caused by returning 'supported_openai_param's + """ try: azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models} if model in azure_llms: @@ -4308,12 +4219,9 @@ def _get_max_position_embeddings(model_name): split_model = potential_model_names["split_model"] custom_llm_provider = potential_model_names["custom_llm_provider"] ######################### - supported_openai_params = litellm.get_supported_openai_params( - model=model, custom_llm_provider=custom_llm_provider - ) if custom_llm_provider == "huggingface": max_tokens = _get_max_position_embeddings(model_name=model) - return ModelInfo( + return ModelInfoBase( key=model, max_tokens=max_tokens, # type: ignore max_input_tokens=None, @@ -4322,7 +4230,6 @@ def _get_max_position_embeddings(model_name): output_cost_per_token=0, litellm_provider="huggingface", mode="chat", - supported_openai_params=supported_openai_params, supports_system_messages=None, supports_response_schema=None, supports_function_calling=None, @@ -4347,7 +4254,6 @@ def _get_max_position_embeddings(model_name): if combined_model_name in litellm.model_cost: key = combined_model_name _model_info = _get_model_info_from_model_cost(key=key) - _model_info["supported_openai_params"] = supported_openai_params if not _check_provider_match( model_info=_model_info, custom_llm_provider=custom_llm_provider ): @@ -4355,7 +4261,6 @@ def _get_max_position_embeddings(model_name): if _model_info is None and model in litellm.model_cost: key = model _model_info = _get_model_info_from_model_cost(key=key) - _model_info["supported_openai_params"] = supported_openai_params if not _check_provider_match( model_info=_model_info, custom_llm_provider=custom_llm_provider ): @@ -4366,7 +4271,6 @@ def _get_max_position_embeddings(model_name): ): key = combined_stripped_model_name _model_info = _get_model_info_from_model_cost(key=key) - _model_info["supported_openai_params"] = supported_openai_params if not _check_provider_match( model_info=_model_info, custom_llm_provider=custom_llm_provider ): @@ -4374,7 +4278,6 @@ def _get_max_position_embeddings(model_name): if _model_info is None and stripped_model_name in litellm.model_cost: key = stripped_model_name _model_info = _get_model_info_from_model_cost(key=key) - _model_info["supported_openai_params"] = supported_openai_params if not _check_provider_match( model_info=_model_info, custom_llm_provider=custom_llm_provider ): @@ -4382,7 +4285,6 @@ def _get_max_position_embeddings(model_name): if _model_info is None and split_model in litellm.model_cost: key = split_model _model_info = _get_model_info_from_model_cost(key=key) - _model_info["supported_openai_params"] = supported_openai_params if not _check_provider_match( model_info=_model_info, custom_llm_provider=custom_llm_provider ): @@ -4420,7 +4322,7 @@ def _get_max_position_embeddings(model_name): ) _output_cost_per_token = 0 - return ModelInfo( + return ModelInfoBase( key=key, max_tokens=_model_info.get("max_tokens", None), max_input_tokens=_model_info.get("max_input_tokens", None), @@ -4463,7 +4365,6 @@ def _get_max_position_embeddings(model_name): "litellm_provider", custom_llm_provider ), mode=_model_info.get("mode"), # type: ignore - supported_openai_params=supported_openai_params, supports_system_messages=_model_info.get( "supports_system_messages", None ), @@ -4496,6 +4397,89 @@ def _get_max_position_embeddings(model_name): ) +def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo: + """ + Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token for a given model. + + Parameters: + - model (str): The name of the model. + - custom_llm_provider (str | null): the provider used for the model. If provided, used to check if the litellm model info is for that provider. + + Returns: + dict: A dictionary containing the following information: + key: Required[str] # the key in litellm.model_cost which is returned + max_tokens: Required[Optional[int]] + max_input_tokens: Required[Optional[int]] + max_output_tokens: Required[Optional[int]] + input_cost_per_token: Required[float] + input_cost_per_character: Optional[float] # only for vertex ai models + input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models + input_cost_per_character_above_128k_tokens: Optional[ + float + ] # only for vertex ai models + input_cost_per_query: Optional[float] # only for rerank models + input_cost_per_image: Optional[float] # only for vertex ai models + input_cost_per_audio_token: Optional[float] + input_cost_per_audio_per_second: Optional[float] # only for vertex ai models + input_cost_per_video_per_second: Optional[float] # only for vertex ai models + output_cost_per_token: Required[float] + output_cost_per_audio_token: Optional[float] + output_cost_per_character: Optional[float] # only for vertex ai models + output_cost_per_token_above_128k_tokens: Optional[ + float + ] # only for vertex ai models + output_cost_per_character_above_128k_tokens: Optional[ + float + ] # only for vertex ai models + output_cost_per_image: Optional[float] + output_vector_size: Optional[int] + output_cost_per_video_per_second: Optional[float] # only for vertex ai models + output_cost_per_audio_per_second: Optional[float] # only for vertex ai models + litellm_provider: Required[str] + mode: Required[ + Literal[ + "completion", "embedding", "image_generation", "chat", "audio_transcription" + ] + ] + supported_openai_params: Required[Optional[List[str]]] + supports_system_messages: Optional[bool] + supports_response_schema: Optional[bool] + supports_vision: Optional[bool] + supports_function_calling: Optional[bool] + supports_prompt_caching: Optional[bool] + supports_audio_input: Optional[bool] + supports_audio_output: Optional[bool] + supports_pdf_input: Optional[bool] + Raises: + Exception: If the model is not mapped yet. + + Example: + >>> get_model_info("gpt-4") + { + "max_tokens": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai", + "mode": "chat", + "supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"] + } + """ + supported_openai_params = litellm.get_supported_openai_params( + model=model, custom_llm_provider=custom_llm_provider + ) + + _model_info = _get_model_info_helper( + model=model, + custom_llm_provider=custom_llm_provider, + ) + + returned_model_info = ModelInfo( + **_model_info, supported_openai_params=supported_openai_params + ) + + return returned_model_info + + def json_schema_type(python_type_name: str): """Converts standard python types to json schema types diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 27d750a6c56e..984e5d940da2 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -137,7 +137,8 @@ "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_response_schema": true }, "o1-mini": { "max_tokens": 65536, @@ -148,8 +149,6 @@ "cache_read_input_token_cost": 0.0000015, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -162,8 +161,6 @@ "cache_read_input_token_cost": 0.0000015, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -176,8 +173,6 @@ "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -190,8 +185,6 @@ "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", - "supports_function_calling": true, - "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true }, @@ -208,7 +201,8 @@ "supports_parallel_function_calling": true, "supports_vision": false, "supports_prompt_caching": true, - "supports_system_messages": true + "supports_system_messages": true, + "supports_response_schema": true }, "chatgpt-4o-latest": { "max_tokens": 4096, diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py index 9f46003461c2..1e2e9d3929b3 100644 --- a/tests/llm_translation/test_openai_o1.py +++ b/tests/llm_translation/test_openai_o1.py @@ -92,6 +92,35 @@ async def test_o1_handle_streaming_optional_params(model, expected_streaming_sup assert expected_streaming_support == ("stream" in supported_params) +@pytest.mark.parametrize( + "model, expected_tool_calling_support", + [("o1-preview", False), ("o1-mini", False), ("o1", True)], +) +@pytest.mark.asyncio +async def test_o1_handle_tool_calling_optional_params( + model, expected_tool_calling_support +): + """ + Tests that: + - max_tokens is translated to 'max_completion_tokens' + - role 'system' is translated to 'user' + """ + from openai import AsyncOpenAI + from litellm.utils import ProviderConfigManager + from litellm.types.utils import LlmProviders + + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + config = ProviderConfigManager.get_provider_chat_config( + model=model, provider=LlmProviders.OPENAI + ) + + supported_params = config.get_supported_openai_params(model=model) + + assert expected_tool_calling_support == ("tools" in supported_params) + + # @pytest.mark.parametrize( # "model", # ["o1"], # "o1-preview", "o1-mini", From e331213ef20a1a373b450263feee12821430021e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 15:17:24 -0800 Subject: [PATCH 10/17] fix: fix linting errors --- litellm/llms/databricks/chat/transformation.py | 12 ++++++------ litellm/llms/deepseek/chat/transformation.py | 10 +++++----- litellm/llms/groq/chat/transformation.py | 2 +- litellm/llms/mistral/mistral_chat_transformation.py | 4 ++-- litellm/llms/oobabooga/chat/transformation.py | 5 ----- litellm/llms/openai_like/chat/handler.py | 4 +++- 6 files changed, 17 insertions(+), 20 deletions(-) diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py index 581eb1366c83..f154ed5c1c52 100644 --- a/litellm/llms/databricks/chat/transformation.py +++ b/litellm/llms/databricks/chat/transformation.py @@ -7,14 +7,14 @@ from pydantic import BaseModel -from litellm.types.llms.openai import AllMessageValues -from litellm.types.utils import ProviderField - -from ...openai_like.chat.transformation import OpenAILikeChatConfig from litellm.litellm_core_utils.prompt_templates.common_utils import ( handle_messages_with_content_list_to_str_conversion, strip_name_from_messages, ) +from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import ProviderField + +from ...openai_like.chat.transformation import OpenAILikeChatConfig class DatabricksConfig(OpenAILikeChatConfig): @@ -86,7 +86,7 @@ def _should_fake_stream(self, optional_params: dict) -> bool: return False def _transform_messages( - self, messages: List[AllMessageValues] + self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: """ Databricks does not support: @@ -102,4 +102,4 @@ def _transform_messages( new_messages.append(_message) new_messages = handle_messages_with_content_list_to_str_conversion(new_messages) new_messages = strip_name_from_messages(new_messages) - return super()._transform_messages(new_messages) + return super()._transform_messages(messages=new_messages, model=model) diff --git a/litellm/llms/deepseek/chat/transformation.py b/litellm/llms/deepseek/chat/transformation.py index 288b1b7c16a5..b2c72b00107f 100644 --- a/litellm/llms/deepseek/chat/transformation.py +++ b/litellm/llms/deepseek/chat/transformation.py @@ -8,26 +8,26 @@ from pydantic import BaseModel import litellm +from litellm.litellm_core_utils.prompt_templates.common_utils import ( + handle_messages_with_content_list_to_str_conversion, +) from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage from ....utils import _remove_additional_properties, _remove_strict_from_schema from ...openai.chat.gpt_transformation import OpenAIGPTConfig -from litellm.litellm_core_utils.prompt_templates.common_utils import ( - handle_messages_with_content_list_to_str_conversion, -) class DeepSeekChatConfig(OpenAIGPTConfig): def _transform_messages( - self, messages: List[AllMessageValues] + self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: """ DeepSeek does not support content in list format. """ messages = handle_messages_with_content_list_to_str_conversion(messages) - return super()._transform_messages(messages) + return super()._transform_messages(messages=messages, model=model) def _get_openai_compatible_provider_info( self, api_base: Optional[str], api_key: Optional[str] diff --git a/litellm/llms/groq/chat/transformation.py b/litellm/llms/groq/chat/transformation.py index 267d52761820..78e844f5058a 100644 --- a/litellm/llms/groq/chat/transformation.py +++ b/litellm/llms/groq/chat/transformation.py @@ -61,7 +61,7 @@ def __init__( def get_config(cls): return super().get_config() - def _transform_messages(self, messages: List[AllMessageValues]) -> List: + def _transform_messages(self, messages: List[AllMessageValues], model: str) -> List: for idx, message in enumerate(messages): """ 1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839 diff --git a/litellm/llms/mistral/mistral_chat_transformation.py b/litellm/llms/mistral/mistral_chat_transformation.py index 2279e807fccd..97af6d4229d5 100644 --- a/litellm/llms/mistral/mistral_chat_transformation.py +++ b/litellm/llms/mistral/mistral_chat_transformation.py @@ -9,11 +9,11 @@ import types from typing import List, Literal, Optional, Tuple, Union -from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig from litellm.litellm_core_utils.prompt_templates.common_utils import ( handle_messages_with_content_list_to_str_conversion, strip_none_values_from_message, ) +from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues @@ -148,7 +148,7 @@ def _get_openai_compatible_provider_info( return api_base, dynamic_api_key def _transform_messages( - self, messages: List[AllMessageValues] + self, messages: List[AllMessageValues], model: str ) -> List[AllMessageValues]: """ - handles scenario where content is list and not string diff --git a/litellm/llms/oobabooga/chat/transformation.py b/litellm/llms/oobabooga/chat/transformation.py index 79ccca840ce8..f3a25f1df2ba 100644 --- a/litellm/llms/oobabooga/chat/transformation.py +++ b/litellm/llms/oobabooga/chat/transformation.py @@ -23,11 +23,6 @@ class OobaboogaConfig(OpenAIGPTConfig): - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - return messages - def get_error_class( self, error_message: str, diff --git a/litellm/llms/openai_like/chat/handler.py b/litellm/llms/openai_like/chat/handler.py index 2252dfc9ccc9..dee57b9a28b5 100644 --- a/litellm/llms/openai_like/chat/handler.py +++ b/litellm/llms/openai_like/chat/handler.py @@ -284,7 +284,9 @@ def completion( if isinstance(provider_config, OpenAIGPTConfig) or isinstance( provider_config, OpenAIConfig ): - messages = provider_config._transform_messages(messages) + messages = provider_config._transform_messages( + messages=messages, model=model + ) data = { "model": model, From fcf515b5001e34d3372ecd9f4f11ceab20f4a521 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 15:25:18 -0800 Subject: [PATCH 11/17] fix: update '_transform_messages' --- litellm/litellm_core_utils/prompt_templates/factory.py | 4 +++- litellm/llms/anthropic/completion/transformation.py | 6 ------ litellm/llms/azure_ai/chat/transformation.py | 3 ++- litellm/llms/clarifai/chat/transformation.py | 5 ----- litellm/llms/cloudflare/chat/transformation.py | 5 ----- litellm/llms/cohere/chat/transformation.py | 5 ----- litellm/llms/cohere/completion/transformation.py | 6 ------ litellm/llms/databricks/chat/handler.py | 7 +++++-- litellm/llms/groq/chat/handler.py | 7 +++++-- litellm/llms/huggingface/chat/transformation.py | 6 ------ litellm/llms/ollama/completion/transformation.py | 5 ----- litellm/llms/predibase/chat/transformation.py | 5 ----- litellm/llms/replicate/chat/transformation.py | 5 ----- litellm/llms/sagemaker/completion/transformation.py | 6 ------ litellm/llms/watsonx/completion/transformation.py | 6 ------ 15 files changed, 15 insertions(+), 66 deletions(-) diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py index 0b5bd48bb8d4..71de4398a05b 100644 --- a/litellm/litellm_core_utils/prompt_templates/factory.py +++ b/litellm/litellm_core_utils/prompt_templates/factory.py @@ -3144,7 +3144,9 @@ def prompt_factory( else: return gemini_text_image_pt(messages=messages) elif custom_llm_provider == "mistral": - return litellm.MistralConfig()._transform_messages(messages=messages) + return litellm.MistralConfig()._transform_messages( + messages=messages, model=model + ) elif custom_llm_provider == "bedrock": if "amazon.titan-text" in model: return amazon_titan_pt(messages=messages) diff --git a/litellm/llms/anthropic/completion/transformation.py b/litellm/llms/anthropic/completion/transformation.py index df8064ddf48d..57cdd95524a9 100644 --- a/litellm/llms/anthropic/completion/transformation.py +++ b/litellm/llms/anthropic/completion/transformation.py @@ -260,12 +260,6 @@ def _get_anthropic_text_prompt_from_messages( return str(prompt) - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - "Not required" - raise NotImplementedError - def get_model_response_iterator( self, streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse], diff --git a/litellm/llms/azure_ai/chat/transformation.py b/litellm/llms/azure_ai/chat/transformation.py index bce48e6fc01a..4c60c93f04fb 100644 --- a/litellm/llms/azure_ai/chat/transformation.py +++ b/litellm/llms/azure_ai/chat/transformation.py @@ -2,11 +2,11 @@ import litellm from litellm._logging import verbose_logger -from litellm.llms.openai.openai import OpenAIConfig from litellm.litellm_core_utils.prompt_templates.common_utils import ( _audio_or_image_in_message_content, convert_content_list_to_str, ) +from litellm.llms.openai.openai import OpenAIConfig from litellm.secret_managers.main import get_secret_str from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import ProviderField @@ -33,6 +33,7 @@ def get_required_params(self) -> List[ProviderField]: def _transform_messages( self, messages: List[AllMessageValues], + model: str, ) -> List: """ - Azure AI Studio doesn't support content as a list. This handles: diff --git a/litellm/llms/clarifai/chat/transformation.py b/litellm/llms/clarifai/chat/transformation.py index 5dc22c284ef5..c832ff89244f 100644 --- a/litellm/llms/clarifai/chat/transformation.py +++ b/litellm/llms/clarifai/chat/transformation.py @@ -131,11 +131,6 @@ def validate_environment( headers["Authorization"] = f"Bearer {api_key}" return headers - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - raise NotImplementedError - def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: diff --git a/litellm/llms/cloudflare/chat/transformation.py b/litellm/llms/cloudflare/chat/transformation.py index 596875919a3c..8f6d5ccc1eae 100644 --- a/litellm/llms/cloudflare/chat/transformation.py +++ b/litellm/llms/cloudflare/chat/transformation.py @@ -158,11 +158,6 @@ def get_error_class( message=error_message, ) - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - raise NotImplementedError - def get_model_response_iterator( self, streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse], diff --git a/litellm/llms/cohere/chat/transformation.py b/litellm/llms/cohere/chat/transformation.py index 39df1e021f43..464ef1f2687c 100644 --- a/litellm/llms/cohere/chat/transformation.py +++ b/litellm/llms/cohere/chat/transformation.py @@ -365,8 +365,3 @@ def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: return CohereError(status_code=status_code, message=error_message) - - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - raise NotImplementedError diff --git a/litellm/llms/cohere/completion/transformation.py b/litellm/llms/cohere/completion/transformation.py index 61d5ca5ad39e..23ba87f11544 100644 --- a/litellm/llms/cohere/completion/transformation.py +++ b/litellm/llms/cohere/completion/transformation.py @@ -121,12 +121,6 @@ def validate_environment( api_key=api_key, ) - def _transform_messages( - self, - messages: List[AllMessageValues], - ) -> List[AllMessageValues]: - raise NotImplementedError - def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: diff --git a/litellm/llms/databricks/chat/handler.py b/litellm/llms/databricks/chat/handler.py index 078235a284c2..39fb79493bce 100644 --- a/litellm/llms/databricks/chat/handler.py +++ b/litellm/llms/databricks/chat/handler.py @@ -2,11 +2,12 @@ Handles the chat completion request for Databricks """ -from typing import Any, Callable, Literal, Optional, Tuple, Union +from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast from httpx._config import Timeout from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import CustomStreamingDecoder from litellm.utils import ModelResponse @@ -44,7 +45,9 @@ def completion( streaming_decoder: Optional[CustomStreamingDecoder] = None, fake_stream: bool = False, ): - messages = DatabricksConfig()._transform_messages(messages) # type: ignore + messages = DatabricksConfig()._transform_messages( + messages=cast(List[AllMessageValues], messages), model=model + ) api_base, headers = self.databricks_validate_environment( api_base=api_base, api_key=api_key, diff --git a/litellm/llms/groq/chat/handler.py b/litellm/llms/groq/chat/handler.py index a6d6822a5e8a..a29a9009dd75 100644 --- a/litellm/llms/groq/chat/handler.py +++ b/litellm/llms/groq/chat/handler.py @@ -2,11 +2,12 @@ Handles the chat completion request for groq """ -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, List, Optional, Union, cast from httpx._config import Timeout from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import CustomStreamingDecoder from litellm.utils import ModelResponse @@ -42,7 +43,9 @@ def completion( streaming_decoder: Optional[CustomStreamingDecoder] = None, fake_stream: bool = False, ): - messages = GroqChatConfig()._transform_messages(messages) # type: ignore + messages = GroqChatConfig()._transform_messages( + messages=cast(List[AllMessageValues], messages), model=model + ) if optional_params.get("stream") is True: fake_stream = GroqChatConfig()._should_fake_stream(optional_params) diff --git a/litellm/llms/huggingface/chat/transformation.py b/litellm/llms/huggingface/chat/transformation.py index c1bdc9ca6738..2c35f2a20d78 100644 --- a/litellm/llms/huggingface/chat/transformation.py +++ b/litellm/llms/huggingface/chat/transformation.py @@ -369,12 +369,6 @@ def validate_environment( headers = {**headers, **default_headers} return headers - def _transform_messages( - self, - messages: List[AllMessageValues], - ) -> List[AllMessageValues]: - return messages - def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: diff --git a/litellm/llms/ollama/completion/transformation.py b/litellm/llms/ollama/completion/transformation.py index d9cdff20d42b..46e67b4720e0 100644 --- a/litellm/llms/ollama/completion/transformation.py +++ b/litellm/llms/ollama/completion/transformation.py @@ -235,11 +235,6 @@ def get_model_info(self, model: str) -> ModelInfoBase: max_output_tokens=_max_tokens, ) - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - return messages - def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, Headers] ) -> BaseLLMException: diff --git a/litellm/llms/predibase/chat/transformation.py b/litellm/llms/predibase/chat/transformation.py index 016b9e700f7c..597f24794b2a 100644 --- a/litellm/llms/predibase/chat/transformation.py +++ b/litellm/llms/predibase/chat/transformation.py @@ -139,11 +139,6 @@ def transform_response( "Predibase transformation currently done in handler.py. Need to migrate to this file." ) - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - return messages - def transform_request( self, model: str, diff --git a/litellm/llms/replicate/chat/transformation.py b/litellm/llms/replicate/chat/transformation.py index b4d8b008d57a..ea0fbd035f38 100644 --- a/litellm/llms/replicate/chat/transformation.py +++ b/litellm/llms/replicate/chat/transformation.py @@ -130,11 +130,6 @@ def model_to_version_id(self, model: str) -> str: return split_model[1] return model - def _transform_messages( - self, messages: List[AllMessageValues] - ) -> List[AllMessageValues]: - return messages - def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers] ) -> BaseLLMException: diff --git a/litellm/llms/sagemaker/completion/transformation.py b/litellm/llms/sagemaker/completion/transformation.py index 6e4d2ac9c544..e411bea519df 100644 --- a/litellm/llms/sagemaker/completion/transformation.py +++ b/litellm/llms/sagemaker/completion/transformation.py @@ -57,12 +57,6 @@ def __init__( def get_config(cls): return super().get_config() - def _transform_messages( - self, - messages: List[AllMessageValues], - ) -> List[AllMessageValues]: - return messages - def get_error_class( self, error_message: str, status_code: int, headers: Union[dict, Headers] ) -> BaseLLMException: diff --git a/litellm/llms/watsonx/completion/transformation.py b/litellm/llms/watsonx/completion/transformation.py index 566b6ad2ce50..dd5657763308 100644 --- a/litellm/llms/watsonx/completion/transformation.py +++ b/litellm/llms/watsonx/completion/transformation.py @@ -240,12 +240,6 @@ def get_us_regions(self) -> List[str]: "us-south", ] - def _transform_messages( - self, - messages: List[AllMessageValues], - ) -> List[AllMessageValues]: - return messages - def get_error_class( self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers] ) -> BaseLLMException: From ec9ae28e0374c99b861c9c729cf266b4a88acff8 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 16:02:47 -0800 Subject: [PATCH 12/17] fix(o1_transformation.py): fix provider passed for supported param checks --- litellm/llms/openai/chat/o1_transformation.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py index 97899d67fe04..6a2d7bd06ccf 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o1_transformation.py @@ -15,6 +15,8 @@ from typing import Any, List, Optional, Union import litellm +from litellm import verbose_logger +from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage from litellm.utils import ( supports_function_calling, @@ -50,8 +52,20 @@ def get_supported_openai_params(self, model: str) -> list: ] supported_streaming_models = ["o1-preview", "o1-mini"] - _supports_function_calling = supports_function_calling(model, "openai") - _supports_response_schema = supports_response_schema(model, "openai") + try: + model, custom_llm_provider, api_base, api_key = get_llm_provider( + model=model + ) + except Exception: + verbose_logger.debug( + f"Unable to infer model provider for model={model}, defaulting to openai for o1 supported param check" + ) + custom_llm_provider = "openai" + + _supports_function_calling = supports_function_calling( + model, custom_llm_provider + ) + _supports_response_schema = supports_response_schema(model, custom_llm_provider) if model not in supported_streaming_models: non_supported_params.append("stream") From 8fdf421b430fbfd6c73e92a6856fcf2d3df1827b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 16:53:39 -0800 Subject: [PATCH 13/17] test(base_llm_unit_tests.py): skip test if api takes >5s to respond --- tests/llm_translation/base_llm_unit_tests.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py index ab42bc5fe998..968655f43376 100644 --- a/tests/llm_translation/base_llm_unit_tests.py +++ b/tests/llm_translation/base_llm_unit_tests.py @@ -219,6 +219,7 @@ class TestModel(BaseModel): }, ], response_format=TestModel, + timeout=5, ) assert res is not None @@ -226,6 +227,8 @@ class TestModel(BaseModel): assert res.choices[0].message.content is not None assert res.choices[0].message.tool_calls is None + except litellm.Timeout: + pytest.skip("Model took too long to respond") except litellm.InternalServerError: pytest.skip("Model is overloaded") From ef20c8badeffd3f76e7e2e9fecfc9994239db643 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 16:55:56 -0800 Subject: [PATCH 14/17] fix(utils.py): return false in 'supports_factory' if can't find value --- litellm/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index a0666267c959..360d31093695 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1736,9 +1736,10 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str) return True return False except Exception as e: - raise Exception( + verbose_logger.debug( f"Model not found or error in checking {key} support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}" ) + return False def supports_audio_input(model: str, custom_llm_provider: Optional[str] = None) -> bool: From a082a72668e06f8a6de0caf48ab3580ff721eaed Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 17:07:47 -0800 Subject: [PATCH 15/17] fix(o1_transformation.py): always return stream + stream_options as supported params + handle stream options being passed in for azure o1 --- litellm/llms/azure/chat/o1_handler.py | 2 ++ litellm/llms/openai/chat/o1_transformation.py | 8 +------- tests/local_testing/test_streaming.py | 8 ++++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/litellm/llms/azure/chat/o1_handler.py b/litellm/llms/azure/chat/o1_handler.py index 45c35d6276b0..3660ffdc73f6 100644 --- a/litellm/llms/azure/chat/o1_handler.py +++ b/litellm/llms/azure/chat/o1_handler.py @@ -57,6 +57,7 @@ def completion( client=None, ): stream: Optional[bool] = optional_params.pop("stream", False) + stream_options: Optional[dict] = optional_params.pop("stream_options", None) response = super().completion( model, messages, @@ -90,6 +91,7 @@ def completion( model=model, custom_llm_provider="openai", logging_obj=logging_obj, + stream_options=stream_options, ) return streaming_response diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py index 6a2d7bd06ccf..4f6197c3abf9 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o1_transformation.py @@ -51,7 +51,6 @@ def get_supported_openai_params(self, model: str) -> list: "top_logprobs", ] - supported_streaming_models = ["o1-preview", "o1-mini"] try: model, custom_llm_provider, api_base, api_key = get_llm_provider( model=model @@ -67,10 +66,6 @@ def get_supported_openai_params(self, model: str) -> list: ) _supports_response_schema = supports_response_schema(model, custom_llm_provider) - if model not in supported_streaming_models: - non_supported_params.append("stream") - non_supported_params.append("stream_options") - if not _supports_function_calling: non_supported_params.append("tools") non_supported_params.append("tool_choice") @@ -81,10 +76,9 @@ def get_supported_openai_params(self, model: str) -> list: if not _supports_response_schema: non_supported_params.append("response_format") - returned_params = [ + return [ param for param in all_openai_params if param not in non_supported_params ] - return returned_params def map_openai_params( self, diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py index 285b8b298c04..cf342c79069a 100644 --- a/tests/local_testing/test_streaming.py +++ b/tests/local_testing/test_streaming.py @@ -2068,10 +2068,10 @@ def test_openai_chat_completion_complete_response_call(): @pytest.mark.parametrize( "model", [ - "gpt-3.5-turbo", - "azure/chatgpt-v-2", - "claude-3-haiku-20240307", - "o1-preview", + # "gpt-3.5-turbo", + # "azure/chatgpt-v-2", + # "claude-3-haiku-20240307", + # "o1-preview", "azure/fake-o1-mini", ], ) From a702326a05cb3d88834ee22f74d3980fbd270660 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 17:44:56 -0800 Subject: [PATCH 16/17] feat(openai.py): support stream faking natively in openai handler Allows o1 calls to be faked for just the "o1" model, allows native streaming for o1-mini, o1-preview Fixes https://github.com/BerriAI/litellm/issues/7292 --- litellm/llms/base_llm/chat/transformation.py | 8 ++ litellm/llms/openai/chat/o1_transformation.py | 9 +++ litellm/llms/openai/openai.py | 79 +++++++++++++++++-- tests/llm_translation/test_openai_o1.py | 27 ------- tests/local_testing/test_streaming.py | 9 ++- 5 files changed, 95 insertions(+), 37 deletions(-) diff --git a/litellm/llms/base_llm/chat/transformation.py b/litellm/llms/base_llm/chat/transformation.py index aa0f0838d37b..626712a80636 100644 --- a/litellm/llms/base_llm/chat/transformation.py +++ b/litellm/llms/base_llm/chat/transformation.py @@ -82,6 +82,14 @@ def get_config(cls): and v is not None } + def should_fake_stream( + self, model: str, custom_llm_provider: Optional[str] = None + ) -> bool: + """ + Returns True if the model/provider should fake stream + """ + return False + @abstractmethod def get_supported_openai_params(self, model: str) -> list: pass diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py index 4f6197c3abf9..ab7e2182bda1 100644 --- a/litellm/llms/openai/chat/o1_transformation.py +++ b/litellm/llms/openai/chat/o1_transformation.py @@ -36,6 +36,15 @@ class OpenAIO1Config(OpenAIGPTConfig): def get_config(cls): return super().get_config() + def should_fake_stream( + self, model: str, custom_llm_provider: Optional[str] = None + ) -> bool: + supported_stream_models = ["o1-mini", "o1-preview"] + for supported_model in supported_stream_models: + if supported_model in model: + return False + return True + def get_supported_openai_params(self, model: str) -> list: """ Get the supported OpenAI params for the given model diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index 62b193e2dbd8..66ff5dfe1bda 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -33,6 +33,7 @@ prompt_factory, ) from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException +from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS from litellm.secret_managers.main import get_secret_str from litellm.types.utils import ( @@ -410,6 +411,24 @@ def make_sync_openai_chat_completion_request( else: raise e + def mock_streaming( + self, + response: ModelResponse, + logging_obj: LiteLLMLoggingObj, + model: str, + stream_options: Optional[dict] = None, + ) -> CustomStreamWrapper: + completion_stream = MockResponseIterator(model_response=response) + streaming_response = CustomStreamWrapper( + completion_stream=completion_stream, + model=model, + custom_llm_provider="openai", + logging_obj=logging_obj, + stream_options=stream_options, + ) + + return streaming_response + def completion( # type: ignore # noqa: PLR0915 self, model_response: ModelResponse, @@ -433,8 +452,21 @@ def completion( # type: ignore # noqa: PLR0915 ): super().completion() try: + fake_stream: bool = False + if custom_llm_provider is not None and model is not None: + provider_config = ProviderConfigManager.get_provider_chat_config( + model=model, provider=LlmProviders(custom_llm_provider) + ) + fake_stream = provider_config.should_fake_stream( + model=model, custom_llm_provider=custom_llm_provider + ) + inference_params = optional_params.copy() + stream_options: Optional[dict] = inference_params.pop( + "stream_options", None + ) + stream: Optional[bool] = inference_params.pop("stream", False) if headers: - optional_params["extra_headers"] = headers + inference_params["extra_headers"] = headers if model is None or messages is None: raise OpenAIError(status_code=422, message="Missing model or messages") @@ -466,7 +498,7 @@ def completion( # type: ignore # noqa: PLR0915 data = OpenAIConfig().transform_request( model=model, messages=messages, - optional_params=optional_params, + optional_params=inference_params, litellm_params=litellm_params, headers=headers or {}, ) @@ -474,7 +506,7 @@ def completion( # type: ignore # noqa: PLR0915 try: max_retries = data.pop("max_retries", 2) if acompletion is True: - if optional_params.get("stream", False): + if stream is True and fake_stream is False: return self.async_streaming( logging_obj=logging_obj, headers=headers, @@ -487,11 +519,13 @@ def completion( # type: ignore # noqa: PLR0915 max_retries=max_retries, organization=organization, drop_params=drop_params, + stream_options=stream_options, ) else: return self.acompletion( data=data, headers=headers, + model=model, logging_obj=logging_obj, model_response=model_response, api_base=api_base, @@ -501,8 +535,9 @@ def completion( # type: ignore # noqa: PLR0915 max_retries=max_retries, organization=organization, drop_params=drop_params, + fake_stream=fake_stream, ) - elif optional_params.get("stream", False): + elif stream is True and fake_stream is False: return self.streaming( logging_obj=logging_obj, headers=headers, @@ -514,6 +549,7 @@ def completion( # type: ignore # noqa: PLR0915 client=client, max_retries=max_retries, organization=organization, + stream_options=stream_options, ) else: if not isinstance(max_retries, int): @@ -559,11 +595,21 @@ def completion( # type: ignore # noqa: PLR0915 original_response=stringified_response, additional_args={"complete_input_dict": data}, ) - return convert_to_model_response_object( + final_response_obj = convert_to_model_response_object( response_object=stringified_response, model_response_object=model_response, _response_headers=headers, ) + + if fake_stream is True: + return self.mock_streaming( + response=cast(ModelResponse, final_response_obj), + logging_obj=logging_obj, + model=model, + stream_options=stream_options, + ) + + return final_response_obj except openai.UnprocessableEntityError as e: ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800 if litellm.drop_params is True or drop_params is True: @@ -625,6 +671,7 @@ def completion( # type: ignore # noqa: PLR0915 async def acompletion( self, data: dict, + model: str, model_response: ModelResponse, logging_obj: LiteLLMLoggingObj, timeout: Union[float, httpx.Timeout], @@ -635,6 +682,8 @@ async def acompletion( max_retries=None, headers=None, drop_params: Optional[bool] = None, + stream_options: Optional[dict] = None, + fake_stream: bool = False, ): response = None for _ in range( @@ -676,12 +725,22 @@ async def acompletion( additional_args={"complete_input_dict": data}, ) logging_obj.model_call_details["response_headers"] = headers - return convert_to_model_response_object( + final_response_obj = convert_to_model_response_object( response_object=stringified_response, model_response_object=model_response, hidden_params={"headers": headers}, _response_headers=headers, ) + + if fake_stream is True: + return self.mock_streaming( + response=cast(ModelResponse, final_response_obj), + logging_obj=logging_obj, + model=model, + stream_options=stream_options, + ) + + return final_response_obj except openai.UnprocessableEntityError as e: ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800 if litellm.drop_params is True or drop_params is True: @@ -712,7 +771,11 @@ def streaming( client=None, max_retries=None, headers=None, + stream_options: Optional[dict] = None, ): + data["stream"] = True + if stream_options is not None: + data["stream_options"] = stream_options openai_client: OpenAI = self._get_openai_client( # type: ignore is_async=False, api_key=api_key, @@ -763,8 +826,12 @@ async def async_streaming( max_retries=None, headers=None, drop_params: Optional[bool] = None, + stream_options: Optional[dict] = None, ): response = None + data["stream"] = True + if stream_options is not None: + data["stream_options"] = stream_options for _ in range(2): try: openai_aclient: AsyncOpenAI = self._get_openai_client( # type: ignore diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py index 1e2e9d3929b3..48f8cfdd387f 100644 --- a/tests/llm_translation/test_openai_o1.py +++ b/tests/llm_translation/test_openai_o1.py @@ -65,33 +65,6 @@ async def test_o1_handle_system_role(model): ] -@pytest.mark.parametrize( - "model, expected_streaming_support", - [("o1-preview", True), ("o1-mini", True), ("o1", False)], -) -@pytest.mark.asyncio -async def test_o1_handle_streaming_optional_params(model, expected_streaming_support): - """ - Tests that: - - max_tokens is translated to 'max_completion_tokens' - - role 'system' is translated to 'user' - """ - from openai import AsyncOpenAI - from litellm.utils import ProviderConfigManager - from litellm.types.utils import LlmProviders - - os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") - - config = ProviderConfigManager.get_provider_chat_config( - model=model, provider=LlmProviders.OPENAI - ) - - supported_params = config.get_supported_openai_params(model=model) - - assert expected_streaming_support == ("stream" in supported_params) - - @pytest.mark.parametrize( "model, expected_tool_calling_support", [("o1-preview", False), ("o1-mini", False), ("o1", True)], diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py index cf342c79069a..67a0400283f3 100644 --- a/tests/local_testing/test_streaming.py +++ b/tests/local_testing/test_streaming.py @@ -2068,10 +2068,11 @@ def test_openai_chat_completion_complete_response_call(): @pytest.mark.parametrize( "model", [ - # "gpt-3.5-turbo", - # "azure/chatgpt-v-2", - # "claude-3-haiku-20240307", - # "o1-preview", + "gpt-3.5-turbo", + "azure/chatgpt-v-2", + "claude-3-haiku-20240307", + "o1-preview", + "o1", "azure/fake-o1-mini", ], ) From 10530e0afc8f95252a7b38c11b2d99d0a3c4f672 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 18 Dec 2024 18:36:33 -0800 Subject: [PATCH 17/17] fix(openai.py): use inference param instead of original optional param --- litellm/llms/openai/openai.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py index 66ff5dfe1bda..7cecb9d0491d 100644 --- a/litellm/llms/openai/openai.py +++ b/litellm/llms/openai/openai.py @@ -595,12 +595,12 @@ def completion( # type: ignore # noqa: PLR0915 original_response=stringified_response, additional_args={"complete_input_dict": data}, ) + final_response_obj = convert_to_model_response_object( response_object=stringified_response, model_response_object=model_response, _response_headers=headers, ) - if fake_stream is True: return self.mock_streaming( response=cast(ModelResponse, final_response_obj), @@ -613,8 +613,8 @@ def completion( # type: ignore # noqa: PLR0915 except openai.UnprocessableEntityError as e: ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800 if litellm.drop_params is True or drop_params is True: - optional_params = drop_params_from_unprocessable_entity_error( - e, optional_params + inference_params = drop_params_from_unprocessable_entity_error( + e, inference_params ) else: raise e @@ -718,6 +718,7 @@ async def acompletion( openai_aclient=openai_aclient, data=data, timeout=timeout ) stringified_response = response.model_dump() + logging_obj.post_call( input=data["messages"], api_key=api_key,