From 2de2b7988c0dc51d39951e1e1aead8f6d48f0d1b Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 09:29:12 -0800
Subject: [PATCH 01/17] fix(health.md): add rerank model health check
 information

---
 docs/my-website/docs/proxy/health.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md
index ffc66dde811c..585e2ff7505c 100644
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@@ -121,6 +121,20 @@ model_list:
       mode: audio_speech
 ```
 
+### Rerank Models 
+
+To run rerank health checks, set `mode: rerank` under `model_info` in your config for the relevant model.
+
+```yaml
+model_list:
+  - model_name: rerank-english-v3.0
+    litellm_params:
+      model: cohere/rerank-english-v3.0
+      api_key: os.environ/COHERE_API_KEY
+    model_info:
+      mode: rerank
+```
+
 ### Batch Models (Azure Only)
 
 For Azure models deployed as 'batch' models, set `mode: batch`. 

From abaea848f4686686fb9012cb491ecb30d85c4770 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 09:48:40 -0800
Subject: [PATCH 02/17] build(model_prices_and_context_window.json): add gemini
 2.0 for google ai studio - pricing + commercial rate limits

---
 ...odel_prices_and_context_window_backup.json | 34 +++++++++++++++++++
 model_prices_and_context_window.json          | 34 +++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index a607bfbc6ffc..92f3e1c5bd0e 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -3098,6 +3098,40 @@
         "supports_response_schema": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
     },
+    "gemini/gemini-2.0-flash-exp": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0, 
+        "input_cost_per_token_above_128k_tokens": 0, 
+        "input_cost_per_character_above_128k_tokens": 0, 
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "tpm": 4000000,
+        "rpm": 10,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+    },
     "vertex_ai/claude-3-sonnet": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index a607bfbc6ffc..92f3e1c5bd0e 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -3098,6 +3098,40 @@
         "supports_response_schema": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
     },
+    "gemini/gemini-2.0-flash-exp": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0,
+        "input_cost_per_video_per_second": 0,
+        "input_cost_per_audio_per_second": 0,
+        "input_cost_per_token": 0,
+        "input_cost_per_character": 0, 
+        "input_cost_per_token_above_128k_tokens": 0, 
+        "input_cost_per_character_above_128k_tokens": 0, 
+        "input_cost_per_image_above_128k_tokens": 0,
+        "input_cost_per_video_per_second_above_128k_tokens": 0,
+        "input_cost_per_audio_per_second_above_128k_tokens": 0,
+        "output_cost_per_token": 0,
+        "output_cost_per_character": 0,
+        "output_cost_per_token_above_128k_tokens": 0,
+        "output_cost_per_character_above_128k_tokens": 0,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "tpm": 4000000,
+        "rpm": 10,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+    },
     "vertex_ai/claude-3-sonnet": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,

From e6508ad66591b5a2f08306f4e255c65c015aca1e Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 09:50:34 -0800
Subject: [PATCH 03/17] build(model_prices_and_context_window.json): add
 gemini-2.0 supports audio output = true

---
 litellm/model_prices_and_context_window_backup.json | 2 ++
 model_prices_and_context_window.json                | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 92f3e1c5bd0e..d1be19cc1776 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -3096,6 +3096,7 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
+        "supports_audio_output": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
     },
     "gemini/gemini-2.0-flash-exp": {
@@ -3128,6 +3129,7 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
+        "supports_audio_output": true,
         "tpm": 4000000,
         "rpm": 10,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 92f3e1c5bd0e..d1be19cc1776 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -3096,6 +3096,7 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
+        "supports_audio_output": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
     },
     "gemini/gemini-2.0-flash-exp": {
@@ -3128,6 +3129,7 @@
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
+        "supports_audio_output": true,
         "tpm": 4000000,
         "rpm": 10,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"

From 6ab132351f15e0fae505368b02a4d2386b682430 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 09:54:34 -0800
Subject: [PATCH 04/17] docs(team_model_add.md): clarify allowing teams to add
 models is an enterprise feature

---
 docs/my-website/docs/proxy/team_model_add.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/docs/my-website/docs/proxy/team_model_add.md b/docs/my-website/docs/proxy/team_model_add.md
index bb57801624a9..a8a6878fd590 100644
--- a/docs/my-website/docs/proxy/team_model_add.md
+++ b/docs/my-website/docs/proxy/team_model_add.md
@@ -1,4 +1,13 @@
-# Allow Teams to Add Models
+# ✨ Allow Teams to Add Models
+
+:::info
+
+This is an Enterprise feature.
+[Enterprise Pricing](https://www.litellm.ai/#pricing)
+
+[Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
+
+:::
 
 Allow team to add a their own models/key for that project - so any OpenAI call they make uses their OpenAI key.
 

From 4a470ef12214c74e4bfbb9e781830410613bad2e Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 10:12:13 -0800
Subject: [PATCH 05/17] fix(o1_transformation.py): add support for 'n',
 'response_format' and 'stop' params for o1 and 'stream_options' param for
 o1-mini

---
 litellm/llms/openai/chat/o1_transformation.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py
index 115bb29b1da0..5a30c20d277b 100644
--- a/litellm/llms/openai/chat/o1_transformation.py
+++ b/litellm/llms/openai/chat/o1_transformation.py
@@ -44,15 +44,15 @@ def get_supported_openai_params(self, model: str) -> list:
             "function_call",
             "functions",
             "top_p",
-            "n",
             "presence_penalty",
             "frequency_penalty",
             "top_logprobs",
-            "response_format",
-            "stop",
-            "stream_options",
         ]
 
+        if "o1-mini" not in model:
+            non_supported_params.append("stream")
+            non_supported_params.append("stream_options")
+
         return [
             param for param in all_openai_params if param not in non_supported_params
         ]

From 01c3340d4809dc66c4cf4a7403a1126abab1072a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 11:24:34 -0800
Subject: [PATCH 06/17] build(model_prices_and_context_window.json): add
 'supports_system_message' to supporting openai models

needed as o1-preview and o1-mini models don't support 'system' messages
---
 ...odel_prices_and_context_window_backup.json | 138 ++++++++++++------
 model_prices_and_context_window.json          | 138 ++++++++++++------
 2 files changed, 184 insertions(+), 92 deletions(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d1be19cc1776..27d750a6c56e 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -13,7 +13,8 @@
         "supports_audio_input": true, 
         "supports_audio_output": true,
         "supports_prompt_caching": true,
-        "supports_response_schema": true
+        "supports_response_schema": true,
+        "supports_system_messages": true
     },
     "gpt-4": {
         "max_tokens": 4096, 
@@ -24,7 +25,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o": {
         "max_tokens": 16384,
@@ -39,7 +41,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-audio-preview": {
         "max_tokens": 16384,
@@ -54,7 +57,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-audio-preview-2024-10-01": {
         "max_tokens": 16384,
@@ -69,7 +73,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-audio-preview-2024-12-17": {
         "max_tokens": 16384,
@@ -84,7 +89,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini": {
         "max_tokens": 16384,
@@ -99,7 +105,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-2024-07-18": {
         "max_tokens": 16384,
@@ -114,7 +121,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "o1": {
         "max_tokens": 100000,
@@ -128,7 +136,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": false,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "o1-mini": {
         "max_tokens": 65536,
@@ -198,7 +207,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": false,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "chatgpt-4o-latest": {
         "max_tokens": 4096,
@@ -211,7 +221,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-05-13": {
         "max_tokens": 4096,
@@ -224,7 +235,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -239,7 +251,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-11-20": {
         "max_tokens": 16384,
@@ -254,7 +267,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview-2024-10-01": {
         "max_tokens": 4096,
@@ -271,7 +285,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview": {
         "max_tokens": 4096,
@@ -287,7 +302,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview-2024-12-17": {
         "max_tokens": 4096,
@@ -303,7 +319,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-realtime-preview": {
         "max_tokens": 4096,
@@ -320,7 +337,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-realtime-preview-2024-12-17": {
         "max_tokens": 4096,
@@ -337,7 +355,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo-preview": {
         "max_tokens": 4096,
@@ -349,7 +368,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0314": {
         "max_tokens": 4096,
@@ -359,7 +379,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0613": {
         "max_tokens": 4096,
@@ -370,7 +391,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k": {
         "max_tokens": 4096,
@@ -380,7 +402,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k-0314": {
         "max_tokens": 4096,
@@ -390,7 +413,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k-0613": {
         "max_tokens": 4096,
@@ -400,7 +424,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo": {
         "max_tokens": 4096,
@@ -413,7 +438,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -426,7 +452,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-1106-preview": {
         "max_tokens": 4096,
@@ -438,7 +465,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0125-preview": {
         "max_tokens": 4096,
@@ -450,7 +478,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-vision-preview": {
         "max_tokens": 4096,
@@ -461,7 +490,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-1106-vision-preview": {
         "max_tokens": 4096,
@@ -472,7 +502,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo": {
         "max_tokens": 4097,
@@ -483,7 +514,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-0301": {
         "max_tokens": 4097,
@@ -493,7 +525,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-0613": {
         "max_tokens": 4097,
@@ -504,7 +537,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-1106": {
         "max_tokens": 16385,
@@ -516,7 +550,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-0125": {
         "max_tokens": 16385,
@@ -528,7 +563,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-16k": {
         "max_tokens": 16385,
@@ -538,7 +574,8 @@
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-16k-0613": {
         "max_tokens": 16385,
@@ -548,7 +585,8 @@
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo": {
         "max_tokens": 4096,
@@ -557,7 +595,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo-0125": {
         "max_tokens": 4096,
@@ -566,7 +605,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo-1106": {
         "max_tokens": 4096,
@@ -575,7 +615,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo-0613": {
         "max_tokens": 4096,
@@ -584,7 +625,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-4-0613": {
         "max_tokens": 4096,
@@ -595,7 +637,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
+        "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
+        "supports_system_messages": true
     },
     "ft:gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -608,7 +651,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_system_messages": true
     },
     "ft:gpt-4o-2024-11-20": {
         "max_tokens": 16384,
@@ -623,7 +667,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "ft:gpt-4o-mini-2024-07-18": {
         "max_tokens": 16384,
@@ -638,7 +683,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "ft:davinci-002": {
         "max_tokens": 16384,
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index d1be19cc1776..27d750a6c56e 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -13,7 +13,8 @@
         "supports_audio_input": true, 
         "supports_audio_output": true,
         "supports_prompt_caching": true,
-        "supports_response_schema": true
+        "supports_response_schema": true,
+        "supports_system_messages": true
     },
     "gpt-4": {
         "max_tokens": 4096, 
@@ -24,7 +25,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o": {
         "max_tokens": 16384,
@@ -39,7 +41,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-audio-preview": {
         "max_tokens": 16384,
@@ -54,7 +57,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-audio-preview-2024-10-01": {
         "max_tokens": 16384,
@@ -69,7 +73,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-audio-preview-2024-12-17": {
         "max_tokens": 16384,
@@ -84,7 +89,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini": {
         "max_tokens": 16384,
@@ -99,7 +105,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-2024-07-18": {
         "max_tokens": 16384,
@@ -114,7 +121,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "o1": {
         "max_tokens": 100000,
@@ -128,7 +136,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": false,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "o1-mini": {
         "max_tokens": 65536,
@@ -198,7 +207,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": false,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "chatgpt-4o-latest": {
         "max_tokens": 4096,
@@ -211,7 +221,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-05-13": {
         "max_tokens": 4096,
@@ -224,7 +235,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -239,7 +251,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-2024-11-20": {
         "max_tokens": 16384,
@@ -254,7 +267,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview-2024-10-01": {
         "max_tokens": 4096,
@@ -271,7 +285,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview": {
         "max_tokens": 4096,
@@ -287,7 +302,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-realtime-preview-2024-12-17": {
         "max_tokens": 4096,
@@ -303,7 +319,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-realtime-preview": {
         "max_tokens": 4096,
@@ -320,7 +337,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4o-mini-realtime-preview-2024-12-17": {
         "max_tokens": 4096,
@@ -337,7 +355,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_audio_input": true,
-        "supports_audio_output": true
+        "supports_audio_output": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo-preview": {
         "max_tokens": 4096,
@@ -349,7 +368,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0314": {
         "max_tokens": 4096,
@@ -359,7 +379,8 @@
         "output_cost_per_token": 0.00006,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0613": {
         "max_tokens": 4096,
@@ -370,7 +391,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k": {
         "max_tokens": 4096,
@@ -380,7 +402,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k-0314": {
         "max_tokens": 4096,
@@ -390,7 +413,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-32k-0613": {
         "max_tokens": 4096,
@@ -400,7 +424,8 @@
         "output_cost_per_token": 0.00012,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo": {
         "max_tokens": 4096,
@@ -413,7 +438,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-turbo-2024-04-09": {
         "max_tokens": 4096,
@@ -426,7 +452,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-1106-preview": {
         "max_tokens": 4096,
@@ -438,7 +465,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-0125-preview": {
         "max_tokens": 4096,
@@ -450,7 +478,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-vision-preview": {
         "max_tokens": 4096,
@@ -461,7 +490,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-4-1106-vision-preview": {
         "max_tokens": 4096,
@@ -472,7 +502,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo": {
         "max_tokens": 4097,
@@ -483,7 +514,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-0301": {
         "max_tokens": 4097,
@@ -493,7 +525,8 @@
         "output_cost_per_token": 0.000002,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-0613": {
         "max_tokens": 4097,
@@ -504,7 +537,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-1106": {
         "max_tokens": 16385,
@@ -516,7 +550,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-0125": {
         "max_tokens": 16385,
@@ -528,7 +563,8 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-16k": {
         "max_tokens": 16385,
@@ -538,7 +574,8 @@
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "gpt-3.5-turbo-16k-0613": {
         "max_tokens": 16385,
@@ -548,7 +585,8 @@
         "output_cost_per_token": 0.000004,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo": {
         "max_tokens": 4096,
@@ -557,7 +595,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo-0125": {
         "max_tokens": 4096,
@@ -566,7 +605,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo-1106": {
         "max_tokens": 4096,
@@ -575,7 +615,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-3.5-turbo-0613": {
         "max_tokens": 4096,
@@ -584,7 +625,8 @@
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
-        "mode": "chat"
+        "mode": "chat",
+        "supports_system_messages": true
     },
     "ft:gpt-4-0613": {
         "max_tokens": 4096,
@@ -595,7 +637,8 @@
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
-        "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing"
+        "source": "OpenAI needs to add pricing for this ft model, will be updated when added by OpenAI. Defaulting to base model pricing",
+        "supports_system_messages": true
     },
     "ft:gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -608,7 +651,8 @@
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
-        "supports_vision": true
+        "supports_vision": true,
+        "supports_system_messages": true
     },
     "ft:gpt-4o-2024-11-20": {
         "max_tokens": 16384,
@@ -623,7 +667,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "ft:gpt-4o-mini-2024-07-18": {
         "max_tokens": 16384,
@@ -638,7 +683,8 @@
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_prompt_caching": true
+        "supports_prompt_caching": true,
+        "supports_system_messages": true
     },
     "ft:davinci-002": {
         "max_tokens": 16384,

From e6e368675b1b1826323e01f410397a817fcc8230 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 13:33:25 -0800
Subject: [PATCH 07/17] fix(o1_transformation.py): translate system message
 based on if o1 model supports it

---
 .../llms/openai/chat/gpt_transformation.py    |  2 +-
 litellm/llms/openai/chat/o1_transformation.py |  7 +++---
 litellm/llms/openai/openai.py                 |  6 +++--
 litellm/utils.py                              | 16 +++++---------
 tests/llm_translation/test_openai_o1.py       | 22 ++++++++++++++-----
 5 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/litellm/llms/openai/chat/gpt_transformation.py b/litellm/llms/openai/chat/gpt_transformation.py
index c6e63edb8c58..01bd720ba461 100644
--- a/litellm/llms/openai/chat/gpt_transformation.py
+++ b/litellm/llms/openai/chat/gpt_transformation.py
@@ -164,7 +164,7 @@ def map_openai_params(
         )
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         return messages
 
diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py
index 5a30c20d277b..f0ec262763e2 100644
--- a/litellm/llms/openai/chat/o1_transformation.py
+++ b/litellm/llms/openai/chat/o1_transformation.py
@@ -16,6 +16,7 @@
 
 import litellm
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
+from litellm.utils import supports_system_messages
 
 from .gpt_transformation import OpenAIGPTConfig
 
@@ -95,16 +96,16 @@ def is_model_o1_reasoning_model(self, model: str) -> bool:
         return False
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         Handles limitations of O-1 model family.
         - modalities: image => drop param (if user opts in to dropping param)
         - role: system ==> translate to role 'user'
         """
-
+        _supports_system_messages = supports_system_messages(model, "openai")
         for i, message in enumerate(messages):
-            if message["role"] == "system":
+            if message["role"] == "system" and not _supports_system_messages:
                 new_message = ChatCompletionUserMessage(
                     content=message["content"], role="user"
                 )
diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index ffac461f385c..62b193e2dbd8 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -198,7 +198,7 @@ def _map_openai_params(
         return optional_params
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         return messages
 
@@ -456,7 +456,9 @@ def completion(  # type: ignore # noqa: PLR0915
                 if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
                     provider_config, OpenAIConfig
                 ):
-                    messages = provider_config._transform_messages(messages)
+                    messages = provider_config._transform_messages(
+                        messages=messages, model=model
+                    )
 
             for _ in range(
                 2
diff --git a/litellm/utils.py b/litellm/utils.py
index 8baafe21ed7b..7a3fa7eaffcf 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1645,17 +1645,11 @@ def supports_system_messages(model: str, custom_llm_provider: Optional[str]) ->
     Raises:
     Exception: If the given model is not found in model_prices_and_context_window.json.
     """
-    try:
-        model_info = litellm.get_model_info(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-        if model_info.get("supports_system_messages", False) is True:
-            return True
-        return False
-    except Exception:
-        raise Exception(
-            f"Model not supports system messages. You passed model={model}, custom_llm_provider={custom_llm_provider}."
-        )
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_system_messages",
+    )
 
 
 def supports_response_schema(model: str, custom_llm_provider: Optional[str]) -> bool:
diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py
index 2bb82c6a28da..32948a6042cb 100644
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -17,14 +17,19 @@
 from litellm import Choices, Message, ModelResponse
 
 
+@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
 @pytest.mark.asyncio
-async def test_o1_handle_system_role():
+async def test_o1_handle_system_role(model):
     """
     Tests that:
     - max_tokens is translated to 'max_completion_tokens'
     - role 'system' is translated to 'user'
     """
     from openai import AsyncOpenAI
+    from litellm.utils import supports_system_messages
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
 
     litellm.set_verbose = True
 
@@ -35,9 +40,9 @@ async def test_o1_handle_system_role():
     ) as mock_client:
         try:
             await litellm.acompletion(
-                model="o1-preview",
+                model=model,
                 max_tokens=10,
-                messages=[{"role": "system", "content": "Hello!"}],
+                messages=[{"role": "system", "content": "Be a good bot!"}],
                 client=client,
             )
         except Exception as e:
@@ -48,9 +53,16 @@ async def test_o1_handle_system_role():
 
         print("request_body: ", request_body)
 
-        assert request_body["model"] == "o1-preview"
+        assert request_body["model"] == model
         assert request_body["max_completion_tokens"] == 10
-        assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]
+        if supports_system_messages(model, "openai"):
+            assert request_body["messages"] == [
+                {"role": "system", "content": "Be a good bot!"}
+            ]
+        else:
+            assert request_body["messages"] == [
+                {"role": "user", "content": "Be a good bot!"}
+            ]
 
 
 @pytest.mark.asyncio

From 9a0d6db9377ee6cafd4df176f7ff871e12ef408c Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 13:57:16 -0800
Subject: [PATCH 08/17] fix(o1_transformation.py): return 'stream' param
 support if o1-mini/o1-preview

o1 currently doesn't support streaming, but the other model versions do

Fixes https://github.com/BerriAI/litellm/issues/7292
---
 litellm/llms/openai/chat/o1_transformation.py |  4 +-
 tests/llm_translation/test_openai_o1.py       | 55 +++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py
index f0ec262763e2..8572ef54f25b 100644
--- a/litellm/llms/openai/chat/o1_transformation.py
+++ b/litellm/llms/openai/chat/o1_transformation.py
@@ -50,7 +50,9 @@ def get_supported_openai_params(self, model: str) -> list:
             "top_logprobs",
         ]
 
-        if "o1-mini" not in model:
+        supported_streaming_models = ["o1-preview", "o1-mini"]
+
+        if model not in supported_streaming_models:
             non_supported_params.append("stream")
             non_supported_params.append("stream_options")
 
diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py
index 32948a6042cb..9f46003461c2 100644
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -65,6 +65,61 @@ async def test_o1_handle_system_role(model):
             ]
 
 
+@pytest.mark.parametrize(
+    "model, expected_streaming_support",
+    [("o1-preview", True), ("o1-mini", True), ("o1", False)],
+)
+@pytest.mark.asyncio
+async def test_o1_handle_streaming_optional_params(model, expected_streaming_support):
+    """
+    Tests that:
+    - 'stream' is returned as a supported param for o1-preview and o1-mini
+    - 'stream' is not returned as a supported param for o1
+    """
+    from openai import AsyncOpenAI
+    from litellm.utils import ProviderConfigManager
+    from litellm.types.utils import LlmProviders
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    config = ProviderConfigManager.get_provider_chat_config(
+        model=model, provider=LlmProviders.OPENAI
+    )
+
+    supported_params = config.get_supported_openai_params(model=model)
+
+    assert expected_streaming_support == ("stream" in supported_params)
+
+
+# @pytest.mark.parametrize(
+#     "model",
+#     ["o1"],  # "o1-preview", "o1-mini",
+# )
+# @pytest.mark.asyncio
+# async def test_o1_handle_streaming_e2e(model):
+#     """
+#     Tests that:
+#     - a completion call with stream=True returns a CustomStreamWrapper
+#     - the returned stream can be iterated chunk by chunk
+#     """
+#     from openai import AsyncOpenAI
+#     from litellm.utils import ProviderConfigManager
+#     from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
+#     from litellm.types.utils import LlmProviders
+
+#     resp = litellm.completion(
+#         model=model,
+#         messages=[{"role": "user", "content": "Hello!"}],
+#         stream=True,
+#     )
+#     assert isinstance(resp, CustomStreamWrapper)
+#     for chunk in resp:
+#         print("chunk: ", chunk)
+
+#     assert True
+
+
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model", ["gpt-4", "gpt-4-0314", "gpt-4-32k", "o1-preview"])
 async def test_o1_max_completion_tokens(model: str):

From 0715ccc59f026dacf96054a13e87ee77803f1dd1 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 14:31:52 -0800
Subject: [PATCH 09/17] fix(o1_transformation.py): return tool
 calling/response_format in supported params if model map says so

Fixes https://github.com/BerriAI/litellm/issues/7292
---
 .../llms/ollama/completion/transformation.py  |   6 +-
 litellm/llms/openai/chat/o1_transformation.py |  26 +-
 ...odel_prices_and_context_window_backup.json |  14 +-
 litellm/types/utils.py                        |  15 +-
 litellm/utils.py                              | 256 ++++++++----------
 model_prices_and_context_window.json          |  14 +-
 tests/llm_translation/test_openai_o1.py       |  29 ++
 7 files changed, 188 insertions(+), 172 deletions(-)

diff --git a/litellm/llms/ollama/completion/transformation.py b/litellm/llms/ollama/completion/transformation.py
index 3ba3d29587f3..d9cdff20d42b 100644
--- a/litellm/llms/ollama/completion/transformation.py
+++ b/litellm/llms/ollama/completion/transformation.py
@@ -23,6 +23,7 @@
 from litellm.types.utils import (
     GenericStreamingChunk,
     ModelInfo,
+    ModelInfoBase,
     ModelResponse,
     ProviderField,
     StreamingChoices,
@@ -198,7 +199,7 @@ def _get_max_tokens(self, ollama_model_info: dict) -> Optional[int]:
                 return v
         return None
 
-    def get_model_info(self, model: str) -> ModelInfo:
+    def get_model_info(self, model: str) -> ModelInfoBase:
         """
         curl http://localhost:11434/api/show -d '{
           "name": "mistral"
@@ -222,11 +223,10 @@ def get_model_info(self, model: str) -> ModelInfo:
 
         _max_tokens: Optional[int] = self._get_max_tokens(model_info)
 
-        return ModelInfo(
+        return ModelInfoBase(
             key=model,
             litellm_provider="ollama",
             mode="chat",
-            supported_openai_params=self.get_supported_openai_params(model=model),
             supports_function_calling=self._supports_function_calling(model_info),
             input_cost_per_token=0.0,
             output_cost_per_token=0.0,
diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py
index 8572ef54f25b..97899d67fe04 100644
--- a/litellm/llms/openai/chat/o1_transformation.py
+++ b/litellm/llms/openai/chat/o1_transformation.py
@@ -16,7 +16,11 @@
 
 import litellm
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
-from litellm.utils import supports_system_messages
+from litellm.utils import (
+    supports_function_calling,
+    supports_response_schema,
+    supports_system_messages,
+)
 
 from .gpt_transformation import OpenAIGPTConfig
 
@@ -39,11 +43,6 @@ def get_supported_openai_params(self, model: str) -> list:
         all_openai_params = super().get_supported_openai_params(model=model)
         non_supported_params = [
             "logprobs",
-            "tools",
-            "tool_choice",
-            "parallel_tool_calls",
-            "function_call",
-            "functions",
             "top_p",
             "presence_penalty",
             "frequency_penalty",
@@ -51,14 +50,27 @@ def get_supported_openai_params(self, model: str) -> list:
         ]
 
         supported_streaming_models = ["o1-preview", "o1-mini"]
+        _supports_function_calling = supports_function_calling(model, "openai")
+        _supports_response_schema = supports_response_schema(model, "openai")
 
         if model not in supported_streaming_models:
             non_supported_params.append("stream")
             non_supported_params.append("stream_options")
 
-        return [
+        if not _supports_function_calling:
+            non_supported_params.append("tools")
+            non_supported_params.append("tool_choice")
+            non_supported_params.append("parallel_tool_calls")
+            non_supported_params.append("function_call")
+            non_supported_params.append("functions")
+
+        if not _supports_response_schema:
+            non_supported_params.append("response_format")
+
+        returned_params = [
             param for param in all_openai_params if param not in non_supported_params
         ]
+        return returned_params
 
     def map_openai_params(
         self,
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 27d750a6c56e..984e5d940da2 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -137,7 +137,8 @@
         "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true,
-        "supports_system_messages": true
+        "supports_system_messages": true,
+        "supports_response_schema": true
     },
     "o1-mini": {
         "max_tokens": 65536,
@@ -148,8 +149,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -162,8 +161,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -176,8 +173,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -190,8 +185,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -208,7 +201,8 @@
         "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true,
-        "supports_system_messages": true
+        "supports_system_messages": true,
+        "supports_response_schema": true
     },
     "chatgpt-4o-latest": {
         "max_tokens": 4096,
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index d4b6c789a409..ca28b15b71aa 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -74,11 +74,7 @@ class ProviderField(TypedDict):
     field_value: str
 
 
-class ModelInfo(TypedDict, total=False):
-    """
-    Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
-    """
-
+class ModelInfoBase(TypedDict, total=False):
     key: Required[str]  # the key in litellm.model_cost which is returned
 
     max_tokens: Required[Optional[int]]
@@ -119,7 +115,6 @@ class ModelInfo(TypedDict, total=False):
             "completion", "embedding", "image_generation", "chat", "audio_transcription"
         ]
     ]
-    supported_openai_params: Required[Optional[List[str]]]
     supports_system_messages: Optional[bool]
     supports_response_schema: Optional[bool]
     supports_vision: Optional[bool]
@@ -133,6 +128,14 @@ class ModelInfo(TypedDict, total=False):
     rpm: Optional[int]
 
 
+class ModelInfo(ModelInfoBase, total=False):
+    """
+    Model info for a given model, this is information found in litellm.model_prices_and_context_window.json
+    """
+
+    supported_openai_params: Required[Optional[List[str]]]
+
+
 class GenericStreamingChunk(TypedDict, total=False):
     text: Required[str]
     tool_use: Optional[ChatCompletionToolCallChunk]
diff --git a/litellm/utils.py b/litellm/utils.py
index 7a3fa7eaffcf..a0666267c959 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -132,6 +132,7 @@
     LlmProviders,
     Message,
     ModelInfo,
+    ModelInfoBase,
     ModelResponse,
     ModelResponseStream,
     ProviderField,
@@ -1678,25 +1679,11 @@ def supports_response_schema(model: str, custom_llm_provider: Optional[str]) ->
 
     if custom_llm_provider in PROVIDERS_GLOBALLY_SUPPORT_RESPONSE_SCHEMA:
         return True
-    try:
-        ## GET MODEL INFO
-        model_info = litellm.get_model_info(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        if model_info.get("supports_response_schema", False) is True:
-            return True
-    except Exception:
-        ## check if provider supports response schema globally
-        supported_params = get_supported_openai_params(
-            model=model,
-            custom_llm_provider=custom_llm_provider,
-            request_type="chat_completion",
-        )
-        if supported_params is not None and "response_schema" in supported_params:
-            return True
-
-    return False
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_response_schema",
+    )
 
 
 def supports_function_calling(
@@ -1715,23 +1702,11 @@ def supports_function_calling(
     Raises:
     Exception: If the given model is not found or there's an error in retrieval.
     """
-    try:
-        model, custom_llm_provider, _, _ = litellm.get_llm_provider(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        ## CHECK IF MODEL SUPPORTS FUNCTION CALLING ##
-        model_info = litellm.get_model_info(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
-
-        if model_info.get("supports_function_calling", False) is True:
-            return True
-        return False
-    except Exception as e:
-        raise Exception(
-            f"Model not found or error in checking function calling support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
-        )
+    return _supports_factory(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        key="supports_function_calling",
+    )
 
 
 def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str) -> bool:
@@ -1753,7 +1728,7 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
             model=model, custom_llm_provider=custom_llm_provider
         )
 
-        model_info = litellm.get_model_info(
+        model_info = _get_model_info_helper(
             model=model, custom_llm_provider=custom_llm_provider
         )
 
@@ -4190,99 +4165,35 @@ def _get_potential_model_names(
     )
 
 
-def get_model_info(  # noqa: PLR0915
-    model: str, custom_llm_provider: Optional[str] = None
-) -> ModelInfo:
-    """
-    Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token  for a given model.
-
-    Parameters:
-    - model (str): The name of the model.
-    - custom_llm_provider (str | null): the provider used for the model. If provided, used to check if the litellm model info is for that provider.
-
-    Returns:
-        dict: A dictionary containing the following information:
-            key: Required[str] # the key in litellm.model_cost which is returned
-            max_tokens: Required[Optional[int]]
-            max_input_tokens: Required[Optional[int]]
-            max_output_tokens: Required[Optional[int]]
-            input_cost_per_token: Required[float]
-            input_cost_per_character: Optional[float]  # only for vertex ai models
-            input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
-            input_cost_per_character_above_128k_tokens: Optional[
-                float
-            ]  # only for vertex ai models
-            input_cost_per_query: Optional[float] # only for rerank models
-            input_cost_per_image: Optional[float]  # only for vertex ai models
-            input_cost_per_audio_token: Optional[float]
-            input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
-            input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
-            output_cost_per_token: Required[float]
-            output_cost_per_audio_token: Optional[float]
-            output_cost_per_character: Optional[float]  # only for vertex ai models
-            output_cost_per_token_above_128k_tokens: Optional[
-                float
-            ]  # only for vertex ai models
-            output_cost_per_character_above_128k_tokens: Optional[
-                float
-            ]  # only for vertex ai models
-            output_cost_per_image: Optional[float]
-            output_vector_size: Optional[int]
-            output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
-            output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
-            litellm_provider: Required[str]
-            mode: Required[
-                Literal[
-                    "completion", "embedding", "image_generation", "chat", "audio_transcription"
-                ]
-            ]
-            supported_openai_params: Required[Optional[List[str]]]
-            supports_system_messages: Optional[bool]
-            supports_response_schema: Optional[bool]
-            supports_vision: Optional[bool]
-            supports_function_calling: Optional[bool]
-            supports_prompt_caching: Optional[bool]
-            supports_audio_input: Optional[bool]
-            supports_audio_output: Optional[bool]
-            supports_pdf_input: Optional[bool]
-    Raises:
-        Exception: If the model is not mapped yet.
-
-    Example:
-        >>> get_model_info("gpt-4")
-        {
-            "max_tokens": 8192,
-            "input_cost_per_token": 0.00003,
-            "output_cost_per_token": 0.00006,
-            "litellm_provider": "openai",
-            "mode": "chat",
-            "supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
-        }
-    """
-    supported_openai_params: Union[List[str], None] = []
-
-    def _get_max_position_embeddings(model_name):
-        # Construct the URL for the config.json file
-        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
+def _get_max_position_embeddings(model_name: str) -> Optional[int]:
+    # Construct the URL for the config.json file
+    config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"
 
-        try:
-            # Make the HTTP request to get the raw JSON file
-            response = litellm.module_level_client.get(config_url)
-            response.raise_for_status()  # Raise an exception for bad responses (4xx or 5xx)
+    try:
+        # Make the HTTP request to get the raw JSON file
+        response = litellm.module_level_client.get(config_url)
+        response.raise_for_status()  # Raise an exception for bad responses (4xx or 5xx)
 
-            # Parse the JSON response
-            config_json = response.json()
+        # Parse the JSON response
+        config_json = response.json()
 
-            # Extract and return the max_position_embeddings
-            max_position_embeddings = config_json.get("max_position_embeddings")
+        # Extract and return the max_position_embeddings
+        max_position_embeddings = config_json.get("max_position_embeddings")
 
-            if max_position_embeddings is not None:
-                return max_position_embeddings
-            else:
-                return None
-        except Exception:
+        if max_position_embeddings is not None:
+            return max_position_embeddings
+        else:
             return None
+    except Exception:
+        return None
+
 
+def _get_model_info_helper(  # noqa: PLR0915
+    model: str, custom_llm_provider: Optional[str] = None
+) -> ModelInfoBase:
+    """
+    Helper for 'get_model_info'. Separated out to avoid infinite loop caused by returning 'supported_openai_params'.
+    """
     try:
         azure_llms = {**litellm.azure_llms, **litellm.azure_embedding_models}
         if model in azure_llms:
@@ -4308,12 +4219,9 @@ def _get_max_position_embeddings(model_name):
         split_model = potential_model_names["split_model"]
         custom_llm_provider = potential_model_names["custom_llm_provider"]
         #########################
-        supported_openai_params = litellm.get_supported_openai_params(
-            model=model, custom_llm_provider=custom_llm_provider
-        )
         if custom_llm_provider == "huggingface":
             max_tokens = _get_max_position_embeddings(model_name=model)
-            return ModelInfo(
+            return ModelInfoBase(
                 key=model,
                 max_tokens=max_tokens,  # type: ignore
                 max_input_tokens=None,
@@ -4322,7 +4230,6 @@ def _get_max_position_embeddings(model_name):
                 output_cost_per_token=0,
                 litellm_provider="huggingface",
                 mode="chat",
-                supported_openai_params=supported_openai_params,
                 supports_system_messages=None,
                 supports_response_schema=None,
                 supports_function_calling=None,
@@ -4347,7 +4254,6 @@ def _get_max_position_embeddings(model_name):
             if combined_model_name in litellm.model_cost:
                 key = combined_model_name
                 _model_info = _get_model_info_from_model_cost(key=key)
-                _model_info["supported_openai_params"] = supported_openai_params
                 if not _check_provider_match(
                     model_info=_model_info, custom_llm_provider=custom_llm_provider
                 ):
@@ -4355,7 +4261,6 @@ def _get_max_position_embeddings(model_name):
             if _model_info is None and model in litellm.model_cost:
                 key = model
                 _model_info = _get_model_info_from_model_cost(key=key)
-                _model_info["supported_openai_params"] = supported_openai_params
                 if not _check_provider_match(
                     model_info=_model_info, custom_llm_provider=custom_llm_provider
                 ):
@@ -4366,7 +4271,6 @@ def _get_max_position_embeddings(model_name):
             ):
                 key = combined_stripped_model_name
                 _model_info = _get_model_info_from_model_cost(key=key)
-                _model_info["supported_openai_params"] = supported_openai_params
                 if not _check_provider_match(
                     model_info=_model_info, custom_llm_provider=custom_llm_provider
                 ):
@@ -4374,7 +4278,6 @@ def _get_max_position_embeddings(model_name):
             if _model_info is None and stripped_model_name in litellm.model_cost:
                 key = stripped_model_name
                 _model_info = _get_model_info_from_model_cost(key=key)
-                _model_info["supported_openai_params"] = supported_openai_params
                 if not _check_provider_match(
                     model_info=_model_info, custom_llm_provider=custom_llm_provider
                 ):
@@ -4382,7 +4285,6 @@ def _get_max_position_embeddings(model_name):
             if _model_info is None and split_model in litellm.model_cost:
                 key = split_model
                 _model_info = _get_model_info_from_model_cost(key=key)
-                _model_info["supported_openai_params"] = supported_openai_params
                 if not _check_provider_match(
                     model_info=_model_info, custom_llm_provider=custom_llm_provider
                 ):
@@ -4420,7 +4322,7 @@ def _get_max_position_embeddings(model_name):
                 )
                 _output_cost_per_token = 0
 
-            return ModelInfo(
+            return ModelInfoBase(
                 key=key,
                 max_tokens=_model_info.get("max_tokens", None),
                 max_input_tokens=_model_info.get("max_input_tokens", None),
@@ -4463,7 +4365,6 @@ def _get_max_position_embeddings(model_name):
                     "litellm_provider", custom_llm_provider
                 ),
                 mode=_model_info.get("mode"),  # type: ignore
-                supported_openai_params=supported_openai_params,
                 supports_system_messages=_model_info.get(
                     "supports_system_messages", None
                 ),
@@ -4496,6 +4397,89 @@ def _get_max_position_embeddings(model_name):
         )
 
 
+def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
+    """
+    Get a dict for the maximum tokens (context window), input_cost_per_token, output_cost_per_token  for a given model.
+
+    Parameters:
+    - model (str): The name of the model.
+    - custom_llm_provider (str | None): the provider used for the model. If provided, used to check if the litellm model info is for that provider.
+
+    Returns:
+        dict: A dictionary containing the following information:
+            key: Required[str] # the key in litellm.model_cost which is returned
+            max_tokens: Required[Optional[int]]
+            max_input_tokens: Required[Optional[int]]
+            max_output_tokens: Required[Optional[int]]
+            input_cost_per_token: Required[float]
+            input_cost_per_character: Optional[float]  # only for vertex ai models
+            input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
+            input_cost_per_character_above_128k_tokens: Optional[
+                float
+            ]  # only for vertex ai models
+            input_cost_per_query: Optional[float] # only for rerank models
+            input_cost_per_image: Optional[float]  # only for vertex ai models
+            input_cost_per_audio_token: Optional[float]
+            input_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
+            input_cost_per_video_per_second: Optional[float]  # only for vertex ai models
+            output_cost_per_token: Required[float]
+            output_cost_per_audio_token: Optional[float]
+            output_cost_per_character: Optional[float]  # only for vertex ai models
+            output_cost_per_token_above_128k_tokens: Optional[
+                float
+            ]  # only for vertex ai models
+            output_cost_per_character_above_128k_tokens: Optional[
+                float
+            ]  # only for vertex ai models
+            output_cost_per_image: Optional[float]
+            output_vector_size: Optional[int]
+            output_cost_per_video_per_second: Optional[float]  # only for vertex ai models
+            output_cost_per_audio_per_second: Optional[float]  # only for vertex ai models
+            litellm_provider: Required[str]
+            mode: Required[
+                Literal[
+                    "completion", "embedding", "image_generation", "chat", "audio_transcription"
+                ]
+            ]
+            supported_openai_params: Required[Optional[List[str]]]
+            supports_system_messages: Optional[bool]
+            supports_response_schema: Optional[bool]
+            supports_vision: Optional[bool]
+            supports_function_calling: Optional[bool]
+            supports_prompt_caching: Optional[bool]
+            supports_audio_input: Optional[bool]
+            supports_audio_output: Optional[bool]
+            supports_pdf_input: Optional[bool]
+    Raises:
+        Exception: If the model is not mapped yet.
+
+    Example:
+        >>> get_model_info("gpt-4")
+        {
+            "max_tokens": 8192,
+            "input_cost_per_token": 0.00003,
+            "output_cost_per_token": 0.00006,
+            "litellm_provider": "openai",
+            "mode": "chat",
+            "supported_openai_params": ["temperature", "max_tokens", "top_p", "frequency_penalty", "presence_penalty"]
+        }
+    """
+    supported_openai_params = litellm.get_supported_openai_params(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    _model_info = _get_model_info_helper(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+    )
+
+    returned_model_info = ModelInfo(
+        **_model_info, supported_openai_params=supported_openai_params
+    )
+
+    return returned_model_info
+
+
 def json_schema_type(python_type_name: str):
     """Converts standard python types to json schema types
 
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 27d750a6c56e..984e5d940da2 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -137,7 +137,8 @@
         "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true,
-        "supports_system_messages": true
+        "supports_system_messages": true,
+        "supports_response_schema": true
     },
     "o1-mini": {
         "max_tokens": 65536,
@@ -148,8 +149,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -162,8 +161,6 @@
         "cache_read_input_token_cost": 0.0000015,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -176,8 +173,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -190,8 +185,6 @@
         "cache_read_input_token_cost": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
-        "supports_function_calling": true,
-        "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true
     },
@@ -208,7 +201,8 @@
         "supports_parallel_function_calling": true,
         "supports_vision": false,
         "supports_prompt_caching": true,
-        "supports_system_messages": true
+        "supports_system_messages": true,
+        "supports_response_schema": true
     },
     "chatgpt-4o-latest": {
         "max_tokens": 4096,
diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py
index 9f46003461c2..1e2e9d3929b3 100644
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -92,6 +92,35 @@ async def test_o1_handle_streaming_optional_params(model, expected_streaming_sup
     assert expected_streaming_support == ("stream" in supported_params)
 
 
+@pytest.mark.parametrize(
+    "model, expected_tool_calling_support",
+    [("o1-preview", False), ("o1-mini", False), ("o1", True)],
+)
+@pytest.mark.asyncio
+async def test_o1_handle_tool_calling_optional_params(
+    model, expected_tool_calling_support
+):
+    """
+    Tests that:
+    - 'tools' is a supported param for 'o1'
+    - 'tools' is not a supported param for 'o1-preview' and 'o1-mini'
+    """
+    from openai import AsyncOpenAI
+    from litellm.utils import ProviderConfigManager
+    from litellm.types.utils import LlmProviders
+
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    config = ProviderConfigManager.get_provider_chat_config(
+        model=model, provider=LlmProviders.OPENAI
+    )
+
+    supported_params = config.get_supported_openai_params(model=model)
+
+    assert expected_tool_calling_support == ("tools" in supported_params)
+
+
 # @pytest.mark.parametrize(
 #     "model",
 #     ["o1"],  # "o1-preview", "o1-mini",

From e331213ef20a1a373b450263feee12821430021e Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 15:17:24 -0800
Subject: [PATCH 10/17] fix: fix linting errors

---
 litellm/llms/databricks/chat/transformation.py      | 12 ++++++------
 litellm/llms/deepseek/chat/transformation.py        | 10 +++++-----
 litellm/llms/groq/chat/transformation.py            |  2 +-
 litellm/llms/mistral/mistral_chat_transformation.py |  4 ++--
 litellm/llms/oobabooga/chat/transformation.py       |  5 -----
 litellm/llms/openai_like/chat/handler.py            |  4 +++-
 6 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/litellm/llms/databricks/chat/transformation.py b/litellm/llms/databricks/chat/transformation.py
index 581eb1366c83..f154ed5c1c52 100644
--- a/litellm/llms/databricks/chat/transformation.py
+++ b/litellm/llms/databricks/chat/transformation.py
@@ -7,14 +7,14 @@
 
 from pydantic import BaseModel
 
-from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import ProviderField
-
-from ...openai_like.chat.transformation import OpenAILikeChatConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     handle_messages_with_content_list_to_str_conversion,
     strip_name_from_messages,
 )
+from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import ProviderField
+
+from ...openai_like.chat.transformation import OpenAILikeChatConfig
 
 
 class DatabricksConfig(OpenAILikeChatConfig):
@@ -86,7 +86,7 @@ def _should_fake_stream(self, optional_params: dict) -> bool:
         return False
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         Databricks does not support:
@@ -102,4 +102,4 @@ def _transform_messages(
             new_messages.append(_message)
         new_messages = handle_messages_with_content_list_to_str_conversion(new_messages)
         new_messages = strip_name_from_messages(new_messages)
-        return super()._transform_messages(new_messages)
+        return super()._transform_messages(messages=new_messages, model=model)
diff --git a/litellm/llms/deepseek/chat/transformation.py b/litellm/llms/deepseek/chat/transformation.py
index 288b1b7c16a5..b2c72b00107f 100644
--- a/litellm/llms/deepseek/chat/transformation.py
+++ b/litellm/llms/deepseek/chat/transformation.py
@@ -8,26 +8,26 @@
 from pydantic import BaseModel
 
 import litellm
+from litellm.litellm_core_utils.prompt_templates.common_utils import (
+    handle_messages_with_content_list_to_str_conversion,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionAssistantMessage
 
 from ....utils import _remove_additional_properties, _remove_strict_from_schema
 from ...openai.chat.gpt_transformation import OpenAIGPTConfig
-from litellm.litellm_core_utils.prompt_templates.common_utils import (
-    handle_messages_with_content_list_to_str_conversion,
-)
 
 
 class DeepSeekChatConfig(OpenAIGPTConfig):
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         DeepSeek does not support content in list format.
         """
         messages = handle_messages_with_content_list_to_str_conversion(messages)
-        return super()._transform_messages(messages)
+        return super()._transform_messages(messages=messages, model=model)
 
     def _get_openai_compatible_provider_info(
         self, api_base: Optional[str], api_key: Optional[str]
diff --git a/litellm/llms/groq/chat/transformation.py b/litellm/llms/groq/chat/transformation.py
index 267d52761820..78e844f5058a 100644
--- a/litellm/llms/groq/chat/transformation.py
+++ b/litellm/llms/groq/chat/transformation.py
@@ -61,7 +61,7 @@ def __init__(
     def get_config(cls):
         return super().get_config()
 
-    def _transform_messages(self, messages: List[AllMessageValues]) -> List:
+    def _transform_messages(self, messages: List[AllMessageValues], model: str) -> List:
         for idx, message in enumerate(messages):
             """
             1. Don't pass 'null' function_call assistant message to groq - https://github.com/BerriAI/litellm/issues/5839
diff --git a/litellm/llms/mistral/mistral_chat_transformation.py b/litellm/llms/mistral/mistral_chat_transformation.py
index 2279e807fccd..97af6d4229d5 100644
--- a/litellm/llms/mistral/mistral_chat_transformation.py
+++ b/litellm/llms/mistral/mistral_chat_transformation.py
@@ -9,11 +9,11 @@
 import types
 from typing import List, Literal, Optional, Tuple, Union
 
-from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     handle_messages_with_content_list_to_str_conversion,
     strip_none_values_from_message,
 )
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
 
@@ -148,7 +148,7 @@ def _get_openai_compatible_provider_info(
         return api_base, dynamic_api_key
 
     def _transform_messages(
-        self, messages: List[AllMessageValues]
+        self, messages: List[AllMessageValues], model: str
     ) -> List[AllMessageValues]:
         """
         - handles scenario where content is list and not string
diff --git a/litellm/llms/oobabooga/chat/transformation.py b/litellm/llms/oobabooga/chat/transformation.py
index 79ccca840ce8..f3a25f1df2ba 100644
--- a/litellm/llms/oobabooga/chat/transformation.py
+++ b/litellm/llms/oobabooga/chat/transformation.py
@@ -23,11 +23,6 @@
 
 
 class OobaboogaConfig(OpenAIGPTConfig):
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self,
         error_message: str,
diff --git a/litellm/llms/openai_like/chat/handler.py b/litellm/llms/openai_like/chat/handler.py
index 2252dfc9ccc9..dee57b9a28b5 100644
--- a/litellm/llms/openai_like/chat/handler.py
+++ b/litellm/llms/openai_like/chat/handler.py
@@ -284,7 +284,9 @@ def completion(
             if isinstance(provider_config, OpenAIGPTConfig) or isinstance(
                 provider_config, OpenAIConfig
             ):
-                messages = provider_config._transform_messages(messages)
+                messages = provider_config._transform_messages(
+                    messages=messages, model=model
+                )
 
         data = {
             "model": model,

From fcf515b5001e34d3372ecd9f4f11ceab20f4a521 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 15:25:18 -0800
Subject: [PATCH 11/17] fix: update '_transform_messages'

---
 litellm/litellm_core_utils/prompt_templates/factory.py | 4 +++-
 litellm/llms/anthropic/completion/transformation.py    | 6 ------
 litellm/llms/azure_ai/chat/transformation.py           | 3 ++-
 litellm/llms/clarifai/chat/transformation.py           | 5 -----
 litellm/llms/cloudflare/chat/transformation.py         | 5 -----
 litellm/llms/cohere/chat/transformation.py             | 5 -----
 litellm/llms/cohere/completion/transformation.py       | 6 ------
 litellm/llms/databricks/chat/handler.py                | 7 +++++--
 litellm/llms/groq/chat/handler.py                      | 7 +++++--
 litellm/llms/huggingface/chat/transformation.py        | 6 ------
 litellm/llms/ollama/completion/transformation.py       | 5 -----
 litellm/llms/predibase/chat/transformation.py          | 5 -----
 litellm/llms/replicate/chat/transformation.py          | 5 -----
 litellm/llms/sagemaker/completion/transformation.py    | 6 ------
 litellm/llms/watsonx/completion/transformation.py      | 6 ------
 15 files changed, 15 insertions(+), 66 deletions(-)

diff --git a/litellm/litellm_core_utils/prompt_templates/factory.py b/litellm/litellm_core_utils/prompt_templates/factory.py
index 0b5bd48bb8d4..71de4398a05b 100644
--- a/litellm/litellm_core_utils/prompt_templates/factory.py
+++ b/litellm/litellm_core_utils/prompt_templates/factory.py
@@ -3144,7 +3144,9 @@ def prompt_factory(
         else:
             return gemini_text_image_pt(messages=messages)
     elif custom_llm_provider == "mistral":
-        return litellm.MistralConfig()._transform_messages(messages=messages)
+        return litellm.MistralConfig()._transform_messages(
+            messages=messages, model=model
+        )
     elif custom_llm_provider == "bedrock":
         if "amazon.titan-text" in model:
             return amazon_titan_pt(messages=messages)
diff --git a/litellm/llms/anthropic/completion/transformation.py b/litellm/llms/anthropic/completion/transformation.py
index df8064ddf48d..57cdd95524a9 100644
--- a/litellm/llms/anthropic/completion/transformation.py
+++ b/litellm/llms/anthropic/completion/transformation.py
@@ -260,12 +260,6 @@ def _get_anthropic_text_prompt_from_messages(
 
         return str(prompt)
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        "Not required"
-        raise NotImplementedError
-
     def get_model_response_iterator(
         self,
         streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
diff --git a/litellm/llms/azure_ai/chat/transformation.py b/litellm/llms/azure_ai/chat/transformation.py
index bce48e6fc01a..4c60c93f04fb 100644
--- a/litellm/llms/azure_ai/chat/transformation.py
+++ b/litellm/llms/azure_ai/chat/transformation.py
@@ -2,11 +2,11 @@
 
 import litellm
 from litellm._logging import verbose_logger
-from litellm.llms.openai.openai import OpenAIConfig
 from litellm.litellm_core_utils.prompt_templates.common_utils import (
     _audio_or_image_in_message_content,
     convert_content_list_to_str,
 )
+from litellm.llms.openai.openai import OpenAIConfig
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import ProviderField
@@ -33,6 +33,7 @@ def get_required_params(self) -> List[ProviderField]:
     def _transform_messages(
         self,
         messages: List[AllMessageValues],
+        model: str,
     ) -> List:
         """
         - Azure AI Studio doesn't support content as a list. This handles:
diff --git a/litellm/llms/clarifai/chat/transformation.py b/litellm/llms/clarifai/chat/transformation.py
index 5dc22c284ef5..c832ff89244f 100644
--- a/litellm/llms/clarifai/chat/transformation.py
+++ b/litellm/llms/clarifai/chat/transformation.py
@@ -131,11 +131,6 @@ def validate_environment(
             headers["Authorization"] = f"Bearer {api_key}"
         return headers
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
diff --git a/litellm/llms/cloudflare/chat/transformation.py b/litellm/llms/cloudflare/chat/transformation.py
index 596875919a3c..8f6d5ccc1eae 100644
--- a/litellm/llms/cloudflare/chat/transformation.py
+++ b/litellm/llms/cloudflare/chat/transformation.py
@@ -158,11 +158,6 @@ def get_error_class(
             message=error_message,
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_model_response_iterator(
         self,
         streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
diff --git a/litellm/llms/cohere/chat/transformation.py b/litellm/llms/cohere/chat/transformation.py
index 39df1e021f43..464ef1f2687c 100644
--- a/litellm/llms/cohere/chat/transformation.py
+++ b/litellm/llms/cohere/chat/transformation.py
@@ -365,8 +365,3 @@ def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
         return CohereError(status_code=status_code, message=error_message)
-
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
diff --git a/litellm/llms/cohere/completion/transformation.py b/litellm/llms/cohere/completion/transformation.py
index 61d5ca5ad39e..23ba87f11544 100644
--- a/litellm/llms/cohere/completion/transformation.py
+++ b/litellm/llms/cohere/completion/transformation.py
@@ -121,12 +121,6 @@ def validate_environment(
             api_key=api_key,
         )
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        raise NotImplementedError
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
diff --git a/litellm/llms/databricks/chat/handler.py b/litellm/llms/databricks/chat/handler.py
index 078235a284c2..39fb79493bce 100644
--- a/litellm/llms/databricks/chat/handler.py
+++ b/litellm/llms/databricks/chat/handler.py
@@ -2,11 +2,12 @@
 Handles the chat completion request for Databricks
 """
 
-from typing import Any, Callable, Literal, Optional, Tuple, Union
+from typing import Any, Callable, List, Literal, Optional, Tuple, Union, cast
 
 from httpx._config import Timeout
 
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
 
@@ -44,7 +45,9 @@ def completion(
         streaming_decoder: Optional[CustomStreamingDecoder] = None,
         fake_stream: bool = False,
     ):
-        messages = DatabricksConfig()._transform_messages(messages)  # type: ignore
+        messages = DatabricksConfig()._transform_messages(
+            messages=cast(List[AllMessageValues], messages), model=model
+        )
         api_base, headers = self.databricks_validate_environment(
             api_base=api_base,
             api_key=api_key,
diff --git a/litellm/llms/groq/chat/handler.py b/litellm/llms/groq/chat/handler.py
index a6d6822a5e8a..a29a9009dd75 100644
--- a/litellm/llms/groq/chat/handler.py
+++ b/litellm/llms/groq/chat/handler.py
@@ -2,11 +2,12 @@
 Handles the chat completion request for groq
 """
 
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, List, Optional, Union, cast
 
 from httpx._config import Timeout
 
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.types.llms.openai import AllMessageValues
 from litellm.types.utils import CustomStreamingDecoder
 from litellm.utils import ModelResponse
 
@@ -42,7 +43,9 @@ def completion(
         streaming_decoder: Optional[CustomStreamingDecoder] = None,
         fake_stream: bool = False,
     ):
-        messages = GroqChatConfig()._transform_messages(messages)  # type: ignore
+        messages = GroqChatConfig()._transform_messages(
+            messages=cast(List[AllMessageValues], messages), model=model
+        )
 
         if optional_params.get("stream") is True:
             fake_stream = GroqChatConfig()._should_fake_stream(optional_params)
diff --git a/litellm/llms/huggingface/chat/transformation.py b/litellm/llms/huggingface/chat/transformation.py
index c1bdc9ca6738..2c35f2a20d78 100644
--- a/litellm/llms/huggingface/chat/transformation.py
+++ b/litellm/llms/huggingface/chat/transformation.py
@@ -369,12 +369,6 @@ def validate_environment(
         headers = {**headers, **default_headers}
         return headers
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
diff --git a/litellm/llms/ollama/completion/transformation.py b/litellm/llms/ollama/completion/transformation.py
index d9cdff20d42b..46e67b4720e0 100644
--- a/litellm/llms/ollama/completion/transformation.py
+++ b/litellm/llms/ollama/completion/transformation.py
@@ -235,11 +235,6 @@ def get_model_info(self, model: str) -> ModelInfoBase:
             max_output_tokens=_max_tokens,
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, Headers]
     ) -> BaseLLMException:
diff --git a/litellm/llms/predibase/chat/transformation.py b/litellm/llms/predibase/chat/transformation.py
index 016b9e700f7c..597f24794b2a 100644
--- a/litellm/llms/predibase/chat/transformation.py
+++ b/litellm/llms/predibase/chat/transformation.py
@@ -139,11 +139,6 @@ def transform_response(
             "Predibase transformation currently done in handler.py. Need to migrate to this file."
         )
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def transform_request(
         self,
         model: str,
diff --git a/litellm/llms/replicate/chat/transformation.py b/litellm/llms/replicate/chat/transformation.py
index b4d8b008d57a..ea0fbd035f38 100644
--- a/litellm/llms/replicate/chat/transformation.py
+++ b/litellm/llms/replicate/chat/transformation.py
@@ -130,11 +130,6 @@ def model_to_version_id(self, model: str) -> str:
             return split_model[1]
         return model
 
-    def _transform_messages(
-        self, messages: List[AllMessageValues]
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
     ) -> BaseLLMException:
diff --git a/litellm/llms/sagemaker/completion/transformation.py b/litellm/llms/sagemaker/completion/transformation.py
index 6e4d2ac9c544..e411bea519df 100644
--- a/litellm/llms/sagemaker/completion/transformation.py
+++ b/litellm/llms/sagemaker/completion/transformation.py
@@ -57,12 +57,6 @@ def __init__(
     def get_config(cls):
         return super().get_config()
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[dict, Headers]
     ) -> BaseLLMException:
diff --git a/litellm/llms/watsonx/completion/transformation.py b/litellm/llms/watsonx/completion/transformation.py
index 566b6ad2ce50..dd5657763308 100644
--- a/litellm/llms/watsonx/completion/transformation.py
+++ b/litellm/llms/watsonx/completion/transformation.py
@@ -240,12 +240,6 @@ def get_us_regions(self) -> List[str]:
             "us-south",
         ]
 
-    def _transform_messages(
-        self,
-        messages: List[AllMessageValues],
-    ) -> List[AllMessageValues]:
-        return messages
-
     def get_error_class(
         self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
     ) -> BaseLLMException:

From ec9ae28e0374c99b861c9c729cf266b4a88acff8 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 16:02:47 -0800
Subject: [PATCH 12/17] fix(o1_transformation.py): fix provider passed for
 supported param checks

---
 litellm/llms/openai/chat/o1_transformation.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py
index 97899d67fe04..6a2d7bd06ccf 100644
--- a/litellm/llms/openai/chat/o1_transformation.py
+++ b/litellm/llms/openai/chat/o1_transformation.py
@@ -15,6 +15,8 @@
 from typing import Any, List, Optional, Union
 
 import litellm
+from litellm import verbose_logger
+from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
 from litellm.utils import (
     supports_function_calling,
@@ -50,8 +52,20 @@ def get_supported_openai_params(self, model: str) -> list:
         ]
 
         supported_streaming_models = ["o1-preview", "o1-mini"]
-        _supports_function_calling = supports_function_calling(model, "openai")
-        _supports_response_schema = supports_response_schema(model, "openai")
+        try:
+            model, custom_llm_provider, api_base, api_key = get_llm_provider(
+                model=model
+            )
+        except Exception:
+            verbose_logger.debug(
+                f"Unable to infer model provider for model={model}, defaulting to openai for o1 supported param check"
+            )
+            custom_llm_provider = "openai"
+
+        _supports_function_calling = supports_function_calling(
+            model, custom_llm_provider
+        )
+        _supports_response_schema = supports_response_schema(model, custom_llm_provider)
 
         if model not in supported_streaming_models:
             non_supported_params.append("stream")

From 8fdf421b430fbfd6c73e92a6856fcf2d3df1827b Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 16:53:39 -0800
Subject: [PATCH 13/17] test(base_llm_unit_tests.py): skip test if api takes
 >5s to respond

---
 tests/llm_translation/base_llm_unit_tests.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/llm_translation/base_llm_unit_tests.py b/tests/llm_translation/base_llm_unit_tests.py
index ab42bc5fe998..968655f43376 100644
--- a/tests/llm_translation/base_llm_unit_tests.py
+++ b/tests/llm_translation/base_llm_unit_tests.py
@@ -219,6 +219,7 @@ class TestModel(BaseModel):
                     },
                 ],
                 response_format=TestModel,
+                timeout=5,
             )
             assert res is not None
 
@@ -226,6 +227,8 @@ class TestModel(BaseModel):
 
             assert res.choices[0].message.content is not None
             assert res.choices[0].message.tool_calls is None
+        except litellm.Timeout:
+            pytest.skip("Model took too long to respond")
         except litellm.InternalServerError:
             pytest.skip("Model is overloaded")
 

From ef20c8badeffd3f76e7e2e9fecfc9994239db643 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 16:55:56 -0800
Subject: [PATCH 14/17] fix(utils.py): return false in '_supports_factory' if
 can't find value

---
 litellm/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index a0666267c959..360d31093695 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1736,9 +1736,10 @@ def _supports_factory(model: str, custom_llm_provider: Optional[str], key: str)
             return True
         return False
     except Exception as e:
-        raise Exception(
+        verbose_logger.debug(
             f"Model not found or error in checking {key} support. You passed model={model}, custom_llm_provider={custom_llm_provider}. Error: {str(e)}"
         )
+        return False
 
 
 def supports_audio_input(model: str, custom_llm_provider: Optional[str] = None) -> bool:

From a082a72668e06f8a6de0caf48ab3580ff721eaed Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 17:07:47 -0800
Subject: [PATCH 15/17] fix(o1_transformation.py): always return stream +
 stream_options as supported params + handle stream options being passed in
 for azure o1

---
 litellm/llms/azure/chat/o1_handler.py         | 2 ++
 litellm/llms/openai/chat/o1_transformation.py | 8 +-------
 tests/local_testing/test_streaming.py         | 8 ++++----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/litellm/llms/azure/chat/o1_handler.py b/litellm/llms/azure/chat/o1_handler.py
index 45c35d6276b0..3660ffdc73f6 100644
--- a/litellm/llms/azure/chat/o1_handler.py
+++ b/litellm/llms/azure/chat/o1_handler.py
@@ -57,6 +57,7 @@ def completion(
         client=None,
     ):
         stream: Optional[bool] = optional_params.pop("stream", False)
+        stream_options: Optional[dict] = optional_params.pop("stream_options", None)
         response = super().completion(
             model,
             messages,
@@ -90,6 +91,7 @@ def completion(
                 model=model,
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
+                stream_options=stream_options,
             )
 
             return streaming_response
diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py
index 6a2d7bd06ccf..4f6197c3abf9 100644
--- a/litellm/llms/openai/chat/o1_transformation.py
+++ b/litellm/llms/openai/chat/o1_transformation.py
@@ -51,7 +51,6 @@ def get_supported_openai_params(self, model: str) -> list:
             "top_logprobs",
         ]
 
-        supported_streaming_models = ["o1-preview", "o1-mini"]
         try:
             model, custom_llm_provider, api_base, api_key = get_llm_provider(
                 model=model
@@ -67,10 +66,6 @@ def get_supported_openai_params(self, model: str) -> list:
         )
         _supports_response_schema = supports_response_schema(model, custom_llm_provider)
 
-        if model not in supported_streaming_models:
-            non_supported_params.append("stream")
-            non_supported_params.append("stream_options")
-
         if not _supports_function_calling:
             non_supported_params.append("tools")
             non_supported_params.append("tool_choice")
@@ -81,10 +76,9 @@ def get_supported_openai_params(self, model: str) -> list:
         if not _supports_response_schema:
             non_supported_params.append("response_format")
 
-        returned_params = [
+        return [
             param for param in all_openai_params if param not in non_supported_params
         ]
-        return returned_params
 
     def map_openai_params(
         self,
diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py
index 285b8b298c04..cf342c79069a 100644
--- a/tests/local_testing/test_streaming.py
+++ b/tests/local_testing/test_streaming.py
@@ -2068,10 +2068,10 @@ def test_openai_chat_completion_complete_response_call():
 @pytest.mark.parametrize(
     "model",
     [
-        "gpt-3.5-turbo",
-        "azure/chatgpt-v-2",
-        "claude-3-haiku-20240307",
-        "o1-preview",
+        # "gpt-3.5-turbo",
+        # "azure/chatgpt-v-2",
+        # "claude-3-haiku-20240307",
+        # "o1-preview",
         "azure/fake-o1-mini",
     ],
 )

From a702326a05cb3d88834ee22f74d3980fbd270660 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 17:44:56 -0800
Subject: [PATCH 16/17] feat(openai.py): support stream faking natively in
 openai handler

Allows streaming to be faked for just the "o1" model, and allows native streaming for o1-mini and o1-preview.

 Fixes https://github.com/BerriAI/litellm/issues/7292
---
 litellm/llms/base_llm/chat/transformation.py  |  8 ++
 litellm/llms/openai/chat/o1_transformation.py |  9 +++
 litellm/llms/openai/openai.py                 | 79 +++++++++++++++++--
 tests/llm_translation/test_openai_o1.py       | 27 -------
 tests/local_testing/test_streaming.py         |  9 ++-
 5 files changed, 95 insertions(+), 37 deletions(-)

diff --git a/litellm/llms/base_llm/chat/transformation.py b/litellm/llms/base_llm/chat/transformation.py
index aa0f0838d37b..626712a80636 100644
--- a/litellm/llms/base_llm/chat/transformation.py
+++ b/litellm/llms/base_llm/chat/transformation.py
@@ -82,6 +82,14 @@ def get_config(cls):
             and v is not None
         }
 
+    def should_fake_stream(
+        self, model: str, custom_llm_provider: Optional[str] = None
+    ) -> bool:
+        """
+        Whether to fake streaming for this model/provider. Base default: False.
+        """
+        return False
+
     @abstractmethod
     def get_supported_openai_params(self, model: str) -> list:
         pass
diff --git a/litellm/llms/openai/chat/o1_transformation.py b/litellm/llms/openai/chat/o1_transformation.py
index 4f6197c3abf9..ab7e2182bda1 100644
--- a/litellm/llms/openai/chat/o1_transformation.py
+++ b/litellm/llms/openai/chat/o1_transformation.py
@@ -36,6 +36,15 @@ class OpenAIO1Config(OpenAIGPTConfig):
     def get_config(cls):
         return super().get_config()
 
+    def should_fake_stream(
+        self, model: str, custom_llm_provider: Optional[str] = None
+    ) -> bool:
+        """Return False when `model` names a natively streaming o1 variant."""
+        # Substring match: any model name containing one of these ids qualifies.
+        native_stream_ids = ("o1-mini", "o1-preview")
+        streams_natively = any(name in model for name in native_stream_ids)
+        return not streams_natively
+
     def get_supported_openai_params(self, model: str) -> list:
         """
         Get the supported OpenAI params for the given model
diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index 62b193e2dbd8..66ff5dfe1bda 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -33,6 +33,7 @@
     prompt_factory,
 )
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
+from litellm.llms.bedrock.chat.invoke_handler import MockResponseIterator
 from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENTS
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.utils import (
@@ -410,6 +411,24 @@ def make_sync_openai_chat_completion_request(
             else:
                 raise e
 
+    def mock_streaming(
+        self,
+        response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        model: str,
+        stream_options: Optional[dict] = None,
+    ) -> CustomStreamWrapper:
+        """
+        Wrap an already-complete response in a stream wrapper (fake streaming).
+        """
+        return CustomStreamWrapper(
+            completion_stream=MockResponseIterator(model_response=response),
+            model=model,
+            custom_llm_provider="openai",
+            logging_obj=logging_obj,
+            stream_options=stream_options,
+        )
+
     def completion(  # type: ignore # noqa: PLR0915
         self,
         model_response: ModelResponse,
@@ -433,8 +452,21 @@ def completion(  # type: ignore # noqa: PLR0915
     ):
         super().completion()
         try:
+            fake_stream: bool = False
+            if custom_llm_provider is not None and model is not None:
+                provider_config = ProviderConfigManager.get_provider_chat_config(
+                    model=model, provider=LlmProviders(custom_llm_provider)
+                )
+                fake_stream = provider_config.should_fake_stream(
+                    model=model, custom_llm_provider=custom_llm_provider
+                )
+            inference_params = optional_params.copy()
+            stream_options: Optional[dict] = inference_params.pop(
+                "stream_options", None
+            )
+            stream: Optional[bool] = inference_params.pop("stream", False)
             if headers:
-                optional_params["extra_headers"] = headers
+                inference_params["extra_headers"] = headers
             if model is None or messages is None:
                 raise OpenAIError(status_code=422, message="Missing model or messages")
 
@@ -466,7 +498,7 @@ def completion(  # type: ignore # noqa: PLR0915
                 data = OpenAIConfig().transform_request(
                     model=model,
                     messages=messages,
-                    optional_params=optional_params,
+                    optional_params=inference_params,
                     litellm_params=litellm_params,
                     headers=headers or {},
                 )
@@ -474,7 +506,7 @@ def completion(  # type: ignore # noqa: PLR0915
                 try:
                     max_retries = data.pop("max_retries", 2)
                     if acompletion is True:
-                        if optional_params.get("stream", False):
+                        if stream is True and fake_stream is False:
                             return self.async_streaming(
                                 logging_obj=logging_obj,
                                 headers=headers,
@@ -487,11 +519,13 @@ def completion(  # type: ignore # noqa: PLR0915
                                 max_retries=max_retries,
                                 organization=organization,
                                 drop_params=drop_params,
+                                stream_options=stream_options,
                             )
                         else:
                             return self.acompletion(
                                 data=data,
                                 headers=headers,
+                                model=model,
                                 logging_obj=logging_obj,
                                 model_response=model_response,
                                 api_base=api_base,
@@ -501,8 +535,10 @@ def completion(  # type: ignore # noqa: PLR0915
                                 max_retries=max_retries,
                                 organization=organization,
                                 drop_params=drop_params,
+                                stream_options=stream_options,
+                                fake_stream=fake_stream,
                             )
-                    elif optional_params.get("stream", False):
+                    elif stream is True and fake_stream is False:
                         return self.streaming(
                             logging_obj=logging_obj,
                             headers=headers,
@@ -514,6 +549,7 @@ def completion(  # type: ignore # noqa: PLR0915
                             client=client,
                             max_retries=max_retries,
                             organization=organization,
+                            stream_options=stream_options,
                         )
                     else:
                         if not isinstance(max_retries, int):
@@ -559,11 +595,21 @@ def completion(  # type: ignore # noqa: PLR0915
                             original_response=stringified_response,
                             additional_args={"complete_input_dict": data},
                         )
-                        return convert_to_model_response_object(
+                        final_response_obj = convert_to_model_response_object(
                             response_object=stringified_response,
                             model_response_object=model_response,
                             _response_headers=headers,
                         )
+
+                        if fake_stream is True:
+                            return self.mock_streaming(
+                                response=cast(ModelResponse, final_response_obj),
+                                logging_obj=logging_obj,
+                                model=model,
+                                stream_options=stream_options,
+                            )
+
+                        return final_response_obj
                 except openai.UnprocessableEntityError as e:
                     ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
                     if litellm.drop_params is True or drop_params is True:
@@ -625,6 +671,7 @@ def completion(  # type: ignore # noqa: PLR0915
     async def acompletion(
         self,
         data: dict,
+        model: str,
         model_response: ModelResponse,
         logging_obj: LiteLLMLoggingObj,
         timeout: Union[float, httpx.Timeout],
@@ -635,6 +682,8 @@ async def acompletion(
         max_retries=None,
         headers=None,
         drop_params: Optional[bool] = None,
+        stream_options: Optional[dict] = None,
+        fake_stream: bool = False,
     ):
         response = None
         for _ in range(
@@ -676,12 +725,22 @@ async def acompletion(
                     additional_args={"complete_input_dict": data},
                 )
                 logging_obj.model_call_details["response_headers"] = headers
-                return convert_to_model_response_object(
+                final_response_obj = convert_to_model_response_object(
                     response_object=stringified_response,
                     model_response_object=model_response,
                     hidden_params={"headers": headers},
                     _response_headers=headers,
                 )
+
+                if fake_stream is True:
+                    return self.mock_streaming(
+                        response=cast(ModelResponse, final_response_obj),
+                        logging_obj=logging_obj,
+                        model=model,
+                        stream_options=stream_options,
+                    )
+
+                return final_response_obj
             except openai.UnprocessableEntityError as e:
                 ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
                 if litellm.drop_params is True or drop_params is True:
@@ -712,7 +771,11 @@ def streaming(
         client=None,
         max_retries=None,
         headers=None,
+        stream_options: Optional[dict] = None,
     ):
+        data["stream"] = True
+        if stream_options is not None:
+            data["stream_options"] = stream_options
         openai_client: OpenAI = self._get_openai_client(  # type: ignore
             is_async=False,
             api_key=api_key,
@@ -763,8 +826,12 @@ async def async_streaming(
         max_retries=None,
         headers=None,
         drop_params: Optional[bool] = None,
+        stream_options: Optional[dict] = None,
     ):
         response = None
+        data["stream"] = True
+        if stream_options is not None:
+            data["stream_options"] = stream_options
         for _ in range(2):
             try:
                 openai_aclient: AsyncOpenAI = self._get_openai_client(  # type: ignore
diff --git a/tests/llm_translation/test_openai_o1.py b/tests/llm_translation/test_openai_o1.py
index 1e2e9d3929b3..48f8cfdd387f 100644
--- a/tests/llm_translation/test_openai_o1.py
+++ b/tests/llm_translation/test_openai_o1.py
@@ -65,33 +65,6 @@ async def test_o1_handle_system_role(model):
             ]
 
 
-@pytest.mark.parametrize(
-    "model, expected_streaming_support",
-    [("o1-preview", True), ("o1-mini", True), ("o1", False)],
-)
-@pytest.mark.asyncio
-async def test_o1_handle_streaming_optional_params(model, expected_streaming_support):
-    """
-    Tests that:
-    - max_tokens is translated to 'max_completion_tokens'
-    - role 'system' is translated to 'user'
-    """
-    from openai import AsyncOpenAI
-    from litellm.utils import ProviderConfigManager
-    from litellm.types.utils import LlmProviders
-
-    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-    litellm.model_cost = litellm.get_model_cost_map(url="")
-
-    config = ProviderConfigManager.get_provider_chat_config(
-        model=model, provider=LlmProviders.OPENAI
-    )
-
-    supported_params = config.get_supported_openai_params(model=model)
-
-    assert expected_streaming_support == ("stream" in supported_params)
-
-
 @pytest.mark.parametrize(
     "model, expected_tool_calling_support",
     [("o1-preview", False), ("o1-mini", False), ("o1", True)],
diff --git a/tests/local_testing/test_streaming.py b/tests/local_testing/test_streaming.py
index cf342c79069a..67a0400283f3 100644
--- a/tests/local_testing/test_streaming.py
+++ b/tests/local_testing/test_streaming.py
@@ -2068,10 +2068,11 @@ def test_openai_chat_completion_complete_response_call():
 @pytest.mark.parametrize(
     "model",
     [
-        # "gpt-3.5-turbo",
-        # "azure/chatgpt-v-2",
-        # "claude-3-haiku-20240307",
-        # "o1-preview",
+        "gpt-3.5-turbo",
+        "azure/chatgpt-v-2",
+        "claude-3-haiku-20240307",
+        "o1-preview",
+        "o1",
         "azure/fake-o1-mini",
     ],
 )

From 10530e0afc8f95252a7b38c11b2d99d0a3c4f672 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Wed, 18 Dec 2024 18:36:33 -0800
Subject: [PATCH 17/17] fix(openai.py): use inference param instead of original
 optional param

---
 litellm/llms/openai/openai.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/litellm/llms/openai/openai.py b/litellm/llms/openai/openai.py
index 66ff5dfe1bda..7cecb9d0491d 100644
--- a/litellm/llms/openai/openai.py
+++ b/litellm/llms/openai/openai.py
@@ -595,12 +595,12 @@ def completion(  # type: ignore # noqa: PLR0915
                             original_response=stringified_response,
                             additional_args={"complete_input_dict": data},
                         )
+
                         final_response_obj = convert_to_model_response_object(
                             response_object=stringified_response,
                             model_response_object=model_response,
                             _response_headers=headers,
                         )
-
                         if fake_stream is True:
                             return self.mock_streaming(
                                 response=cast(ModelResponse, final_response_obj),
@@ -613,8 +613,8 @@ def completion(  # type: ignore # noqa: PLR0915
                 except openai.UnprocessableEntityError as e:
                     ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
                     if litellm.drop_params is True or drop_params is True:
-                        optional_params = drop_params_from_unprocessable_entity_error(
-                            e, optional_params
+                        inference_params = drop_params_from_unprocessable_entity_error(
+                            e, inference_params
                         )
                     else:
                         raise e
@@ -718,6 +718,7 @@ async def acompletion(
                     openai_aclient=openai_aclient, data=data, timeout=timeout
                 )
                 stringified_response = response.model_dump()
+
                 logging_obj.post_call(
                     input=data["messages"],
                     api_key=api_key,