From 81f50abe2d0487318536a8e1195ef83cbd32e39d Mon Sep 17 00:00:00 2001
From: Rishabh Srivastava
Date: Tue, 28 Jan 2025 01:30:22 +0800
Subject: [PATCH] add reasoning effort param to chat_async and
 chat_openai_async

---
 defog_utils/utils_llm.py       | 10 ++++++++++
 defog_utils/utils_multi_llm.py |  4 +++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/defog_utils/utils_llm.py b/defog_utils/utils_llm.py
index e425def..1f96eb0 100644
--- a/defog_utils/utils_llm.py
+++ b/defog_utils/utils_llm.py
@@ -124,6 +124,8 @@ async def chat_anthropic_async(
     store=True,
     metadata=None,
     timeout=100,
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the Anthropic API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
@@ -248,6 +250,7 @@ async def chat_openai_async(
     base_url: str = "https://api.openai.com/v1/",
     api_key: str = os.environ.get("OPENAI_API_KEY", ""),
     prediction: Dict[str,str] = None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the OpenAI API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
@@ -293,6 +296,9 @@
     if model in ["o1-mini", "o1-preview", "deepseek-chat", "deepseek-reasoner"]:
         del request_params["response_format"]
 
+    if model == "o1" and reasoning_effort is not None:
+        request_params["reasoning_effort"] = reasoning_effort
+
     if "response_format" in request_params and request_params["response_format"]:
         del request_params["stop"]  # cannot have stop when using response_format, as that often leads to invalid JSON
     response = await client_openai.beta.chat.completions.parse(**request_params)
@@ -367,6 +373,8 @@ async def chat_together_async(
     store=True,
     metadata=None,
     timeout=100,
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the Together API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
@@ -408,6 +416,8 @@ def chat_gemini(
     seed: int = 0,
     store=True,
     metadata=None,
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     from google import genai
     from google.genai import types
diff --git a/defog_utils/utils_multi_llm.py b/defog_utils/utils_multi_llm.py
index 40e737a..8079525 100644
--- a/defog_utils/utils_multi_llm.py
+++ b/defog_utils/utils_multi_llm.py
@@ -70,7 +70,8 @@ async def chat_async(
     metadata=None,
     timeout=100,  # in seconds
     backup_model=None,
-    prediction=None
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the LLM API for a single model that is passed in.
@@ -115,6 +116,7 @@
             store=store,
             metadata=metadata,
             timeout=timeout,
+            reasoning_effort=reasoning_effort
         )
     else:
         if not os.getenv("DEEPSEEK_API_KEY"):
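
---
Usage note (not part of the patch): a minimal sketch of calling the new
parameter through chat_async. Only the model/messages/reasoning_effort
arguments are taken from this diff; the rest of chat_async's signature and
the shape of LLMResponse are assumptions, and per the hunk at @@ -293 the
value is only forwarded to the API when model == "o1".

    import asyncio

    from defog_utils.utils_multi_llm import chat_async

    async def main():
        # reasoning_effort is set as request_params["reasoning_effort"]
        # only when model == "o1"; the other providers accept and ignore it.
        resp = await chat_async(
            model="o1",
            messages=[{"role": "user", "content": "Prove sqrt(2) is irrational."}],
            reasoning_effort="high",  # OpenAI accepts "low" | "medium" | "high"
        )
        print(resp)  # an LLMResponse; its exact fields are not shown in this diff

    asyncio.run(main())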