From 81f50abe2d0487318536a8e1195ef83cbd32e39d Mon Sep 17 00:00:00 2001
From: Rishabh Srivastava
Date: Tue, 28 Jan 2025 01:30:22 +0800
Subject: [PATCH] add reasoning effort param to chat_async and
 chat_openai_async

---
 defog_utils/utils_llm.py       | 10 ++++++++++
 defog_utils/utils_multi_llm.py |  4 +++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/defog_utils/utils_llm.py b/defog_utils/utils_llm.py
index e425def..1f96eb0 100644
--- a/defog_utils/utils_llm.py
+++ b/defog_utils/utils_llm.py
@@ -124,6 +124,8 @@ async def chat_anthropic_async(
     store=True,
     metadata=None,
     timeout=100,
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the Anthropic API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
@@ -248,6 +250,7 @@ async def chat_openai_async(
     base_url: str = "https://api.openai.com/v1/",
     api_key: str = os.environ.get("OPENAI_API_KEY", ""),
     prediction: Dict[str,str] = None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the OpenAI API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
@@ -293,6 +296,9 @@
     if model in ["o1-mini", "o1-preview", "deepseek-chat", "deepseek-reasoner"]:
         del request_params["response_format"]
 
+    if model == "o1" and reasoning_effort is not None:
+        request_params["reasoning_effort"] = reasoning_effort
+
     if "response_format" in request_params and request_params["response_format"]:
         del request_params["stop"]  # cannot have stop when using response_format, as that often leads to invalid JSON
     response = await client_openai.beta.chat.completions.parse(**request_params)
@@ -367,6 +373,8 @@ async def chat_together_async(
     store=True,
     metadata=None,
     timeout=100,
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the Together API, the time taken to generate the response, the number of input tokens used, and the number of output tokens used.
@@ -408,6 +416,8 @@ def chat_gemini(
     seed: int = 0,
     store=True,
     metadata=None,
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     from google import genai
     from google.genai import types
diff --git a/defog_utils/utils_multi_llm.py b/defog_utils/utils_multi_llm.py
index 40e737a..8079525 100644
--- a/defog_utils/utils_multi_llm.py
+++ b/defog_utils/utils_multi_llm.py
@@ -70,7 +70,8 @@ async def chat_async(
     metadata=None,
     timeout=100,  # in seconds
     backup_model=None,
-    prediction=None
+    prediction=None,
+    reasoning_effort=None,
 ) -> LLMResponse:
     """
     Returns the response from the LLM API for a single model that is passed in.
@@ -115,6 +116,7 @@
             store=store,
             metadata=metadata,
             timeout=timeout,
+            reasoning_effort=reasoning_effort
         )
     else:
         if not os.getenv("DEEPSEEK_API_KEY"):
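
---
Usage note (not part of the patch): a minimal sketch of calling the new
parameter through chat_async. Only the model/messages/reasoning_effort
arguments are taken from this diff; the rest of chat_async's signature and
the shape of LLMResponse are assumptions, and per the hunk at @@ -293 the
value is only forwarded to the API when model == "o1".

    import asyncio

    from defog_utils.utils_multi_llm import chat_async

    async def main():
        # reasoning_effort is set as request_params["reasoning_effort"]
        # only when model == "o1"; the other providers accept and ignore it.
        resp = await chat_async(
            model="o1",
            messages=[{"role": "user", "content": "Prove sqrt(2) is irrational."}],
            reasoning_effort="high",  # OpenAI accepts "low" | "medium" | "high"
        )
        print(resp)  # an LLMResponse; its exact fields are not shown in this diff

    asyncio.run(main())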