From abf059e4b539d5b1cd6bd2f1f214085765b0c01f Mon Sep 17 00:00:00 2001
From: Mark Ericksen
Date: Thu, 30 Jan 2025 08:41:54 -0700
Subject: [PATCH] support LMStudio when using ChatOpenAI (#243)

---
 README.md                       | 5 +++--
 lib/chat_models/chat_open_ai.ex | 8 ++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 3f700511..2ba1bdc7 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ Currently supported AI services:
 - Ollama
 - Mistral
 - Bumblebee self-hosted models - including Llama, Mistral and Zephyr
+- [LMStudio](https://lmstudio.ai/docs/api/endpoints/openai) via their OpenAI compatibility API
 
 **LangChain** is short for Language Chain. An LLM, or Large Language Model, is the "Language" part. This library makes it easier for Elixir applications to "chain" or connect different processes, integrations, libraries, services, or functionality together with an LLM.
 
@@ -124,11 +125,11 @@ A list of models to use:
 
 ## Prompt caching
 
-ChatGPT and Claude both offer prefix-based prompt caching, which can offer cost and performance benefits for longer prompts. Gemini offers context caching, which is similar. 
+ChatGPT and Claude both offer prefix-based prompt caching, which can provide cost and performance benefits for longer prompts. Gemini offers context caching, which is similar.
 
 - [ChatGPT's prompt caching](https://openai.com/index/api-prompt-caching/) is automatic for prompts longer than 1024 tokens, caching the longest common prefix.
 - [Claude's prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) is not automatic. It's prefixing processes tools, system, and then messages, in that order, up to and including the block designated with {"cache_control": {"type": "ephemeral"}} . See LangChain.ChatModels.ChatAnthropicTest and for an example.
-- [Gemini's context caching]((https://ai.google.dev/gemini-api/docs/caching?lang=python)) requires a seperate call which is not supported by Langchain. 
+- [Gemini's context caching](https://ai.google.dev/gemini-api/docs/caching?lang=python) requires a separate call, which is not supported by LangChain.
 
 ## Usage

diff --git a/lib/chat_models/chat_open_ai.ex b/lib/chat_models/chat_open_ai.ex
index 6f4cc113..89c04d77 100644
--- a/lib/chat_models/chat_open_ai.ex
+++ b/lib/chat_models/chat_open_ai.ex
@@ -312,9 +312,9 @@ defmodule LangChain.ChatModels.ChatOpenAI do
           end
         end)
         |> Enum.reverse(),
-      response_format: set_response_format(openai),
       user: openai.user
     }
+    |> Utils.conditionally_add_to_map(:response_format, set_response_format(openai))
     |> Utils.conditionally_add_to_map(
       :reasoning_effort,
       if(openai.reasoning_mode, do: openai.reasoning_effort, else: nil)
@@ -357,7 +357,11 @@ defmodule LangChain.ChatModels.ChatOpenAI do
   end
 
   defp set_response_format(%ChatOpenAI{json_response: false}) do
-    %{"type" => "text"}
+    # NOTE: The default handling when unspecified is `%{"type" => "text"}`.
+    #
+    # For improved compatibility with other APIs like LMStudio, this returns
+    # `nil`, which has the same effect.
+    nil
   end
 
   defp get_tool_choice(%ChatOpenAI{
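
A note on why returning `nil` works here: the request-building code now routes `response_format` through `Utils.conditionally_add_to_map/3`, which leaves the key out of the request body when the value is `nil`. OpenAI treats a missing `response_format` the same as `%{"type" => "text"}`, while LMStudio's compatibility endpoint apparently does not accept the explicit value, hence this change. A standalone sketch of the pattern (the `conditionally_add_to_map` body below illustrates the skip-on-nil behavior; it is not the library's actual source):

```elixir
# Illustrative sketch of the conditional-add pattern used in the patch.
defmodule Sketch do
  # When the value is nil, return the map unchanged (key omitted).
  def conditionally_add_to_map(map, _key, nil), do: map
  # Otherwise, add the key/value pair.
  def conditionally_add_to_map(map, key, value), do: Map.put(map, key, value)
end

base = %{model: "local-model", messages: []}

# json_response: false -> set_response_format/1 now returns nil -> key omitted
Sketch.conditionally_add_to_map(base, :response_format, nil)
#=> %{model: "local-model", messages: []}

# An explicit format (e.g. a JSON response) is still sent when present.
Sketch.conditionally_add_to_map(base, :response_format, %{"type" => "json_object"})
#=> %{model: "local-model", messages: [], response_format: %{"type" => "json_object"}}
```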
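For anyone wanting to try the new LMStudio support, a minimal sketch follows. Assumptions: LMStudio's local server is running at its documented default of `http://localhost:1234`, the model name is a placeholder for whatever model you have loaded, and the chain return shape matches recent library versions.

```elixir
alias LangChain.ChatModels.ChatOpenAI
alias LangChain.Chains.LLMChain
alias LangChain.Message

# Point ChatOpenAI at LMStudio's OpenAI-compatible endpoint instead of
# the default https://api.openai.com/v1/chat/completions.
# "my-local-model" is a placeholder -- use a model loaded in LMStudio.
{:ok, updated_chain} =
  %{
    llm:
      ChatOpenAI.new!(%{
        endpoint: "http://localhost:1234/v1/chat/completions",
        model: "my-local-model"
      })
  }
  |> LLMChain.new!()
  |> LLMChain.add_message(Message.new_user!("Say hello!"))
  |> LLMChain.run()
```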
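On the prompt-caching section this hunk touches: the `{"cache_control": {"type": "ephemeral"}}` marker sits on a content block in the raw Anthropic request, and everything up to and including that block is eligible for caching. Sketched as Elixir data (this is the wire shape per Anthropic's docs, not this library's API; see `LangChain.ChatModels.ChatAnthropicTest` for the library-level example):

```elixir
# Raw Anthropic request shape (illustrative only). The system prompt block
# carries the cache_control marker, so the prefix up to and including it
# can be cached across requests.
%{
  "model" => "claude-3-5-sonnet-20241022",
  "system" => [
    %{
      "type" => "text",
      "text" => "<large, stable system prompt or document>",
      "cache_control" => %{"type" => "ephemeral"}
    }
  ],
  "messages" => [
    %{"role" => "user", "content" => "A question about the document..."}
  ]
}
```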