From 8c7e9b7468323283b694e83d9dc32bd56dbc32a1 Mon Sep 17 00:00:00 2001 From: Mark Ericksen Date: Wed, 8 Jan 2025 20:22:16 -0700 Subject: [PATCH 01/11] Breaking change: consolidate LLM callback functions (#228) * breaking change to consolidate LLM callback functions * update to CI builds --- .github/workflows/elixir.yml | 13 +- CHANGELOG.md | 140 ++++++++++++++++++ lib/callbacks.ex | 8 +- lib/chains/chain_callbacks.ex | 103 +++++++++++-- lib/chains/llm_chain.ex | 15 +- lib/chains/summarize_conversation_chain.ex | 13 +- lib/chat_models/chat_anthropic.ex | 28 ++-- lib/chat_models/chat_bumblebee.ex | 12 +- lib/chat_models/chat_google_ai.ex | 7 +- lib/chat_models/chat_mistral_ai.ex | 7 +- lib/chat_models/chat_model.ex | 12 -- lib/chat_models/chat_ollama_ai.ex | 5 +- lib/chat_models/chat_open_ai.ex | 10 +- lib/chat_models/chat_vertex_ai.ex | 7 +- lib/chat_models/llm_callbacks.ex | 105 ------------- lib/utils.ex | 36 ++++- test/chains/llm_chain_test.exs | 20 ++- .../summarize_conversation_chain_test.exs | 29 +++- test/chat_models/chat_anthropic_test.exs | 70 +++++---- test/chat_models/chat_bumblebee_test.exs | 21 ++- test/chat_models/chat_google_ai_test.exs | 37 +++-- test/chat_models/chat_model_test.exs | 17 --- test/chat_models/chat_open_ai_test.exs | 63 +++++--- test/chat_models/chat_vertex_ai_test.exs | 5 +- test/function_param_test.exs | 6 +- test/support/bedrock_helpers.ex | 2 +- test/test_helper.exs | 6 + test/tools/calculator_test.exs | 19 +-- test/utils_test.exs | 64 ++++++++ 29 files changed, 567 insertions(+), 313 deletions(-) delete mode 100644 lib/chat_models/llm_callbacks.ex diff --git a/.github/workflows/elixir.yml b/.github/workflows/elixir.yml index 5237d5d6..3c6a2c3c 100644 --- a/.github/workflows/elixir.yml +++ b/.github/workflows/elixir.yml @@ -2,6 +2,8 @@ # They are provided by a third-party and are governed by # separate terms of service, privacy policy, and support # documentation. +# +# https://github.com/erlef/setup-beam/tree/v1.18.2 name: Elixir CI @@ -17,6 +19,7 @@ env: GOOGLE_API_KEY: invalid AWS_ACCESS_KEY_ID: invalid AWS_SECRET_ACCESS_KEY: invalid + AWS_REGION: invalid permissions: contents: read @@ -28,12 +31,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Set up Elixir - uses: erlef/setup-beam@61e01a43a562a89bfc54c7f9a378ff67b03e4a21 # v1.16.0 + - uses: actions/checkout@v4 + name: Set up Elixir + - uses: erlef/setup-beam@v1 with: - elixir-version: '1.15.2' # [Required] Define the Elixir version - otp-version: '26.0' # [Required] Define the Erlang/OTP version + elixir-version: 'v1.18.1-otp-27' # [Required] Define the Elixir version + otp-version: 'OTP-27.0' # [Required] Define the Erlang/OTP version - name: Restore dependencies cache uses: actions/cache@v3 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index bc136f05..01e54bfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,145 @@ # Changelog +## v0.3.0-rc.2 (2025-01-08) + +### Breaking Changes + +How LLM callbacks are registered has changed. The callback function's arguments have also changed. + +Specifically, this refers to the callbacks: + +- `on_llm_new_delta` +- `on_llm_new_message` +- `on_llm_ratelimit_info` +- `on_llm_token_usage` + +The callbacks are still supported, but _how_ they are registered and the arguments passed to the linked functions has changed. + +Previously, an LLM callback's first argument was the chat model, it is now the LLMChain that is running it. + +A ChatModel still has the `callbacks` struct attribute, but it should be considered private. 
+
+#### Why the change
+Having some callback functions registered on the chat model and some registered on the chain was confusing. What goes where? Why the difference?
+
+This change moves them all to the same place, removing a source of confusion.
+
+The primary reason for the change is that important information about the **context** of the callback event was not available to the callback function. Information stored in the chain's `custom_context` can be valuable and important, like a user's account ID, but it was not easily accessible in a callback like `on_llm_token_usage` where we might want to record the user's token usage linked to their account.
+
+This important change passes the entire `LLMChain` through to the callback function, giving the function access to the `custom_context`. This makes the LLM (aka chat model) callback functions expect the same arguments as the other chain-focused callback functions.
+
+This both unifies how the callbacks operate and what data they have available, and it groups them all together.
+
+#### Adapting to the change
+A before example:
+
+```elixir
+llm_events = %{
+  # 1st argument was the chat model
+  on_llm_new_delta: fn _chat_model, %MessageDelta{} = delta ->
+    # ...
+  end,
+  on_llm_token_usage: fn _chat_model, usage_data ->
+    # ...
+  end
+}
+
+chain_events = %{
+  on_message_processed: fn _chain, tool_msg ->
+    # ...
+  end
+}
+
+# LLM callback events were registered on the chat model
+chat_model = ChatOpenAI.new!(%{stream: true, callbacks: [llm_events]})
+
+{:ok, updated_chain} =
+  %{
+    llm: chat_model,
+    custom_context: %{user_id: 123}
+  }
+  |> LLMChain.new!()
+  |> LLMChain.add_message(Message.new_system!())
+  |> LLMChain.add_message(Message.new_user!("Say hello!"))
+  # Chain callback events were registered on the chain
+  |> LLMChain.add_callback(chain_events)
+  |> LLMChain.run()
+```
+
+This is updated to: (comments highlight changes)
+
+```elixir
+# Events are all combined together
+events = %{
+  # 1st argument is now the LLMChain
+  on_llm_new_delta: fn _chain, %MessageDelta{} = delta ->
+    # ...
+  end,
+  on_llm_token_usage: fn %LLMChain{} = chain, usage_data ->
+    # ... `chain.custom_context` is available
+  end,
+  on_message_processed: fn _chain, tool_msg ->
+    # ...
+  end
+}
+
+# callbacks removed from Chat Model setup
+chat_model = ChatOpenAI.new!(%{stream: true})
+
+{:ok, updated_chain} =
+  %{
+    llm: chat_model,
+    custom_context: %{user_id: 123}
+  }
+  |> LLMChain.new!()
+  |> LLMChain.add_message(Message.new_system!())
+  |> LLMChain.add_message(Message.new_user!("Say hello!"))
+  # All events are registered through `add_callback`
+  |> LLMChain.add_callback(events)
+  |> LLMChain.run()
+```
+
+If you still need access to the LLM in the callback functions, it's available in `chain.llm`.
+
+The change is a breaking change, but should be fairly easy to update.
+
+This consolidates how callback events work and makes them more powerful by exposing important information to the callback functions.
+
+If you were using `LLMChain.add_llm_callback/2`, the change is even easier:
+
+From:
+```elixir
+  %{
+    llm: chat_model,
+    custom_context: %{user_id: 123}
+  }
+  |> LLMChain.new!()
+  # ...
+  # LLM callback events could be added later this way
+  |> LLMChain.add_llm_callback(llm_events)
+  |> LLMChain.run()
+```
+
+To:
+```elixir
+  %{
+    llm: chat_model,
+    custom_context: %{user_id: 123}
+  }
+  |> LLMChain.new!()
+  # ...
+  # Use the `add_callback` function instead
+  |> LLMChain.add_callback(llm_events)
+  |> LLMChain.run()
+```
+
+#### Details of the change
+- Removal of the `LangChain.ChatModels.LLMCallbacks` module.
+- The LLM-specific callbacks were migrated to `LangChain.Chains.ChainCallbacks`.
+- Removal of `LangChain.Chains.LLMChain.add_llm_callback/2`.
+- `LangChain.ChatModels.ChatOpenAI.new/1` and `LangChain.ChatModels.ChatOpenAI.new!/1` no longer accept `:callbacks` on the chat model.
+- Removal of `LangChain.ChatModels.ChatModel.add_callback/2`.
+
 ## v0.3.0-rc.1 (2024-12-15)
 
 ### Breaking Changes
diff --git a/lib/callbacks.ex b/lib/callbacks.ex
index e9e10280..191310c5 100644
--- a/lib/callbacks.ex
+++ b/lib/callbacks.ex
@@ -13,15 +13,15 @@ defmodule LangChain.Callbacks do
   @spec fire([map()], atom(), [any()]) :: :ok | no_return()
   def fire(callbacks, callback_name, arguments)
 
-  def fire(callbacks, :on_llm_new_message, [model, messages]) when is_list(messages) do
+  def fire(callbacks, :on_llm_new_message, [messages]) when is_list(messages) do
     Enum.each(messages, fn m ->
-      fire(callbacks, :on_llm_new_message, [model, m])
+      fire(callbacks, :on_llm_new_message, [m])
     end)
   end
 
-  def fire(callbacks, :on_llm_new_delta, [model, deltas]) when is_list(deltas) do
+  def fire(callbacks, :on_llm_new_delta, [deltas]) when is_list(deltas) do
     Enum.each(deltas, fn d ->
-      fire(callbacks, :on_llm_new_delta, [model, d])
+      fire(callbacks, :on_llm_new_delta, [d])
     end)
   end
 
diff --git a/lib/chains/chain_callbacks.ex b/lib/chains/chain_callbacks.ex
index 37015c75..53c291f0 100644
--- a/lib/chains/chain_callbacks.ex
+++ b/lib/chains/chain_callbacks.ex
@@ -1,6 +1,6 @@
 defmodule LangChain.Chains.ChainCallbacks do
   @moduledoc """
-  Defines the callbacks fired by an LLMChain.
+  Defines the callbacks fired by an LLMChain and LLM module.
 
   A callback handler is a map that defines the specific callback event with a
   function to execute for that event.
@@ -13,15 +13,93 @@ defmodule LangChain.Chains.ChainCallbacks do
     live_view_pid = self()
 
     my_handlers = %{
-      handle_chain_error_message_created: fn new_message -> send(live_view_pid, {:received_message, new_message})
+      on_llm_new_message: fn _context, new_message -> send(live_view_pid, {:received_message, new_message}) end,
+
+      on_error_message_created: fn _context, new_message -> send(live_view_pid, {:received_message, new_message}) end
     }
 
-    model = SomeLLM.new!(%{callbacks: [my_handlers]})
-    chain = LLMChain.new!(%{llm: model})
+    model = SomeLLM.new!(%{...})
+
+    chain =
+      %{llm: model}
+      |> LLMChain.new!()
+      |> LLMChain.add_callback(my_handlers)
 
   """
+  alias LangChain.Chains.LLMChain
   alias LangChain.Message
+  alias LangChain.MessageDelta
+  alias LangChain.TokenUsage
+
+  @typedoc """
+  Executed when an LLM is streaming a response and a new MessageDelta (or token)
+  was received.
+
+  - `:index` is optionally present if the LLM supports sending `n` versions of a
+    response.
+
+  The return value is discarded.
+
+  ## Example
+
+  A function declaration that matches the signature.
+
+      def handle_llm_new_delta(chain, delta) do
+        IO.write(delta)
+      end
+  """
+  @type llm_new_delta :: (LLMChain.t(), MessageDelta.t() -> any())
+
+  @typedoc """
+  Executed when an LLM is not streaming and a full message was received.
+
+  The return value is discarded.
+
+  ## Example
+
+  A function declaration that matches the signature.
+ + def handle_llm_new_message(chain, message) do + IO.inspect(message) + end + """ + @type llm_new_message :: (LLMChain.t(), Message.t() -> any()) + + @typedoc """ + Executed when an LLM (typically a service) responds with rate limiting + information. + + The specific rate limit information depends on the LLM. It returns a map with + all the available information included. + + The return value is discarded. + + ## Example + + A function declaration that matches the signature. + + def handle_llm_ratelimit_info(chain, %{} = info) do + IO.inspect(info) + end + """ + @type llm_ratelimit_info :: (LLMChain.t(), info :: %{String.t() => any()} -> any()) + + @typedoc """ + Executed when an LLM response reports the token usage in a + `LangChain.TokenUsage` struct. The data returned depends on the LLM. + + The return value is discarded. + + ## Example + + A function declaration that matches the signature. + + def handle_llm_token_usage(chain, %TokenUsage{} = usage) do + IO.inspect(usage) + end + """ + @type llm_token_usage :: (LLMChain.t(), TokenUsage.t() -> any()) @typedoc """ Executed when an LLMChain has completed processing a received assistant @@ -108,10 +186,17 @@ defmodule LangChain.Chains.ChainCallbacks do The supported set of callbacks for an LLM module. """ @type chain_callback_handler :: %{ - on_message_processed: chain_message_processed(), - on_message_processing_error: chain_message_processing_error(), - on_error_message_created: chain_error_message_created(), - on_tool_response_created: chain_tool_response_created(), - on_retries_exceeded: chain_retries_exceeded() + # model-level callbacks + optional(:on_llm_new_delta) => llm_new_delta(), + optional(:on_llm_new_message) => llm_new_message(), + optional(:on_llm_ratelimit_info) => llm_ratelimit_info(), + optional(:on_llm_token_usage) => llm_token_usage(), + + # Chain-level callbacks + optional(:on_message_processed) => chain_message_processed(), + optional(:on_message_processing_error) => chain_message_processing_error(), + optional(:on_error_message_created) => chain_error_message_created(), + optional(:on_tool_response_created) => chain_tool_response_created(), + optional(:on_retries_exceeded) => chain_retries_exceeded() } end diff --git a/lib/chains/llm_chain.ex b/lib/chains/llm_chain.ex index bc12060d..f4b2a5ce 100644 --- a/lib/chains/llm_chain.ex +++ b/lib/chains/llm_chain.ex @@ -122,7 +122,6 @@ defmodule LangChain.Chains.LLMChain do use Ecto.Schema import Ecto.Changeset require Logger - alias LangChain.ChatModels.ChatModel alias LangChain.Callbacks alias LangChain.Chains.ChainCallbacks alias LangChain.PromptTemplate @@ -523,8 +522,11 @@ defmodule LangChain.Chains.LLMChain do # then execute the `.call` function on that module. %module{} = chain.llm + # wrap and link the model's callbacks. + use_llm = Utils.rewrap_callbacks_for_model(chain.llm, chain.callbacks, chain) + # handle and output response - case module.call(chain.llm, chain.messages, chain.tools) do + case module.call(use_llm, chain.messages, chain.tools) do {:ok, [%Message{} = message]} -> if chain.verbose, do: IO.inspect(message, label: "SINGLE MESSAGE RESPONSE") {:ok, process_message(chain, message)} @@ -1023,15 +1025,6 @@ defmodule LangChain.Chains.LLMChain do %LLMChain{chain | callbacks: callbacks ++ [additional_callback]} end - @doc """ - Add a `LangChain.ChatModels.LLMCallbacks` callback map to the chain's `:llm` model if - it supports the `:callback` key. 
- """ - @spec add_llm_callback(t(), map()) :: t() - def add_llm_callback(%LLMChain{llm: model} = chain, callback_map) do - %LLMChain{chain | llm: ChatModel.add_callback(model, callback_map)} - end - # a pipe-friendly execution of callbacks that returns the chain defp fire_callback_and_return(%LLMChain{} = chain, callback_name, additional_arguments) when is_list(additional_arguments) do diff --git a/lib/chains/summarize_conversation_chain.ex b/lib/chains/summarize_conversation_chain.ex index eaca5669..9607eb13 100644 --- a/lib/chains/summarize_conversation_chain.ex +++ b/lib/chains/summarize_conversation_chain.ex @@ -94,7 +94,14 @@ defmodule LangChain.Chains.SummarizeConversationChain do @type t :: %SummarizeConversationChain{} - @create_fields [:llm, :keep_count, :threshold_count, :override_system_prompt, :messages, :verbose] + @create_fields [ + :llm, + :keep_count, + :threshold_count, + :override_system_prompt, + :messages, + :verbose + ] @required_fields [:llm, :keep_count, :threshold_count] @default_system_prompt ~s|You expertly summarize a conversation into concise bullet points that capture significant details and sentiment for future reference. Summarize the conversation starting with the initial user message. Return only the summary with no additional commentary. @@ -286,7 +293,9 @@ defmodule LangChain.Chains.SummarizeConversationChain do def create_summary_messages(summary_text) when is_binary(summary_text) do [ - Message.new_user!("Summarize our entire conversation up to this point for future reference."), + Message.new_user!( + "Summarize our entire conversation up to this point for future reference." + ), Message.new_assistant!(summary_text) ] end diff --git a/lib/chat_models/chat_anthropic.ex b/lib/chat_models/chat_anthropic.ex index b0eea877..40c1ff17 100644 --- a/lib/chat_models/chat_anthropic.ex +++ b/lib/chat_models/chat_anthropic.ex @@ -15,13 +15,17 @@ defmodule LangChain.ChatModels.ChatAnthropic do Anthropic returns rate limit information in the response headers. Those can be accessed using an LLM callback like this: - handlers = %{ - on_llm_ratelimit_info: fn _model, headers -> + handler = %{ + on_llm_ratelimit_info: fn _chain, headers -> IO.inspect(headers) end } - {:ok, chat} = ChatAnthropic.new(%{callbacks: [handlers]}) + %{llm: ChatAnthropic.new!(%{model: "..."})} + |> LLMChain.new!() + # ... add messages ... + |> LLMChain.add_callback(handler) + |> LLMChain.run() When a request is received, something similar to the following will be output to the console. 
@@ -152,7 +156,7 @@ defmodule LangChain.ChatModels.ChatAnthropic do # Whether to stream the response field :stream, :boolean, default: false - # A list of maps for callback handlers + # A list of maps for callback handlers (treat as private) field :callbacks, {:array, :map}, default: [] # Tool choice option @@ -172,7 +176,6 @@ defmodule LangChain.ChatModels.ChatAnthropic do :top_p, :top_k, :stream, - :callbacks, :tool_choice ] @required_fields [:endpoint, :model] @@ -368,12 +371,10 @@ defmodule LangChain.ChatModels.ChatAnthropic do |> case do {:ok, %Req.Response{status: 200, body: data} = response} -> Callbacks.fire(anthropic.callbacks, :on_llm_ratelimit_info, [ - anthropic, get_ratelimit_info(response.headers) ]) Callbacks.fire(anthropic.callbacks, :on_llm_token_usage, [ - anthropic, get_token_usage(data) ]) @@ -382,7 +383,7 @@ defmodule LangChain.ChatModels.ChatAnthropic do {:error, reason} result -> - Callbacks.fire(anthropic.callbacks, :on_llm_new_message, [anthropic, result]) + Callbacks.fire(anthropic.callbacks, :on_llm_new_message, [result]) result end @@ -398,6 +399,10 @@ defmodule LangChain.ChatModels.ChatAnthropic do Logger.debug(fn -> "Mint connection closed: retry count = #{inspect(retry_count)}" end) do_api_request(anthropic, messages, tools, retry_count - 1) + {:error, %LangChainError{}} = error -> + # pass through the already handled exception + error + other -> message = "Unexpected and unhandled API response! #{inspect(other)}" Logger.error(message) @@ -429,7 +434,6 @@ defmodule LangChain.ChatModels.ChatAnthropic do |> case do {:ok, %Req.Response{body: data} = response} -> Callbacks.fire(anthropic.callbacks, :on_llm_ratelimit_info, [ - anthropic, get_ratelimit_info(response.headers) ]) @@ -449,6 +453,10 @@ defmodule LangChain.ChatModels.ChatAnthropic do Logger.debug(fn -> "Mint connection closed: retry count = #{inspect(retry_count)}" end) do_api_request(anthropic, messages, tools, retry_count - 1) + {:error, %LangChainError{}} = error -> + # pass through the already handled exception + error + other -> message = "Unhandled and unexpected response from streamed post call. #{inspect(other)}" Logger.error(message) @@ -592,7 +600,7 @@ defmodule LangChain.ChatModels.ChatAnthropic do } = data ) do # if we received usage data, fire any callbacks for it. - Callbacks.fire(model.callbacks, :on_llm_token_usage, [model, get_token_usage(data)]) + Callbacks.fire(model.callbacks, :on_llm_token_usage, [get_token_usage(data)]) %{ role: :assistant, diff --git a/lib/chat_models/chat_bumblebee.ex b/lib/chat_models/chat_bumblebee.ex index 2d0fc346..c32b5eb3 100644 --- a/lib/chat_models/chat_bumblebee.ex +++ b/lib/chat_models/chat_bumblebee.ex @@ -136,7 +136,7 @@ defmodule LangChain.ChatModels.ChatBumblebee do # for testing. field :seed, :integer, default: nil - # A list of maps for callback handlers + # A list of maps for callback handlers (treat as private) field :callbacks, {:array, :map}, default: [] end @@ -153,8 +153,7 @@ defmodule LangChain.ChatModels.ChatBumblebee do # :temperature, :seed, :template_format, - :stream, - :callbacks + :stream ] @required_fields [:serving] @@ -255,7 +254,7 @@ defmodule LangChain.ChatModels.ChatBumblebee do case Message.new(%{role: :assistant, status: :complete, content: content}) do {:ok, message} -> # execute the callback with the final message - Callbacks.fire(model.callbacks, :on_llm_new_message, [model, message]) + Callbacks.fire(model.callbacks, :on_llm_new_message, [message]) # return a list of the complete message. As a list for compatibility. 
[message] @@ -287,13 +286,13 @@ defmodule LangChain.ChatModels.ChatBumblebee do fire_token_usage_callback(model, token_summary) final_delta = MessageDelta.new!(%{role: :assistant, status: :complete}) - Callbacks.fire(model.callbacks, :on_llm_new_delta, [model, final_delta]) + Callbacks.fire(model.callbacks, :on_llm_new_delta, [final_delta]) final_delta content when is_binary(content) -> case MessageDelta.new(%{content: content, role: :assistant, status: :incomplete}) do {:ok, delta} -> - Callbacks.fire(model.callbacks, :on_llm_new_delta, [model, delta]) + Callbacks.fire(model.callbacks, :on_llm_new_delta, [delta]) delta {:error, %Ecto.Changeset{} = changeset} -> @@ -318,7 +317,6 @@ defmodule LangChain.ChatModels.ChatBumblebee do defp fire_token_usage_callback(model, %{input: input, output: output} = _token_summary) do Callbacks.fire(model.callbacks, :on_llm_token_usage, [ - model, TokenUsage.new!(%{input: input, output: output}) ]) end diff --git a/lib/chat_models/chat_google_ai.ex b/lib/chat_models/chat_google_ai.ex index bb5678d7..b0228c19 100644 --- a/lib/chat_models/chat_google_ai.ex +++ b/lib/chat_models/chat_google_ai.ex @@ -84,7 +84,7 @@ defmodule LangChain.ChatModels.ChatGoogleAI do field :stream, :boolean, default: false - # A list of maps for callback handlers + # A list of maps for callback handlers (treat as private) field :callbacks, {:array, :map}, default: [] end @@ -100,7 +100,6 @@ defmodule LangChain.ChatModels.ChatGoogleAI do :top_k, :receive_timeout, :stream, - :callbacks, :safety_settings ] @required_fields [ @@ -383,7 +382,7 @@ defmodule LangChain.ChatModels.ChatGoogleAI do {:error, reason} result -> - Callbacks.fire(google_ai.callbacks, :on_llm_new_message, [google_ai, result]) + Callbacks.fire(google_ai.callbacks, :on_llm_new_message, [result]) result end @@ -480,7 +479,7 @@ defmodule LangChain.ChatModels.ChatGoogleAI do # else do this. For now, we fire each and every TokenUsage we receive. case get_token_usage(data) do %TokenUsage{} = token_usage -> - Callbacks.fire(model.callbacks, :on_llm_token_usage, [model, token_usage]) + Callbacks.fire(model.callbacks, :on_llm_token_usage, [token_usage]) :ok nil -> diff --git a/lib/chat_models/chat_mistral_ai.ex b/lib/chat_models/chat_mistral_ai.ex index 4cab112e..a8684f8c 100644 --- a/lib/chat_models/chat_mistral_ai.ex +++ b/lib/chat_models/chat_mistral_ai.ex @@ -54,7 +54,7 @@ defmodule LangChain.ChatModels.ChatMistralAI do field :stream, :boolean, default: false - # A list of maps for callback handlers + # A list of maps for callback handlers (treat as private) field :callbacks, {:array, :map}, default: [] end @@ -70,8 +70,7 @@ defmodule LangChain.ChatModels.ChatMistralAI do :max_tokens, :safe_prompt, :random_seed, - :stream, - :callbacks + :stream ] @required_fields [ :model @@ -198,7 +197,7 @@ defmodule LangChain.ChatModels.ChatMistralAI do {:error, reason} result -> - Callbacks.fire(mistral.callbacks, :on_llm_new_message, [mistral, result]) + Callbacks.fire(mistral.callbacks, :on_llm_new_message, [result]) result end diff --git a/lib/chat_models/chat_model.ex b/lib/chat_models/chat_model.ex index 72187407..3f0a83a2 100644 --- a/lib/chat_models/chat_model.ex +++ b/lib/chat_models/chat_model.ex @@ -24,18 +24,6 @@ defmodule LangChain.ChatModels.ChatModel do @callback restore_from_map(%{String.t() => any()}) :: {:ok, struct()} | {:error, String.t()} - @doc """ - Add a `LangChain.ChatModels.LLMCallbacks` callback map to the ChatModel if - it includes the `:callback` key. 
- """ - @spec add_callback(%{optional(:callbacks) => nil | map()}, map()) :: map() | struct() - def add_callback(%_{callbacks: callbacks} = model, callback_map) do - existing_callbacks = callbacks || [] - %{model | callbacks: existing_callbacks ++ [callback_map]} - end - - def add_callback(model, _callback_map), do: model - @doc """ Create a serializable map from a ChatModel's current configuration that can later be restored. diff --git a/lib/chat_models/chat_ollama_ai.ex b/lib/chat_models/chat_ollama_ai.ex index 24751c61..82a78934 100644 --- a/lib/chat_models/chat_ollama_ai.ex +++ b/lib/chat_models/chat_ollama_ai.ex @@ -67,8 +67,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do :temperature, :tfs_z, :top_k, - :top_p, - :callbacks + :top_p ] @required_fields [:endpoint, :model] @@ -150,7 +149,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do # while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) field :top_p, :float, default: 0.9 - # A list of maps for callback handlers + # A list of maps for callback handlers (treat as private) field :callbacks, {:array, :map}, default: [] end diff --git a/lib/chat_models/chat_open_ai.ex b/lib/chat_models/chat_open_ai.ex index fa3e76eb..994518b7 100644 --- a/lib/chat_models/chat_open_ai.ex +++ b/lib/chat_models/chat_open_ai.ex @@ -178,7 +178,7 @@ defmodule LangChain.ChatModels.ChatOpenAI do # Tool choice option field :tool_choice, :map - # A list of maps for callback handlers + # A list of maps for callback handlers (treated as internal) field :callbacks, {:array, :map}, default: [] # Can send a string user_id to help ChatGPT detect abuse by users of the @@ -204,7 +204,6 @@ defmodule LangChain.ChatModels.ChatOpenAI do :max_tokens, :stream_options, :user, - :callbacks, :tool_choice ] @required_fields [:endpoint, :model] @@ -580,12 +579,10 @@ defmodule LangChain.ChatModels.ChatOpenAI do |> case do {:ok, %Req.Response{body: data} = response} -> Callbacks.fire(openai.callbacks, :on_llm_ratelimit_info, [ - openai, get_ratelimit_info(response.headers) ]) Callbacks.fire(openai.callbacks, :on_llm_token_usage, [ - openai, get_token_usage(data) ]) @@ -594,7 +591,7 @@ defmodule LangChain.ChatModels.ChatOpenAI do {:error, reason} result -> - Callbacks.fire(openai.callbacks, :on_llm_new_message, [openai, result]) + Callbacks.fire(openai.callbacks, :on_llm_new_message, [result]) result end @@ -638,7 +635,6 @@ defmodule LangChain.ChatModels.ChatOpenAI do |> case do {:ok, %Req.Response{body: data} = response} -> Callbacks.fire(openai.callbacks, :on_llm_ratelimit_info, [ - openai, get_ratelimit_info(response.headers) ]) @@ -743,7 +739,7 @@ defmodule LangChain.ChatModels.ChatOpenAI do def do_process_response(model, %{"choices" => [], "usage" => %{} = _usage} = data) do case get_token_usage(data) do %TokenUsage{} = token_usage -> - Callbacks.fire(model.callbacks, :on_llm_token_usage, [model, token_usage]) + Callbacks.fire(model.callbacks, :on_llm_token_usage, [token_usage]) :ok nil -> diff --git a/lib/chat_models/chat_vertex_ai.ex b/lib/chat_models/chat_vertex_ai.ex index 90cfb9ed..6981aeea 100644 --- a/lib/chat_models/chat_vertex_ai.ex +++ b/lib/chat_models/chat_vertex_ai.ex @@ -64,7 +64,7 @@ defmodule LangChain.ChatModels.ChatVertexAI do field :stream, :boolean, default: false field :json_response, :boolean, default: false - # A list of maps for callback handlers + # A list of maps for callback handlers (treated as internal) field :callbacks, {:array, :map}, default: [] end @@ -79,8 +79,7 @@ defmodule 
LangChain.ChatModels.ChatVertexAI do :top_k, :receive_timeout, :stream, - :json_response, - :callbacks + :json_response ] @required_fields [ :endpoint, @@ -317,7 +316,7 @@ defmodule LangChain.ChatModels.ChatVertexAI do {:error, reason} result -> - Callbacks.fire(vertex_ai.callbacks, :on_llm_new_message, [vertex_ai, result]) + Callbacks.fire(vertex_ai.callbacks, :on_llm_new_message, [result]) result end diff --git a/lib/chat_models/llm_callbacks.ex b/lib/chat_models/llm_callbacks.ex deleted file mode 100644 index ddafd999..00000000 --- a/lib/chat_models/llm_callbacks.ex +++ /dev/null @@ -1,105 +0,0 @@ -defmodule LangChain.ChatModels.LLMCallbacks do - @moduledoc """ - Defines the callbacks fired by an LLM module. - - A callback handler is a map that defines the specific callback event with a - function to execute for that event. - - ## Example - - A sample configured callback handler that forwards received data to a specific - LiveView. - - live_view_pid = self() - - my_handlers = %{ - on_llm_new_message: fn new_message -> send(live_view_pid, {:received_message, new_message}) - } - - model = SomeLLM.new!(%{callbacks: [my_handlers]}) - chain = LLMChain.new!(%{llm: model}) - - """ - alias LangChain.Message - alias LangChain.MessageDelta - alias LangChain.TokenUsage - - @typedoc """ - Executed when an LLM is streaming a response and a new MessageDelta (or token) - was received. - - - `:index` is optionally present if the LLM supports sending `n` versions of a - response. - - The return value is discarded. - - ## Example - - A function declaration that matches the signature. - - def handle_llm_new_delta(_chat_model, delta) do - IO.write(delta) - end - """ - @type llm_new_delta :: (model :: struct(), MessageDelta.t() -> any()) - - @typedoc """ - Executed when an LLM is not streaming and a full message was received. - - The return value is discarded. - - ## Example - - A function declaration that matches the signature. - - def handle_llm_new_message(_chat_model, message) do - IO.inspect(message) - end - """ - @type llm_new_message :: (model :: struct(), Message.t() -> any()) - - @typedoc """ - Executed when an LLM (typically a service) responds with rate limiting - information. - - The specific rate limit information depends on the LLM. It returns a map with - all the available information included. - - The return value is discarded. - - ## Example - - A function declaration that matches the signature. - - def handle_llm_ratelimit_info(_chat_model, %{} = info) do - IO.inspect(info) - end - """ - @type llm_ratelimit_info :: (model :: struct(), info :: %{String.t() => any()} -> any()) - - @typedoc """ - Executed when an LLM response reports the token usage in a - `LangChain.TokenUsage` struct. The data returned depends on the LLM. - - The return value is discarded. - - ## Example - - A function declaration that matches the signature. - - def handle_llm_token_usage(_chat_model, %TokenUsage{} = usage) do - IO.inspect(usage) - end - """ - @type llm_token_usage :: (model :: struct(), TokenUsage.t() -> any()) - - @typedoc """ - The supported set of callbacks for an LLM module. 
- """ - @type llm_callback_handler :: %{ - on_llm_new_delta: llm_new_delta(), - on_llm_new_message: llm_new_message(), - on_llm_ratelimit_info: llm_ratelimit_info(), - on_llm_token_usage: llm_token_usage() - } -end diff --git a/lib/utils.ex b/lib/utils.ex index 98241b5e..f314eace 100644 --- a/lib/utils.ex +++ b/lib/utils.ex @@ -28,6 +28,40 @@ defmodule LangChain.Utils do Map.put(map, key, value) end + # Generate wrapped LLM callbacks on the model that include the chain as part + # of the context. + @doc false + @spec rewrap_callbacks_for_model( + llm :: struct(), + callbacks :: [%{atom() => fun()}], + context :: struct() + ) :: struct() + def rewrap_callbacks_for_model(llm, callbacks, context) do + to_wrap = [ + :on_llm_new_delta, + :on_llm_new_message, + :on_llm_ratelimit_info, + :on_llm_token_usage + ] + + # get the LLM callbacks from the chain. + new_callbacks = + callbacks + |> Enum.map(fn callback_map -> + callback_map + |> Map.take(to_wrap) + |> Enum.map(fn {key, fun} -> + # return a wrapped/curried function that embeds the chain context into + # the call + {key, fn arg -> fun.(context, arg) end} + end) + |> Enum.into(%{}) + end) + + # put those onto the model and return it + %{llm | callbacks: new_callbacks} + end + @doc """ Translates an error message using gettext. """ @@ -111,7 +145,7 @@ defmodule LangChain.Utils do def fire_streamed_callback(model, %MessageDelta{} = delta) do # Execute callback handler for single received delta element - Callbacks.fire(model.callbacks, :on_llm_new_delta, [model, delta]) + Callbacks.fire(model.callbacks, :on_llm_new_delta, [delta]) end # received unexpected data in the callback, do nothing. diff --git a/test/chains/llm_chain_test.exs b/test/chains/llm_chain_test.exs index 3c0b2c86..d06efe54 100644 --- a/test/chains/llm_chain_test.exs +++ b/test/chains/llm_chain_test.exs @@ -273,7 +273,7 @@ defmodule LangChain.Chains.LLMChainTest do ) handler = %{ - on_llm_new_delta: fn _chain, delta -> + on_llm_new_delta: fn %LLMChain{} = _chain, delta -> send(self(), {:test_stream_deltas, delta}) end, on_message_processed: fn _chain, message -> @@ -281,7 +281,7 @@ defmodule LangChain.Chains.LLMChainTest do end } - model = ChatOpenAI.new!(%{temperature: 1, seed: 0, stream: true, callbacks: [handler]}) + model = ChatOpenAI.new!(%{temperature: 1, seed: 0, stream: true}) # We can construct an LLMChain from a PromptTemplate and an LLM. 
{:ok, updated_chain} = @@ -1670,7 +1670,11 @@ defmodule LangChain.Chains.LLMChainTest do |> LLMChain.add_message(Message.new_user!("Say hello!")) |> LLMChain.add_message( new_function_calls!([ - ToolCall.new!(%{call_id: "call_fake123", name: "greet", arguments: %{"name" => "Tim"}}), + ToolCall.new!(%{ + call_id: "call_fake123", + name: "greet", + arguments: %{"name" => "Tim"} + }), ToolCall.new!(%{call_id: "call_fake234", name: "hello_world", arguments: nil}), ToolCall.new!(%{ call_id: "call_fake345", @@ -1835,8 +1839,8 @@ defmodule LangChain.Chains.LLMChainTest do describe "add_llm_callback/2" do test "appends a callback handler to the chain's LLM", %{chat: chat} do - handler1 = %{on_llm_new_message: fn _chain, _msg -> IO.puts("MESSAGE 1!") end} - handler2 = %{on_llm_new_message: fn _chain, _msg -> IO.puts("MESSAGE 2!") end} + handler1 = %{on_llm_new_message: fn %LLMChain{} = _chain, _msg -> IO.puts("MESSAGE 1!") end} + handler2 = %{on_llm_new_message: fn %LLMChain{} = _chain, _msg -> IO.puts("MESSAGE 2!") end} # none to start with assert chat.callbacks == [] @@ -1844,10 +1848,10 @@ defmodule LangChain.Chains.LLMChainTest do chain = %{llm: chat} |> LLMChain.new!() - |> LLMChain.add_llm_callback(handler1) - |> LLMChain.add_llm_callback(handler2) + |> LLMChain.add_callback(handler1) + |> LLMChain.add_callback(handler2) - assert chain.llm.callbacks == [handler1, handler2] + assert chain.callbacks == [handler1, handler2] end end diff --git a/test/chains/summarize_conversation_chain_test.exs b/test/chains/summarize_conversation_chain_test.exs index 9ae0ee88..5d290e72 100644 --- a/test/chains/summarize_conversation_chain_test.exs +++ b/test/chains/summarize_conversation_chain_test.exs @@ -204,7 +204,10 @@ defmodule LangChain.Chains.SummarizeConversationChainTest do [system, summary_1, summary_2, user_2, ai_2] = updated_chain.messages assert system.content == "System" assert summary_1.role == :user - assert summary_1.content == "Summarize our entire conversation up to this point for future reference." + + assert summary_1.content == + "Summarize our entire conversation up to this point for future reference." + assert summary_2.role == :assistant assert summary_2.content == "fake summary text" assert user_2.content == "Question 2" @@ -229,7 +232,10 @@ defmodule LangChain.Chains.SummarizeConversationChainTest do [system, summary_1, summary_2] = updated_chain.messages assert system.content == "System" assert summary_1.role == :user - assert summary_1.content == "Summarize our entire conversation up to this point for future reference." + + assert summary_1.content == + "Summarize our entire conversation up to this point for future reference." + assert summary_2.role == :assistant assert summary_2.content == "fake summary text" end @@ -252,7 +258,10 @@ defmodule LangChain.Chains.SummarizeConversationChainTest do [summary_1, summary_2, user_2, ai_2] = updated_chain.messages assert summary_1.role == :user - assert summary_1.content == "Summarize our entire conversation up to this point for future reference." + + assert summary_1.content == + "Summarize our entire conversation up to this point for future reference." 
+ assert summary_2.role == :assistant assert summary_2.content == "fake summary text" assert user_2.content == "Question 2" @@ -322,7 +331,10 @@ defmodule LangChain.Chains.SummarizeConversationChainTest do assert system.role == :system assert String.starts_with?(system.content, "You are a helpful travel assistant.") assert summary_1.role == :user - assert summary_1.content == "Summarize our entire conversation up to this point for future reference." + + assert summary_1.content == + "Summarize our entire conversation up to this point for future reference." + assert summary_2.role == :assistant assert keep_1.role == :user assert keep_2.role == :assistant @@ -386,7 +398,10 @@ defmodule LangChain.Chains.SummarizeConversationChainTest do assert system.role == :system assert String.starts_with?(system.content, "You are a helpful travel assistant.") assert summary_1.role == :user - assert summary_1.content == "Summarize our entire conversation up to this point for future reference." + + assert summary_1.content == + "Summarize our entire conversation up to this point for future reference." + assert summary_2.role == :assistant assert summary_2.content == "- Fake OpenAI summary" end @@ -575,7 +590,9 @@ I can provide you with the official booking websites for both. Would you also li def previously_summarized_conversation() do [ Message.new_system!("You are a helpful travel assistant."), - Message.new_user!("Summarize our entire conversation up to this point for future reference."), + Message.new_user!( + "Summarize our entire conversation up to this point for future reference." + ), Message.new_assistant!( "- User seeking help planning a 2-week Italy vacation in September\n- Proposed travel route: Rome (4 days), Florence (4 days), Amalfi Coast (5 days)\n- Transportation recommendations:\n * Trains for city travel (Rome and Florence)\n * Potential car rental for Amalfi Coast\n- Conversation focused on itinerary planning and transportation logistics\n- User interested in both urban and coastal experiences\n- Detailed location and time allocation discussed\n- Awaiting further details on hotel preferences and specific interests in each destination" ), diff --git a/test/chat_models/chat_anthropic_test.exs b/test/chat_models/chat_anthropic_test.exs index 4ca60ff0..c7304ae3 100644 --- a/test/chat_models/chat_anthropic_test.exs +++ b/test/chat_models/chat_anthropic_test.exs @@ -16,8 +16,8 @@ defmodule LangChain.ChatModels.ChatAnthropicTest do alias LangChain.BedrockHelpers alias LangChain.LangChainError - @test_model "claude-3-opus-20240229" - @bedrock_test_model "anthropic.claude-3-5-sonnet-20240620-v1:0" + @test_model "claude-3-5-sonnet-20241022" + @bedrock_test_model "anthropic.claude-3-5-sonnet-20241022-v2:0" @apis [:anthropic, :anthropic_bedrock] defp hello_world(_args, _context) do @@ -28,6 +28,10 @@ defmodule LangChain.ChatModels.ChatAnthropicTest do %{bedrock: BedrockHelpers.bedrock_config(), model: @bedrock_test_model} end + defp api_config_for(:anthropic) do + %{model: @test_model} + end + defp api_config_for(_), do: %{} setup context do @@ -577,16 +581,18 @@ defmodule LangChain.ChatModels.ChatAnthropicTest do test "#{BedrockHelpers.prefix_for(api)}basic streamed content example and fires ratelimit callback and token usage", %{live_api: api, api_config: api_config} do handlers = %{ - on_llm_ratelimit_info: fn _model, headers -> + on_llm_ratelimit_info: fn headers -> send(self(), {:fired_ratelimit_info, headers}) end, - on_llm_token_usage: fn _model, usage -> + on_llm_token_usage: fn usage -> 
send(self(), {:fired_token_usage, usage}) end } {:ok, chat} = - ChatAnthropic.new(%{stream: true, callbacks: [handlers]} |> Map.merge(api_config)) + ChatAnthropic.new(%{stream: true} |> Map.merge(api_config)) + + chat = %ChatAnthropic{chat | callbacks: [handlers]} {:ok, result} = ChatAnthropic.call(chat, [ @@ -1340,7 +1346,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text api_config: api_config } do # https://docs.anthropic.com/en/docs/tool-use - {:ok, chat} = ChatAnthropic.new(%{model: @test_model} |> Map.merge(api_config)) + {:ok, chat} = ChatAnthropic.new(%{model: api_config.model} |> Map.merge(api_config)) message = Message.new_user!("Use the 'do_something' tool.") @@ -1389,7 +1395,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text api_config: api_config } do # https://docs.anthropic.com/claude/reference/messages-examples#vision - {:ok, chat} = ChatAnthropic.new(%{model: @test_model} |> Map.merge(api_config)) + {:ok, chat} = ChatAnthropic.new(%{model: api_config.model} |> Map.merge(api_config)) message = Message.new_user!("Use the 'do_something' tool with the value 'cat'.") @@ -1428,16 +1434,19 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text # } end - @tag live_call: true, live_anthropic: true - test "#{BedrockHelpers.prefix_for(api)} streams a tool call with parameters" do + @tag live_call: true, live_api: api + test "#{BedrockHelpers.prefix_for(api)} streams a tool call with parameters", %{ + api_config: api_config + } do handler = %{ - on_llm_new_delta: fn _model, delta -> + on_llm_new_delta: fn %LLMChain{} = _chain, delta -> # IO.inspect(delta, label: "DELTA") send(self(), {:streamed_fn, delta}) end } - {:ok, chat} = ChatAnthropic.new(%{model: @test_model, stream: true, callbacks: [handler]}) + {:ok, chat} = + ChatAnthropic.new(%{model: api_config.model, stream: true} |> Map.merge(api_config)) text = "People tell me I should be more patient, but I can't just sit around waiting for that to happen" @@ -1459,6 +1468,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text LLMChain.new!(%{llm: chat, verbose: false}) |> LLMChain.add_message(user_message) |> LLMChain.add_tools(tool) + |> LLMChain.add_callback(handler) |> LLMChain.run(mode: :until_success) # has the result from the function execution @@ -1473,20 +1483,22 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text test_pid = self() handler = %{ - on_llm_new_delta: fn _model, delta -> + on_llm_new_delta: fn %LLMChain{} = _chain, delta -> send(test_pid, {:streamed_fn, delta}) end } {:ok, chat} = ChatAnthropic.new( - %{stream: true, callbacks: [handler]} + %{stream: true} |> Map.merge(api_config) ) {:ok, updated_chain} = - LLMChain.new!(%{llm: chat}) + %{llm: chat} + |> LLMChain.new!() |> LLMChain.add_message(Message.new_user!("Say, 'Hi!'!")) + |> LLMChain.add_callback(handler) |> LLMChain.run() assert updated_chain.last_message.content == "Hi!" 
@@ -1506,23 +1518,23 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text test_pid = self() handler = %{ - on_llm_new_message: fn _model, message -> + on_llm_new_message: fn %LLMChain{} = _chain, message -> send(test_pid, {:received_msg, message}) end, - on_llm_ratelimit_info: fn _model, headers -> + on_llm_ratelimit_info: fn %LLMChain{} = _chain, headers -> send(test_pid, {:fired_ratelimit_info, headers}) end, - on_llm_token_usage: fn _model, usage -> + on_llm_token_usage: fn %LLMChain{} = _chain, usage -> send(self(), {:fired_token_usage, usage}) end } {:ok, updated_chain} = LLMChain.new!(%{ - llm: - ChatAnthropic.new!(%{stream: false, callbacks: [handler]} |> Map.merge(api_config)) + llm: ChatAnthropic.new!(%{stream: false} |> Map.merge(api_config)) }) |> LLMChain.add_message(Message.new_user!("Say, 'Hi!'!")) + |> LLMChain.add_callback(handler) |> LLMChain.run() assert updated_chain.last_message.content == "Hi!" @@ -1558,24 +1570,26 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text test_pid = self() handler = %{ - on_llm_new_delta: fn _model, delta -> + on_llm_new_delta: fn %LLMChain{} = _chain, delta -> # IO.inspect(data, label: "DATA") send(test_pid, {:streamed_fn, delta}) end } + chat = + ChatAnthropic.new!( + %{model: api_config.model, stream: true} + |> Map.merge(api_config) + ) + {:ok, updated_chain} = - LLMChain.new!(%{ - llm: - ChatAnthropic.new!( - %{model: @test_model, stream: true, callbacks: [handler]} - |> Map.merge(api_config) - ) - }) + %{llm: chat} + |> LLMChain.new!() |> LLMChain.add_message(Message.new_system!("You are a helpful and concise assistant.")) |> LLMChain.add_message(Message.new_user!("Say, 'Hi!'!")) |> LLMChain.add_message(Message.new_assistant!("Hi!")) |> LLMChain.add_message(Message.new_user!("What's the capitol of Norway?")) + |> LLMChain.add_callback(handler) |> LLMChain.run() assert updated_chain.last_message.content =~ "Oslo" @@ -1599,7 +1613,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text # } # {:ok, result_chain, last_message} = - # LLMChain.new!(%{llm: %ChatAnthropic{model: @test_model, stream: true, callbacks: [handler]}}) + # LLMChain.new!(%{llm: %ChatAnthropic{model: api_config.model, stream: true, callbacks: [handler]}}) # |> LLMChain.add_message(Message.new_system!("You are a helpful and concise assistant.")) # |> LLMChain.add_message( # Message.new_user!( diff --git a/test/chat_models/chat_bumblebee_test.exs b/test/chat_models/chat_bumblebee_test.exs index cab03d53..38fa826f 100644 --- a/test/chat_models/chat_bumblebee_test.exs +++ b/test/chat_models/chat_bumblebee_test.exs @@ -25,13 +25,13 @@ defmodule LangChain.ChatModels.ChatBumblebeeTest do describe "do_process_response/3" do setup do handler = %{ - on_llm_new_delta: fn _model, delta -> + on_llm_new_delta: fn delta -> send(self(), {:callback_delta, delta}) end, - on_llm_new_message: fn _model, message -> + on_llm_new_message: fn message -> send(self(), {:callback_message, message}) end, - on_llm_token_usage: fn _model, usage -> + on_llm_token_usage: fn usage -> send(self(), {:callback_usage, usage}) end } @@ -40,7 +40,8 @@ defmodule LangChain.ChatModels.ChatBumblebeeTest do end test "handles non-streamed full text response", %{handler: handler} do - model = ChatBumblebee.new!(%{serving: Fake, stream: false, callbacks: [handler]}) + model = ChatBumblebee.new!(%{serving: Fake, stream: false}) + model = %ChatBumblebee{model | callbacks: [handler]} response = %{ results: [ @@ -61,7 +62,8 @@ 
defmodule LangChain.ChatModels.ChatBumblebeeTest do end test "handles stream when stream: false", %{handler: handler} do - model = ChatBumblebee.new!(%{serving: Fake, stream: false, callbacks: [handler]}) + model = ChatBumblebee.new!(%{serving: Fake, stream: false}) + model = %ChatBumblebee{model | callbacks: [handler]} expected_message = Message.new_assistant!(%{content: "Hello.", status: :complete}) @@ -79,7 +81,8 @@ defmodule LangChain.ChatModels.ChatBumblebeeTest do test "handles a stream when stream: false and no stream_done requested", %{ handler: handler } do - model = ChatBumblebee.new!(%{serving: Fake, stream: false, callbacks: [handler]}) + model = ChatBumblebee.new!(%{serving: Fake, stream: false}) + model = %ChatBumblebee{model | callbacks: [handler]} expected_message = Message.new_assistant!(%{content: "Hello.", status: :complete}) @@ -97,7 +100,8 @@ defmodule LangChain.ChatModels.ChatBumblebeeTest do test "handles a stream when stream: true and no stream_done requested", %{ handler: handler } do - model = ChatBumblebee.new!(%{serving: Fake, stream: true, callbacks: [handler]}) + model = ChatBumblebee.new!(%{serving: Fake, stream: true}) + model = %ChatBumblebee{model | callbacks: [handler]} expected_deltas = [ MessageDelta.new!(%{content: "Hel", status: :incomplete, role: :assistant}), @@ -121,7 +125,8 @@ defmodule LangChain.ChatModels.ChatBumblebeeTest do end test "handles stream when stream: true", %{handler: handler} do - model = ChatBumblebee.new!(%{serving: Fake, stream: true, callbacks: [handler]}) + model = ChatBumblebee.new!(%{serving: Fake, stream: true}) + model = %ChatBumblebee{model | callbacks: [handler]} expected_deltas = [ %MessageDelta{content: "He", status: :incomplete, role: :assistant}, diff --git a/test/chat_models/chat_google_ai_test.exs b/test/chat_models/chat_google_ai_test.exs index b99c8179..d02ebfb7 100644 --- a/test/chat_models/chat_google_ai_test.exs +++ b/test/chat_models/chat_google_ai_test.exs @@ -680,18 +680,19 @@ defmodule ChatModels.ChatGoogleAITest do @tag live_call: true, live_google_ai: true test "basic non-streamed response works and fires token usage callback" do handlers = %{ - on_llm_token_usage: fn _model, usage -> + on_llm_token_usage: fn usage -> send(self(), {:fired_token_usage, usage}) end } - {:ok, chat} = - ChatGoogleAI.new(%{ + chat = + ChatGoogleAI.new!(%{ temperature: 0, - stream: false, - callbacks: [handlers] + stream: false }) + chat = %ChatGoogleAI{chat | callbacks: [handlers]} + {:ok, result} = ChatGoogleAI.call(chat, [ Message.new_user!("Return the response 'Colorful Threads'.") @@ -721,7 +722,7 @@ defmodule ChatModels.ChatGoogleAITest do @tag live_call: true, live_google_ai: true test "streamed response works and fires token usage callback" do handlers = %{ - on_llm_token_usage: fn _model, usage -> + on_llm_token_usage: fn usage -> # NOTE: The token usage fires for every received delta. That's an # oddity with Google. 
# @@ -730,13 +731,14 @@ defmodule ChatModels.ChatGoogleAITest do end } - {:ok, chat} = - ChatGoogleAI.new(%{ + chat = + ChatGoogleAI.new!(%{ temperature: 0, - stream: true, - callbacks: [handlers] + stream: true }) + chat = %ChatGoogleAI{chat | callbacks: [handlers]} + {:ok, result} = ChatGoogleAI.call(chat, [ Message.new_user!("Return the response 'Colorful Threads'.") @@ -767,31 +769,28 @@ defmodule ChatModels.ChatGoogleAITest do test_pid = self() - llm_handler = %{ - on_llm_new_message: fn _model, %Message{} = message -> + handlers = %{ + on_llm_new_message: fn %LLMChain{} = _chain, %Message{} = message -> send(test_pid, {:callback_msg, message}) - end - } - - chain_handler = %{ + end, on_tool_response_created: fn _chain, %Message{} = tool_message -> send(test_pid, {:callback_tool_msg, tool_message}) end } - model = ChatGoogleAI.new!(%{temperature: 0, stream: false, callbacks: [llm_handler]}) + model = ChatGoogleAI.new!(%{temperature: 0, stream: false}) {:ok, updated_chain} = LLMChain.new!(%{ llm: model, verbose: false, - stream: false, - callbacks: [chain_handler] + stream: false }) |> LLMChain.add_message( Message.new_user!("Answer the following math question: What is 100 + 300 - 200?") ) |> LLMChain.add_tools(Calculator.new!()) + |> LLMChain.add_callback(handlers) |> LLMChain.run(mode: :while_needs_response) assert %Message{} = updated_chain.last_message diff --git a/test/chat_models/chat_model_test.exs b/test/chat_models/chat_model_test.exs index d2640162..36d8fe79 100644 --- a/test/chat_models/chat_model_test.exs +++ b/test/chat_models/chat_model_test.exs @@ -4,23 +4,6 @@ defmodule LangChain.ChatModels.ChatModelTest do alias LangChain.ChatModels.ChatModel alias LangChain.ChatModels.ChatOpenAI - describe "add_callback/2" do - test "appends the callback to the model" do - model = %ChatOpenAI{} - assert model.callbacks == [] - handler = %{on_llm_new_message: fn _model, _msg -> :ok end} - %ChatOpenAI{} = updated = ChatModel.add_callback(model, handler) - assert updated.callbacks == [handler] - end - - test "does nothing on a model that doesn't support callbacks" do - handler = %{on_llm_new_message: fn _model, _msg -> :ok end} - non_model = %{something: "else"} - updated = ChatModel.add_callback(non_model, handler) - assert updated == non_model - end - end - describe "serialize_config/1" do test "creates a map from a chat model" do model = ChatOpenAI.new!(%{model: "gpt-4o"}) diff --git a/test/chat_models/chat_open_ai_test.exs b/test/chat_models/chat_open_ai_test.exs index f600a85b..b0a7594e 100644 --- a/test/chat_models/chat_open_ai_test.exs +++ b/test/chat_models/chat_open_ai_test.exs @@ -543,17 +543,19 @@ defmodule LangChain.ChatModels.ChatOpenAITest do @tag live_call: true, live_open_ai: true test "basic content example and fires ratelimit callback" do handlers = %{ - on_llm_ratelimit_info: fn _model, headers -> + on_llm_ratelimit_info: fn headers -> send(self(), {:fired_ratelimit_info, headers}) end, - on_llm_token_usage: fn _model, usage -> + on_llm_token_usage: fn usage -> send(self(), {:fired_token_usage, usage}) end } # https://js.langchain.com/docs/modules/models/chat/ {:ok, chat} = - ChatOpenAI.new(%{temperature: 1, seed: 0, stream: false, callbacks: [handlers]}) + ChatOpenAI.new(%{temperature: 1, seed: 0, stream: false}) + + chat = %ChatOpenAI{chat | callbacks: [handlers]} {:ok, [%Message{role: :assistant, content: response}]} = ChatOpenAI.call(chat, [ @@ -581,14 +583,16 @@ defmodule LangChain.ChatModels.ChatOpenAITest do @tag live_call: true, live_open_ai: true test "basic 
streamed content example's final result and fires ratelimit callback" do handlers = %{ - on_llm_ratelimit_info: fn _model, headers -> + on_llm_ratelimit_info: fn headers -> send(self(), {:fired_ratelimit_info, headers}) end } # https://js.langchain.com/docs/modules/models/chat/ {:ok, chat} = - ChatOpenAI.new(%{temperature: 1, seed: 0, stream: true, callbacks: [handlers]}) + ChatOpenAI.new(%{temperature: 1, seed: 0, stream: true}) + + chat = %ChatOpenAI{chat | callbacks: [handlers]} {:ok, result} = ChatOpenAI.call(chat, [ @@ -655,7 +659,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do @tag live_call: true, live_open_ai: true test "basic streamed content fires token usage callback" do handlers = %{ - on_llm_token_usage: fn _model, usage -> + on_llm_token_usage: fn usage -> send(self(), {:fired_token_usage, usage}) end } @@ -666,10 +670,11 @@ defmodule LangChain.ChatModels.ChatOpenAITest do temperature: 1, seed: 0, stream: true, - stream_options: %{include_usage: true}, - callbacks: [handlers] + stream_options: %{include_usage: true} }) + chat = %ChatOpenAI{chat | callbacks: [handlers]} + # %{ # "choices" => [], # "created" => 1717878896, @@ -841,13 +846,14 @@ defmodule LangChain.ChatModels.ChatOpenAITest do @tag live_call: true, live_open_ai: true test "executes callback function when data is streamed" do handler = %{ - on_llm_new_delta: fn _model, %MessageDelta{} = delta -> + on_llm_new_delta: fn %MessageDelta{} = delta -> send(self(), {:message_delta, delta}) end } # https://js.langchain.com/docs/modules/models/chat/ - {:ok, chat} = ChatOpenAI.new(%{seed: 0, temperature: 1, stream: true, callbacks: [handler]}) + chat = ChatOpenAI.new!(%{seed: 0, temperature: 1, stream: true}) + chat = %ChatOpenAI{chat | callbacks: [handler]} {:ok, _post_results} = ChatOpenAI.call( @@ -880,7 +886,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do @tag live_call: true, live_open_ai: true test "executes callback function when data is NOT streamed" do handler = %{ - on_llm_new_message: fn _model, %Message{} = new_message -> + on_llm_new_message: fn %Message{} = new_message -> send(self(), {:message_received, new_message}) end } @@ -888,7 +894,9 @@ defmodule LangChain.ChatModels.ChatOpenAITest do # https://js.langchain.com/docs/modules/models/chat/ # NOTE streamed. Should receive complete message. 
{:ok, chat} = - ChatOpenAI.new(%{seed: 0, temperature: 1, stream: false, callbacks: [handler]}) + ChatOpenAI.new(%{seed: 0, temperature: 1, stream: false}) + + chat = %ChatOpenAI{chat | callbacks: [handler]} {:ok, [message]} = ChatOpenAI.call( @@ -1292,13 +1300,15 @@ defmodule LangChain.ChatModels.ChatOpenAITest do @tag live_call: true, live_open_ai: true test "supports streaming response calling function with args" do handler = %{ - on_llm_new_delta: fn _model, %MessageDelta{} = data -> + on_llm_new_delta: fn %MessageDelta{} = data -> # IO.inspect(data, label: "DATA") send(self(), {:streamed_fn, data}) end } - {:ok, chat} = ChatOpenAI.new(%{seed: 0, stream: true, callbacks: [handler]}) + {:ok, chat} = ChatOpenAI.new(%{seed: 0, stream: true}) + + chat = %ChatOpenAI{chat | callbacks: [handler]} {:ok, message} = Message.new_user("Answer the following math question: What is 100 + 300 - 200?") @@ -1329,13 +1339,15 @@ defmodule LangChain.ChatModels.ChatOpenAITest do @tag live_call: true, live_open_ai: true test "STREAMING handles receiving a timeout error" do handler = %{ - on_llm_new_delta: fn _model, %MessageDelta{} = data -> + on_llm_new_delta: fn %MessageDelta{} = data -> send(self(), {:streamed_fn, data}) end } - {:ok, chat} = - ChatOpenAI.new(%{seed: 0, stream: true, receive_timeout: 50, callbacks: [handler]}) + chat = + ChatOpenAI.new!(%{seed: 0, stream: true, receive_timeout: 50}) + + chat = %ChatOpenAI{chat | callbacks: [handler]} {:error, %LangChainError{} = reason} = ChatOpenAI.call(chat, [Message.new_user!("Why is the sky blue?")], []) @@ -1677,25 +1689,29 @@ defmodule LangChain.ChatModels.ChatOpenAITest do end # describe "works within a chain" do + # alias LangChain.Chains.LLMChain # @tag live_call: true, live_open_ai: true - # test "supports starting the assistant's response message and continuing it" do + # test "LLM callbacks pass pass the chain context" do # test_pid = self() # handler = %{ - # on_llm_new_delta: fn _model, %MessageDelta{} = data -> + # on_llm_new_delta: fn %LLMChain{} = _chain, %MessageDelta{} = data -> # send(test_pid, {:streamed_fn, data}) + # end, + # on_llm_new_message: fn %LLMChain{} = _chain, %Message{} = data -> + # send(test_pid, {:msg_fn, data}) # end # } - # {:ok, result_chain, last_message} = - # LLMChain.new!(%{llm: %ChatOpenAI{model: @gpt4, stream: true, callbacks: [handler]}}) + # {:ok, result_chain} = + # LLMChain.new!(%{llm: %ChatOpenAI{model: @gpt4, stream: true}}) # |> LLMChain.add_message(Message.new_system!("You are a helpful and concise assistant.")) # |> LLMChain.add_message( # Message.new_user!( # "What's the capitol of Norway? 
Please respond with the answer {{ANSWER}}" # ) # ) - # |> LLMChain.add_message(Message.new_assistant!("")) + # |> LLMChain.add_callback(handler) # |> LLMChain.run() # # %LangChain.Message{ @@ -1708,6 +1724,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do # # tool_call_id: nil, # # }, + # last_message = result_chain.last_message # IO.inspect(result_chain.messages) # IO.inspect(last_message) # # TODO: The received message is not appended to the sent assistant message @@ -1720,8 +1737,6 @@ defmodule LangChain.ChatModels.ChatOpenAITest do # assert_received {:streamed_fn, data} # assert %MessageDelta{role: :assistant} = data - - # assert false # end # end diff --git a/test/chat_models/chat_vertex_ai_test.exs b/test/chat_models/chat_vertex_ai_test.exs index 4c6bf6f6..63b7b485 100644 --- a/test/chat_models/chat_vertex_ai_test.exs +++ b/test/chat_models/chat_vertex_ai_test.exs @@ -26,7 +26,10 @@ defmodule ChatModels.ChatVertexAITest do describe "new/1" do test "works with minimal attr" do assert {:ok, %ChatVertexAI{} = vertex_ai} = - ChatVertexAI.new(%{"model" => "gemini-pro", "endpoint" => "http://localhost:1234/"}) + ChatVertexAI.new(%{ + "model" => "gemini-pro", + "endpoint" => "http://localhost:1234/" + }) assert vertex_ai.model == "gemini-pro" end diff --git a/test/function_param_test.exs b/test/function_param_test.exs index dffb659e..08fe7ec9 100644 --- a/test/function_param_test.exs +++ b/test/function_param_test.exs @@ -134,7 +134,11 @@ defmodule LangChain.FunctionParamTest do assert expected == FunctionParam.to_json_schema(%{}, param) param = - FunctionParam.new!(%{name: "enabled", type: :boolean, description: "If option is enabled"}) + FunctionParam.new!(%{ + name: "enabled", + type: :boolean, + description: "If option is enabled" + }) expected = %{"enabled" => %{"type" => "boolean", "description" => "If option is enabled"}} assert expected == FunctionParam.to_json_schema(%{}, param) diff --git a/test/support/bedrock_helpers.ex b/test/support/bedrock_helpers.ex index 540928ae..dbf858f7 100644 --- a/test/support/bedrock_helpers.ex +++ b/test/support/bedrock_helpers.ex @@ -7,7 +7,7 @@ defmodule LangChain.BedrockHelpers do secret_access_key: Application.fetch_env!(:langchain, :aws_secret_access_key) ] end, - region: "us-east-1" + region: Application.fetch_env!(:langchain, :aws_region) } end diff --git a/test/test_helper.exs b/test/test_helper.exs index d8a79873..0b50cb7e 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -10,6 +10,12 @@ Application.put_env( System.fetch_env!("AWS_SECRET_ACCESS_KEY") ) +Application.put_env( + :langchain, + :aws_region, + System.get_env("AWS_REGION", "us-east-1") +) + Mimic.copy(LangChain.Utils.BedrockStreamDecoder) Mimic.copy(LangChain.Utils.AwsEventstreamDecoder) diff --git a/test/tools/calculator_test.exs b/test/tools/calculator_test.exs index 3cd3a227..a9b14aa2 100644 --- a/test/tools/calculator_test.exs +++ b/test/tools/calculator_test.exs @@ -68,30 +68,25 @@ defmodule LangChain.Tools.CalculatorTest do test "performs repeated calls until complete with a live LLM" do test_pid = self() - llm_handler = %{ - on_llm_new_message: fn _model, %Message{} = message -> + handlers = %{ + on_llm_new_message: fn %LLMChain{}, %Message{} = message -> send(test_pid, {:callback_msg, message}) - end - } - - chain_handler = %{ + end, on_tool_response_created: fn _chain, %Message{} = tool_message -> send(test_pid, {:callback_tool_msg, tool_message}) end } - model = ChatOpenAI.new!(%{seed: 0, temperature: 0, stream: false, callbacks: [llm_handler]}) + model 
= ChatOpenAI.new!(%{seed: 0, temperature: 0, stream: false}) {:ok, updated_chain} = - LLMChain.new!(%{ - llm: model, - verbose: false, - callbacks: [chain_handler] - }) + %{llm: model, verbose: false} + |> LLMChain.new!() |> LLMChain.add_message( Message.new_user!("Answer the following math question: What is 100 + 300 - 200?") ) |> LLMChain.add_tools(Calculator.new!()) + |> LLMChain.add_callback(handlers) |> LLMChain.run(mode: :while_needs_response) assert updated_chain.last_message.role == :assistant diff --git a/test/utils_test.exs b/test/utils_test.exs index e7b6a58d..dcaf5864 100644 --- a/test/utils_test.exs +++ b/test/utils_test.exs @@ -5,6 +5,7 @@ defmodule LangChain.UtilsTest do alias LangChain.Message alias LangChain.ChatModels.ChatOpenAI alias LangChain.Utils + alias LangChain.Chains.LLMChain defmodule FakeSchema do use Ecto.Schema @@ -237,4 +238,67 @@ defmodule LangChain.UtilsTest do assert new_system.content == "System B" end end + + describe "rewrap_callbacks_for_model/2" do + test "wraps all LLM callback functions (not chain callbacks)" do + # split across two callback maps + callback_1 = + %{ + on_llm_new_delta: fn %LLMChain{custom_context: context}, arg -> + "Custom: #{inspect(context)} + #{arg} in on_llm_new_delta" + end, + on_llm_new_message: fn %LLMChain{custom_context: context}, arg -> + "Custom: #{inspect(context)} + #{arg} in on_llm_new_message-1" + end + } + + callback_2 = + %{ + on_llm_new_message: fn %LLMChain{custom_context: context}, arg -> + # a repeated callback + "Custom: #{inspect(context)} + #{arg} in on_llm_new_message-2" + end, + on_llm_ratelimit_info: fn %LLMChain{custom_context: context}, arg -> + "Custom: #{inspect(context)} + #{arg} in on_llm_ratelimit_info" + end, + on_llm_token_usage: fn %LLMChain{custom_context: context}, arg -> + "Custom: #{inspect(context)} + #{arg} in on_llm_token_usage" + end, + on_message_processed: fn _chain, _arg -> + :ok + end + } + + llm = ChatOpenAI.new!(%{}) + + chain = + %{llm: llm} + |> LLMChain.new!() + |> LLMChain.update_custom_context(%{value: 1}) + |> LLMChain.add_callback(callback_1) + |> LLMChain.add_callback(callback_2) + + updated_llm = Utils.rewrap_callbacks_for_model(llm, chain.callbacks, chain) + + [group_1, group_2] = updated_llm.callbacks + + assert "Custom: %{value: 1} + delta in on_llm_new_delta" == + group_1.on_llm_new_delta.("delta") + + assert "Custom: %{value: 1} + msg in on_llm_new_message-1" == + group_1.on_llm_new_message.("msg") + + assert "Custom: %{value: 1} + msg in on_llm_new_message-2" == + group_2.on_llm_new_message.("msg") + + assert "Custom: %{value: 1} + info in on_llm_ratelimit_info" == + group_2.on_llm_ratelimit_info.("info") + + assert "Custom: %{value: 1} + usage in on_llm_token_usage" == + group_2.on_llm_token_usage.("usage") + + # not an LLM event. 
Not included + assert group_2[:on_message_processed] == nil + end + end end From eabfc6ab7f8418e88cc9b80d7f016d1a2ebe6d9c Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 8 Jan 2025 19:28:10 -0800 Subject: [PATCH 02/11] feat: Enable :inet6 for Req.new (#227) --- lib/chat_models/chat_ollama_ai.ex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/chat_models/chat_ollama_ai.ex b/lib/chat_models/chat_ollama_ai.ex index 82a78934..3a178504 100644 --- a/lib/chat_models/chat_ollama_ai.ex +++ b/lib/chat_models/chat_ollama_ai.ex @@ -298,6 +298,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do receive_timeout: ollama_ai.receive_timeout, retry: :transient, max_retries: 3, + inet6: true, retry_delay: fn attempt -> 300 * attempt end ) @@ -336,6 +337,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do Req.new( url: ollama_ai.endpoint, json: for_api(ollama_ai, messages, functions), + inet6: true, receive_timeout: ollama_ai.receive_timeout ) |> Req.post( From 51527b4a0fd360dd45dcf263bf389e425aa40533 Mon Sep 17 00:00:00 2001 From: Cristine Guadelupe Date: Thu, 9 Jan 2025 10:31:22 +0700 Subject: [PATCH 03/11] fix: enable verbose_deltas (#197) --- lib/chains/llm_chain.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/chains/llm_chain.ex b/lib/chains/llm_chain.ex index f4b2a5ce..f3f3f98b 100644 --- a/lib/chains/llm_chain.ex +++ b/lib/chains/llm_chain.ex @@ -204,7 +204,7 @@ defmodule LangChain.Chains.LLMChain do """ @type message_processor :: (t(), Message.t() -> processor_return()) - @create_fields [:llm, :tools, :custom_context, :max_retry_count, :callbacks, :verbose] + @create_fields [:llm, :tools, :custom_context, :max_retry_count, :callbacks, :verbose, :verbose_deltas] @required_fields [:llm] @doc """ From eea93fff4436fef7a3c98cad72b875429e7a0fa2 Mon Sep 17 00:00:00 2001 From: Mark Ericksen Date: Wed, 8 Jan 2025 20:33:58 -0700 Subject: [PATCH 04/11] update version and docs outline (#229) --- mix.exs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mix.exs b/mix.exs index 406dde88..a486913f 100644 --- a/mix.exs +++ b/mix.exs @@ -2,7 +2,7 @@ defmodule LangChain.MixProject do use Mix.Project @source_url "https://github.com/brainlid/langchain" - @version "0.3.0-rc.1" + @version "0.3.0-rc.2" def project do [ @@ -18,7 +18,7 @@ defmodule LangChain.MixProject do name: "LangChain", homepage_url: @source_url, description: """ - Elixir implementation of a LangChain style framework. + Elixir implementation of a LangChain style framework that lets Elixir projects work well with LLMs. """ ] end @@ -93,7 +93,6 @@ defmodule LangChain.MixProject do ], Callbacks: [ LangChain.Callbacks, - LangChain.ChatModels.LLMCallbacks, LangChain.Chains.ChainCallbacks ], Routing: [ From ead0e556ad2eaa19881e5977434e69b96bfbb77a Mon Sep 17 00:00:00 2001 From: Mark Ericksen Date: Wed, 8 Jan 2025 20:39:22 -0700 Subject: [PATCH 05/11] updated changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01e54bfd..21b5ecca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -140,6 +140,19 @@ To: - `LangChain.ChatModels.ChatOpenAI.new/1` and `LangChain.ChatModels.ChatOpenAI.new!/1` no longer accept `:callbacks` on the chat model. 
- Removal of `LangChain.ChatModels.ChatModel.add_callback/2` +### What else Changed +* add explicit message support in summarizer by @brainlid in https://github.com/brainlid/langchain/pull/220 +* Change abacus to optional dep by @nallwhy in https://github.com/brainlid/langchain/pull/223 +* Remove constraint of alternating user, assistant by @GenericJam in https://github.com/brainlid/langchain/pull/222 +* Breaking change: consolidate LLM callback functions by @brainlid in https://github.com/brainlid/langchain/pull/228 +* feat: Enable :inet6 for Req.new for Ollama by @mpope9 in https://github.com/brainlid/langchain/pull/227 +* fix: enable verbose_deltas by @cristineguadelupe in https://github.com/brainlid/langchain/pull/197 + +### New Contributors +* @nallwhy made their first contribution in https://github.com/brainlid/langchain/pull/223 +* @GenericJam made their first contribution in https://github.com/brainlid/langchain/pull/222 +* @mpope9 made their first contribution in https://github.com/brainlid/langchain/pull/227 + ## v0.3.0-rc.1 (2024-12-15) ### Breaking Changes From 92ad64701577947ce5739bfdc86f5ef87cffe9d4 Mon Sep 17 00:00:00 2001 From: "Marc N." Date: Mon, 13 Jan 2025 20:20:22 +0100 Subject: [PATCH 06/11] Add Bumblebee Phi-4 (#233) This commit adds the Prompt template for the Phi-4 model. Tested with quantized Phi-4 on 40GB A100. (17.7 GB vram and 100GB ram to quantize) template_format: :phi_4 --- lib/chat_models/chat_bumblebee.ex | 9 ++++++++- lib/utils/chat_templates.ex | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/chat_models/chat_bumblebee.ex b/lib/chat_models/chat_bumblebee.ex index c32b5eb3..c7d3185f 100644 --- a/lib/chat_models/chat_bumblebee.ex +++ b/lib/chat_models/chat_bumblebee.ex @@ -123,7 +123,14 @@ defmodule LangChain.ChatModels.ChatBumblebee do # # more focused and deterministic. # field :temperature, :float, default: 1.0 - field :template_format, Ecto.Enum, values: [:inst, :im_start, :zephyr, :llama_2, :llama_3] + field :template_format, Ecto.Enum, values: [ + :inst, + :im_start, + :zephyr, + :phi_4, + :llama_2, + :llama_3 + ] # The bumblebee model may compile differently based on the stream true/false # option on the serving. 
Therefore, streaming should be enabled on the diff --git a/lib/utils/chat_templates.ex b/lib/utils/chat_templates.ex index ac0dfc34..4f71eef7 100644 --- a/lib/utils/chat_templates.ex +++ b/lib/utils/chat_templates.ex @@ -261,6 +261,29 @@ defmodule LangChain.Utils.ChatTemplates do ) end + def apply_chat_template!(messages, :phi_4, _opts) do + # translation form https://huggingface.co/microsoft/phi-4/blob/main/tokenizer_config.json#L774 to Elixir via Claude 3.5 Sonnet Copilot + # {% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|><|im_start|>assistant<|im_sep|>'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>'}}{% endif %}{% endfor %} + {system, first_user, rest} = prep_and_validate_messages(messages) + + text = """ + <%= if @system != nil do %><|im_start|>system<|im_sep|><%= @system.content %><|im_end|><% end %>\ + <%= if @first_user != nil do %><|im_start|>user<|im_sep|><%= @first_user.content %><|im_end|><|im_start|>assistant<|im_sep|><% end %>\ + <%= for m <- @rest do %>\ + <%= if m.role == :user do %><|im_start|>user<|im_sep|><%= m.content %><|im_end|><|im_start|>assistant<|im_sep|>\ + <% else %><%= m.content %><|im_end|><% end %>\ + <% end %> + """ + + EEx.eval_string(text, + assigns: [ + system: system, + first_user: first_user, + rest: rest + ] + ) + end + # Does LLaMa 2 formatted text def apply_chat_template!(messages, :llama_2, _opts) do # https://huggingface.co/blog/llama2#how-to-prompt-llama-2 From 674a13f1d16e3b2b20fd3533c55e6c9745396d62 Mon Sep 17 00:00:00 2001 From: Joel Koch Date: Mon, 13 Jan 2025 20:24:08 +0100 Subject: [PATCH 07/11] feat: apply chat template from callback (#231) * Enable applying chat template from callback * Add typespec and docs --- lib/utils/chat_templates.ex | 11 ++++++++++- test/utils/chat_templates_test.exs | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/lib/utils/chat_templates.ex b/lib/utils/chat_templates.ex index 4f71eef7..a959f3f1 100644 --- a/lib/utils/chat_templates.ex +++ b/lib/utils/chat_templates.ex @@ -79,11 +79,16 @@ defmodule LangChain.Utils.ChatTemplates do Note: The `:zephyr` format supports specific system messages. + ## Template callback + + It's possible to pass a callback as a template. + The function receives the list of messages as first argument and `opts` as second and must return a string. """ alias LangChain.LangChainError alias LangChain.Message - @type chat_format :: :inst | :im_start | :llama_2 | :llama_3 | :zephyr + @type template_callback :: ([Message.t()], Keyword.t() -> String.t()) + @type chat_format :: :inst | :im_start | :llama_2 | :llama_3 | :zephyr | template_callback() # Option: # - `add_generation_prompt`: boolean. Defaults to False. @@ -338,6 +343,10 @@ defmodule LangChain.Utils.ChatTemplates do ) end + def apply_chat_template!(messages, template_callback, opts) + when is_function(template_callback, 2), + do: template_callback.(messages, opts) + # return the desired true/false value. Only set to true when the last message # is a user prompt. 
defp default_add_generation_prompt_value(messages) do diff --git a/test/utils/chat_templates_test.exs b/test/utils/chat_templates_test.exs index 782e64db..09e08beb 100644 --- a/test/utils/chat_templates_test.exs +++ b/test/utils/chat_templates_test.exs @@ -479,4 +479,30 @@ defmodule LangChain.Utils.ChatTemplatesTest do assert result == expected end end + + describe "apply_chat_template!/3 - with template callback" do + test "formats according to template callback" do + messages = [ + Message.new_system!("system_message"), + Message.new_user!("user_prompt"), + Message.new_assistant!("assistant_response"), + Message.new_user!("user_2nd") + ] + + format = + "<|start_of_template|><%= for message <- @messages do %><%= message.role %>\n<%= message.content %>\n\n<% end %><|end_of_template|>" + + template_callback = fn messages, _opts -> + EEx.eval_string(format, + assigns: [messages: messages] + ) + end + + expected = + "<|start_of_template|>system\nsystem_message\n\nuser\nuser_prompt\n\nassistant\nassistant_response\n\nuser\nuser_2nd\n\n<|end_of_template|>" + + result = ChatTemplates.apply_chat_template!(messages, template_callback) + assert result == expected + end + end end From 3630129f1ad95f6b1025212ddb311667e46b7c51 Mon Sep 17 00:00:00 2001 From: Mark Ericksen Date: Mon, 13 Jan 2025 12:27:44 -0700 Subject: [PATCH 08/11] phi_4 chat template support fix after merge --- lib/utils/chat_templates.ex | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/utils/chat_templates.ex b/lib/utils/chat_templates.ex index a959f3f1..8e55d06f 100644 --- a/lib/utils/chat_templates.ex +++ b/lib/utils/chat_templates.ex @@ -79,6 +79,11 @@ defmodule LangChain.Utils.ChatTemplates do Note: The `:zephyr` format supports specific system messages. + + ### `:phi_4` + + The `:phi_4` template format is also supported. + ## Template callback It's possible to pass a callback as a template. @@ -88,7 +93,7 @@ defmodule LangChain.Utils.ChatTemplates do alias LangChain.Message @type template_callback :: ([Message.t()], Keyword.t() -> String.t()) - @type chat_format :: :inst | :im_start | :llama_2 | :llama_3 | :zephyr | template_callback() + @type chat_format :: :inst | :im_start | :llama_2 | :llama_3 | :phi_4 | :zephyr | template_callback() # Option: # - `add_generation_prompt`: boolean. Defaults to False. From 9ee95a73b46e45c3245ec4bbeab60662a00e335e Mon Sep 17 00:00:00 2001 From: Mark Ericksen Date: Mon, 13 Jan 2025 12:27:55 -0700 Subject: [PATCH 09/11] minor test cleanup --- test/chains/llm_chain_test.exs | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/chains/llm_chain_test.exs b/test/chains/llm_chain_test.exs index d06efe54..b4f711b4 100644 --- a/test/chains/llm_chain_test.exs +++ b/test/chains/llm_chain_test.exs @@ -1835,9 +1835,7 @@ defmodule LangChain.Chains.LLMChainTest do updated_chain = LLMChain.add_callback(chain, handler2) assert updated_chain.callbacks == [handler1, handler2] end - end - describe "add_llm_callback/2" do test "appends a callback handler to the chain's LLM", %{chat: chat} do handler1 = %{on_llm_new_message: fn %LLMChain{} = _chain, _msg -> IO.puts("MESSAGE 1!") end} handler2 = %{on_llm_new_message: fn %LLMChain{} = _chain, _msg -> IO.puts("MESSAGE 2!") end} From 45708db3f624a176b82cf358660fa9eea3c0a570 Mon Sep 17 00:00:00 2001 From: Mark Ericksen Date: Tue, 14 Jan 2025 20:43:23 -0700 Subject: [PATCH 10/11] support for o1 OpenAI model (#234) - adds new "reasoning_mode". Sends "developer" role instead of "system" role. 
- adds support for "reasoning_effort" API option - refactors many of the for_api functions to include the model. The reasoning model changes how things are sent --- lib/chat_models/chat_google_ai.ex | 7 +- lib/chat_models/chat_mistral_ai.ex | 2 +- lib/chat_models/chat_ollama_ai.ex | 2 +- lib/chat_models/chat_open_ai.ex | 157 +++++++++++++++------ lib/chat_models/chat_vertex_ai.ex | 2 +- test/chat_models/chat_open_ai_test.exs | 181 +++++++++++++++---------- 6 files changed, 234 insertions(+), 117 deletions(-) diff --git a/lib/chat_models/chat_google_ai.ex b/lib/chat_models/chat_google_ai.ex index b0228c19..5caeeb9a 100644 --- a/lib/chat_models/chat_google_ai.ex +++ b/lib/chat_models/chat_google_ai.ex @@ -300,7 +300,12 @@ defmodule LangChain.ChatModels.ChatGoogleAI do end def for_api(%Function{} = function) do - encoded = ChatOpenAI.for_api(function) + encoded = + %{ + "name" => function.name, + "parameters" => ChatOpenAI.get_parameters(function) + } + |> Utils.conditionally_add_to_map("description", function.description) # For functions with no parameters, Google AI needs the parameters field removing, otherwise it will error # with "* GenerateContentRequest.tools[0].function_declarations[0].parameters.properties: should be non-empty for OBJECT type\n" diff --git a/lib/chat_models/chat_mistral_ai.ex b/lib/chat_models/chat_mistral_ai.ex index a8684f8c..cc06883e 100644 --- a/lib/chat_models/chat_mistral_ai.ex +++ b/lib/chat_models/chat_mistral_ai.ex @@ -125,7 +125,7 @@ defmodule LangChain.ChatModels.ChatMistralAI do top_p: mistral.top_p, safe_prompt: mistral.safe_prompt, stream: mistral.stream, - messages: Enum.map(messages, &ChatOpenAI.for_api/1) + messages: Enum.map(messages, &ChatOpenAI.for_api(mistral, &1)) } |> Utils.conditionally_add_to_map(:random_seed, mistral.random_seed) |> Utils.conditionally_add_to_map(:max_tokens, mistral.max_tokens) diff --git a/lib/chat_models/chat_ollama_ai.ex b/lib/chat_models/chat_ollama_ai.ex index 3a178504..8b948eca 100644 --- a/lib/chat_models/chat_ollama_ai.ex +++ b/lib/chat_models/chat_ollama_ai.ex @@ -192,7 +192,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do %{ model: model.model, temperature: model.temperature, - messages: messages |> Enum.map(&ChatOpenAI.for_api/1), + messages: messages |> Enum.map(&ChatOpenAI.for_api(model, &1)), stream: model.stream, seed: model.seed, num_ctx: model.num_ctx, diff --git a/lib/chat_models/chat_open_ai.ex b/lib/chat_models/chat_open_ai.ex index 994518b7..2435f847 100644 --- a/lib/chat_models/chat_open_ai.ex +++ b/lib/chat_models/chat_open_ai.ex @@ -108,6 +108,18 @@ defmodule LangChain.ChatModels.ChatOpenAI do `https://some-subdomain.cognitiveservices.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-08-01-preview"` + ## Reasoning Model Support + + OpenAI made some significant API changes with the introduction of their "reasoning" models. This includes the `o1` and `o1-mini` models. + + To enable this mode, set `:reasoning_mode` to `true`: + + model = ChatOpenAI.new!(%{reasoning_mode: true}) + + Setting `reasoning_mode` to `true` does at least the two following things: + + - Set `:developer` as the `role` for system messages. The OpenAI documentation says API calls to `o1` and newer models must use the `role: :developer` instead of `role: :system` and errors if not set correctly. + - The `:reasoning_effort` option included in LLM requests. This setting is only permitted on a reasoning model. 
The `:reasoning_effort` values support the "low", "medium" (default), and "high" options specified in the OpenAI documentation. This instructs the LLM on how much time, and tokens, should be spent on thinking through and reasoning about the request and the response. """ use Ecto.Schema require Logger @@ -115,6 +127,7 @@ defmodule LangChain.ChatModels.ChatOpenAI do alias __MODULE__ alias LangChain.Config alias LangChain.ChatModels.ChatModel + alias LangChain.PromptTemplate alias LangChain.Message alias LangChain.Message.ContentPart alias LangChain.Message.ToolCall @@ -155,6 +168,19 @@ defmodule LangChain.ChatModels.ChatOpenAI do # their existing frequency in the text so far, decreasing the model's # likelihood to repeat the same line verbatim. field :frequency_penalty, :float, default: 0.0 + + # Used when working with a reasoning model like `o1` and newer. This setting + # is required when working with those models as the API behavior needs to + # change. + field :reasoning_mode, :boolean, default: false + + # o1 models only + # + # Constrains effort on reasoning for reasoning models. Currently supported + # values are `low`, `medium`, and `high`. Reducing reasoning effort can result in + # faster responses and fewer tokens used on reasoning in a response. + field :reasoning_effort, :string, default: "medium" + # Duration in seconds for the response to be received. When streaming a very # lengthy response, a longer time limit may be required. However, when it # goes on too long by itself, it tends to hallucinate more. @@ -198,6 +224,8 @@ defmodule LangChain.ChatModels.ChatOpenAI do :seed, :n, :stream, + :reasoning_mode, + :reasoning_effort, :receive_timeout, :json_response, :json_schema, @@ -275,7 +303,7 @@ defmodule LangChain.ChatModels.ChatOpenAI do messages: messages |> Enum.reduce([], fn m, acc -> - case for_api(m) do + case for_api(openai, m) do %{} = data -> [data | acc] @@ -287,22 +315,26 @@ defmodule LangChain.ChatModels.ChatOpenAI do response_format: set_response_format(openai), user: openai.user } + |> Utils.conditionally_add_to_map( + :reasoning_effort, + if(openai.reasoning_mode, do: openai.reasoning_effort, else: nil) + ) |> Utils.conditionally_add_to_map(:max_tokens, openai.max_tokens) |> Utils.conditionally_add_to_map(:seed, openai.seed) |> Utils.conditionally_add_to_map( :stream_options, get_stream_options_for_api(openai.stream_options) ) - |> Utils.conditionally_add_to_map(:tools, get_tools_for_api(tools)) + |> Utils.conditionally_add_to_map(:tools, get_tools_for_api(openai, tools)) |> Utils.conditionally_add_to_map(:tool_choice, get_tool_choice(openai)) end - defp get_tools_for_api(nil), do: [] + defp get_tools_for_api(%_{} = _model, nil), do: [] - defp get_tools_for_api(tools) do + defp get_tools_for_api(%_{} = model, tools) do Enum.map(tools, fn %Function{} = function -> - %{"type" => "function", "function" => for_api(function)} + %{"type" => "function", "function" => for_api(model, function)} end) end @@ -341,48 +373,44 @@ defmodule LangChain.ChatModels.ChatOpenAI do defp get_tool_choice(%ChatOpenAI{}), do: nil @doc """ - Convert a LangChain structure to the expected map of data for the OpenAI API. + Convert a LangChain Message-based structure to the expected map of data for + the OpenAI API. This happens within the context of the model configuration as + well. The additional context is needed to correctly convert a role to either + `:system` or `:developer`. + + NOTE: The `ChatOpenAI` model's functions are reused in other modules. 
For this + reason, model is more generally defined as a struct. """ - @spec for_api(Message.t() | ContentPart.t() | Function.t()) :: + @spec for_api( + struct(), + Message.t() + | PromptTemplate.t() + | ToolCall.t() + | ToolResult.t() + | ContentPart.t() + | Function.t() + ) :: %{String.t() => any()} | [%{String.t() => any()}] - def for_api(%Message{role: :assistant, tool_calls: tool_calls} = msg) - when is_list(tool_calls) do - %{ - "role" => :assistant, - "content" => msg.content - } - |> Utils.conditionally_add_to_map("tool_calls", Enum.map(tool_calls, &for_api(&1))) - end - - def for_api(%Message{role: :tool, tool_results: tool_results} = _msg) - when is_list(tool_results) do - # ToolResults turn into a list of tool messages for OpenAI - Enum.map(tool_results, fn result -> - %{ - "role" => :tool, - "tool_call_id" => result.tool_call_id, - "content" => result.content - } - end) - end + def for_api(%_{} = model, %Message{content: content} = msg) when is_binary(content) do + role = get_message_role(model, msg.role) - def for_api(%Message{content: content} = msg) when is_binary(content) do %{ - "role" => msg.role, + "role" => role, "content" => msg.content } |> Utils.conditionally_add_to_map("name", msg.name) end - def for_api(%Message{role: :user, content: content} = msg) when is_list(content) do + def for_api(%_{} = model, %Message{role: :user, content: content} = msg) + when is_list(content) do %{ "role" => msg.role, - "content" => Enum.map(content, &for_api(&1)) + "content" => Enum.map(content, &for_api(model, &1)) } |> Utils.conditionally_add_to_map("name", msg.name) end - def for_api(%ToolResult{type: :function} = result) do + def for_api(%_{} = _model, %ToolResult{type: :function} = result) do # a ToolResult becomes a stand-alone %Message{role: :tool} response. %{ "role" => :tool, @@ -391,15 +419,33 @@ defmodule LangChain.ChatModels.ChatOpenAI do } end - def for_api(%LangChain.PromptTemplate{} = _template) do - raise LangChain.LangChainError, "PromptTemplates must be converted to messages." 
+ def for_api(%_{} = model, %Message{role: :assistant, tool_calls: tool_calls} = msg) + when is_list(tool_calls) do + %{ + "role" => :assistant, + "content" => msg.content + } + |> Utils.conditionally_add_to_map("tool_calls", Enum.map(tool_calls, &for_api(model, &1))) end - def for_api(%ContentPart{type: :text} = part) do + def for_api(%_{} = _model, %Message{role: :tool, tool_results: tool_results} = _msg) + when is_list(tool_results) do + # ToolResults turn into a list of tool messages for OpenAI + Enum.map(tool_results, fn result -> + %{ + "role" => :tool, + "tool_call_id" => result.tool_call_id, + "content" => result.content + } + end) + end + + def for_api(%_{} = _model, %ContentPart{type: :text} = part) do %{"type" => "text", "text" => part.content} end - def for_api(%ContentPart{type: image} = part) when image in [:image, :image_url] do + def for_api(%_{} = _model, %ContentPart{type: image} = part) + when image in [:image, :image_url] do media_prefix = case Keyword.get(part.options || [], :media, nil) do nil -> @@ -437,7 +483,7 @@ defmodule LangChain.ChatModels.ChatOpenAI do end # ToolCall support - def for_api(%ToolCall{type: :function} = fun) do + def for_api(%_{} = _model, %ToolCall{type: :function} = fun) do %{ "id" => fun.call_id, "type" => "function", @@ -449,7 +495,7 @@ defmodule LangChain.ChatModels.ChatOpenAI do end # Function support - def for_api(%Function{} = fun) do + def for_api(%_{} = _model, %Function{} = fun) do %{ "name" => fun.name, "parameters" => get_parameters(fun) @@ -457,22 +503,33 @@ defmodule LangChain.ChatModels.ChatOpenAI do |> Utils.conditionally_add_to_map("description", fun.description) end - defp get_parameters(%Function{parameters: [], parameters_schema: nil} = _fun) do + def for_api(%_{} = _model, %PromptTemplate{} = _template) do + raise LangChain.LangChainError, "PromptTemplates must be converted to messages." + end + + @doc false + def get_parameters(%Function{parameters: [], parameters_schema: nil} = _fun) do %{ "type" => "object", "properties" => %{} } end - defp get_parameters(%Function{parameters: [], parameters_schema: schema} = _fun) - when is_map(schema) do + def get_parameters(%Function{parameters: [], parameters_schema: schema} = _fun) + when is_map(schema) do schema end - defp get_parameters(%Function{parameters: params} = _fun) do + def get_parameters(%Function{parameters: params} = _fun) do FunctionParam.to_parameters_schema(params) end + # Convert a message role into either `:system` or :developer` based on the + # message role and the system config. + defp get_message_role(%ChatOpenAI{reasoning_mode: true}, :system), do: :developer + defp get_message_role(%ChatOpenAI{}, role), do: role + defp get_message_role(_model, role), do: role + @doc """ Calls the OpenAI API passing the ChatOpenAI struct with configuration, plus either a simple message or the list of messages to act as the prompt. @@ -889,12 +946,24 @@ defmodule LangChain.ChatModels.ChatOpenAI do # MS Azure returns numeric error codes. 
Interpret them when possible to give a computer-friendly reason # # https://learn.microsoft.com/en-us/troubleshoot/azure/azure-kubernetes/create-upgrade-delete/429-too-many-requests-errors - def do_process_response(_model, %{"error" => %{"code" => code, "message" => reason}}) do + def do_process_response(_model, %{ + "error" => %{"code" => code, "message" => reason} = error_data + }) do type = case code do "429" -> "rate_limit_exceeded" + "unsupported_value" -> + if String.contains?(reason, "does not support 'system' with this model") do + Logger.error( + "This model requires 'reasoning_mode' to be enabled. Reason: #{inspect(reason)}" + ) + + # return the API error type as the exception type information + error_data["type"] + end + _other -> nil end @@ -996,6 +1065,8 @@ defmodule LangChain.ChatModels.ChatOpenAI do :model, :temperature, :frequency_penalty, + :reasoning_mode, + :reasoning_effort, :receive_timeout, :seed, :n, diff --git a/lib/chat_models/chat_vertex_ai.ex b/lib/chat_models/chat_vertex_ai.ex index 6981aeea..700a96b7 100644 --- a/lib/chat_models/chat_vertex_ai.ex +++ b/lib/chat_models/chat_vertex_ai.ex @@ -152,7 +152,7 @@ defmodule LangChain.ChatModels.ChatVertexAI do %{ # Google AI functions use an OpenAI compatible format. # See: https://ai.google.dev/docs/function_calling#how_it_works - "functionDeclarations" => Enum.map(functions, &ChatOpenAI.for_api/1) + "functionDeclarations" => Enum.map(functions, &ChatOpenAI.for_api(vertex_ai, &1)) } ]) else diff --git a/test/chat_models/chat_open_ai_test.exs b/test/chat_models/chat_open_ai_test.exs index b0a7594e..598e712b 100644 --- a/test/chat_models/chat_open_ai_test.exs +++ b/test/chat_models/chat_open_ai_test.exs @@ -94,6 +94,16 @@ defmodule LangChain.ChatModels.ChatOpenAITest do assert openai.json_response == true assert openai.json_schema == json_schema end + + test "supports overriding reasoning_effort" do + # defaults to "medium" + %ChatOpenAI{} = openai = ChatOpenAI.new!() + assert openai.reasoning_effort == "medium" + + # can override the default to "high" + %ChatOpenAI{} = openai = ChatOpenAI.new!(%{"reasoning_effort" => "high"}) + assert openai.reasoning_effort == "high" + end end describe "for_api/3" do @@ -212,54 +222,15 @@ defmodule LangChain.ChatModels.ChatOpenAITest do end describe "for_api/1" do - test "turns a basic user message into the expected JSON format" do - expected = %{"role" => :user, "content" => "Hi."} - result = ChatOpenAI.for_api(Message.new_user!("Hi.")) - assert result == expected - end - - test "includes 'name' when set" do - expected = %{"role" => :user, "content" => "Hi.", "name" => "Harold"} - result = ChatOpenAI.for_api(Message.new!(%{role: :user, content: "Hi.", name: "Harold"})) - assert result == expected - end - - test "turns an assistant message into expected JSON format" do - # NOTE: Does not include tool_calls if empty - expected = %{"role" => :assistant, "content" => "Hi."} - result = ChatOpenAI.for_api(Message.new_assistant!(%{content: "Hi.", tool_calls: []})) - assert result == expected - end - - test "turns a multi-modal user message into the expected JSON format" do - expected = %{ - "role" => :user, - "content" => [ - %{"type" => "text", "text" => "Tell me about this image:"}, - %{"type" => "image_url", "image_url" => %{"url" => "url-to-image"}} - ] - } - - result = - ChatOpenAI.for_api( - Message.new_user!([ - ContentPart.text!("Tell me about this image:"), - ContentPart.image_url!("url-to-image") - ]) - ) - - assert result == expected - end - test "turns a text ContentPart into the 
expected JSON format" do expected = %{"type" => "text", "text" => "Tell me about this image:"} - result = ChatOpenAI.for_api(ContentPart.text!("Tell me about this image:")) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.text!("Tell me about this image:")) assert result == expected end test "turns an image ContentPart into the expected JSON format" do expected = %{"type" => "image_url", "image_url" => %{"url" => "image_base64_data"}} - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data")) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data")) assert result == expected end @@ -269,40 +240,40 @@ defmodule LangChain.ChatModels.ChatOpenAITest do "image_url" => %{"url" => "image_base64_data", "detail" => "low"} } - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", detail: "low")) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data", detail: "low")) assert result == expected end test "turns ContentPart's media type the expected JSON values" do expected = "data:image/jpg;base64,image_base64_data" - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: :jpg)) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data", media: :jpg)) assert %{"image_url" => %{"url" => ^expected}} = result expected = "data:image/jpg;base64,image_base64_data" - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: :jpeg)) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data", media: :jpeg)) assert %{"image_url" => %{"url" => ^expected}} = result expected = "data:image/gif;base64,image_base64_data" - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: :gif)) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data", media: :gif)) assert %{"image_url" => %{"url" => ^expected}} = result expected = "data:image/webp;base64,image_base64_data" - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: :webp)) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data", media: :webp)) assert %{"image_url" => %{"url" => ^expected}} = result expected = "data:image/png;base64,image_base64_data" - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: :png)) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data", media: :png)) assert %{"image_url" => %{"url" => ^expected}} = result # an string value is passed through expected = "data:file/pdf;base64,image_base64_data" - result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: "file/pdf")) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image!("image_base64_data", media: "file/pdf")) assert %{"image_url" => %{"url" => ^expected}} = result end test "turns an image_url ContentPart into the expected JSON format" do expected = %{"type" => "image_url", "image_url" => %{"url" => "url-to-image"}} - result = ChatOpenAI.for_api(ContentPart.image_url!("url-to-image")) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image_url!("url-to-image")) assert result == expected end @@ -312,7 +283,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do "image_url" => %{"url" => "url-to-image", "detail" => "low"} } - result = ChatOpenAI.for_api(ContentPart.image_url!("url-to-image", detail: "low")) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), ContentPart.image_url!("url-to-image", detail: 
"low")) assert result == expected end @@ -320,7 +291,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do tool_call = ToolCall.new!(%{call_id: "call_abc123", name: "hello_world", arguments: "{}"}) - json = ChatOpenAI.for_api(tool_call) + json = ChatOpenAI.for_api(ChatOpenAI.new!(), tool_call) assert json == %{ @@ -347,7 +318,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do ] }) - json = ChatOpenAI.for_api(msg) + json = ChatOpenAI.for_api(ChatOpenAI.new!(), msg) assert json == %{ "role" => :assistant, @@ -365,18 +336,6 @@ defmodule LangChain.ChatModels.ChatOpenAITest do } end - test "turns a ToolResult into the expected JSON format" do - result = ToolResult.new!(%{tool_call_id: "tool_abc123", content: "Hello World!"}) - - json = ChatOpenAI.for_api(result) - - assert json == %{ - "content" => "Hello World!", - "tool_call_id" => "tool_abc123", - "role" => :tool - } - end - test "turns a tool message into expected JSON format" do msg = Message.new_tool_result!(%{ @@ -385,7 +344,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do ] }) - [json] = ChatOpenAI.for_api(msg) + [json] = ChatOpenAI.for_api(ChatOpenAI.new!(), msg) assert json == %{ "content" => "Hello World!", @@ -411,7 +370,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do # ChatGPT expects each tool response to stand alone. This splits them out # and returns them individually. - list = ChatOpenAI.for_api(message) + list = ChatOpenAI.for_api(ChatOpenAI.new!(), message) assert is_list(list) @@ -437,7 +396,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do end test "tools work with minimal definition and no parameters", %{hello_world: hello_world} do - result = ChatOpenAI.for_api(hello_world) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), hello_world) assert result == %{ "name" => "hello_world", @@ -474,7 +433,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do }) # result = Function.for_api(fun) - result = ChatOpenAI.for_api(fun) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), fun) assert result == %{ "name" => "say_hi", @@ -507,7 +466,7 @@ defmodule LangChain.ChatModels.ChatOpenAITest do }) # result = Function.for_api(fun) - result = ChatOpenAI.for_api(fun) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), fun) assert result == %{ "name" => "say_hi", @@ -534,11 +493,91 @@ defmodule LangChain.ChatModels.ChatOpenAITest do # don't try and send an Elixir function ref through to the API {:ok, fun} = Function.new(%{"name" => "hello_world", "function" => &hello_world/2}) # result = Function.for_api(fun) - result = ChatOpenAI.for_api(fun) + result = ChatOpenAI.for_api(ChatOpenAI.new!(), fun) refute Map.has_key?(result, "function") end end + describe "for_api/2" do + test "turns a basic user message into the expected JSON format" do + openai = ChatOpenAI.new!() + + expected = %{"role" => :user, "content" => "Hi."} + result = ChatOpenAI.for_api(openai, Message.new_user!("Hi.")) + assert result == expected + end + + test "includes 'name' when set" do + openai = ChatOpenAI.new!() + + expected = %{"role" => :user, "content" => "Hi.", "name" => "Harold"} + + result = + ChatOpenAI.for_api(openai, Message.new!(%{role: :user, content: "Hi.", name: "Harold"})) + + assert result == expected + end + + test "turns an assistant message into expected JSON format" do + openai = ChatOpenAI.new!() + + # NOTE: Does not include tool_calls if empty + expected = %{"role" => :assistant, "content" => "Hi."} + + result = + ChatOpenAI.for_api(openai, Message.new_assistant!(%{content: "Hi.", tool_calls: []})) + + assert result == expected + end + + 
test "turns a ToolResult into the expected JSON format" do + openai = ChatOpenAI.new!() + result = ToolResult.new!(%{tool_call_id: "tool_abc123", content: "Hello World!"}) + + json = ChatOpenAI.for_api(openai, result) + + assert json == %{ + "content" => "Hello World!", + "tool_call_id" => "tool_abc123", + "role" => :tool + } + end + + test "turns a multi-modal user message into the expected JSON format" do + openai = ChatOpenAI.new!() + + expected = %{ + "role" => :user, + "content" => [ + %{"type" => "text", "text" => "Tell me about this image:"}, + %{"type" => "image_url", "image_url" => %{"url" => "url-to-image"}} + ] + } + + result = + ChatOpenAI.for_api( + openai, + Message.new_user!([ + ContentPart.text!("Tell me about this image:"), + ContentPart.image_url!("url-to-image") + ]) + ) + + assert result == expected + end + + test "turns system role in to developer role based on flag" do + openai = ChatOpenAI.new!() + openai_dev = ChatOpenAI.new!(%{reasoning_mode: true}) + + assert %{"role" => :system} = + ChatOpenAI.for_api(openai, Message.new_system!("System prompt!")) + + assert %{"role" => :developer} = + ChatOpenAI.for_api(openai_dev, Message.new_system!("System prompt!")) + end + end + describe "call/2" do @tag live_call: true, live_open_ai: true test "basic content example and fires ratelimit callback" do @@ -2066,6 +2105,8 @@ defmodule LangChain.ChatModels.ChatOpenAITest do "max_tokens" => 1234, "model" => "gpt-4o", "n" => 1, + "reasoning_mode" => false, + "reasoning_effort" => "medium", "receive_timeout" => 60000, "seed" => 123, "stream" => false, From 0c10c33f2cbeed7742b7ff1481a66908b57aa518 Mon Sep 17 00:00:00 2001 From: Matthew Pope Date: Wed, 22 Jan 2025 11:04:55 -0800 Subject: [PATCH 11/11] feat: Support for Ollama keep_alive API parameter (#237) * feat: Support for Ollama keep_alive API parameter * test fix --- lib/chat_models/chat_ollama_ai.ex | 7 +++++++ test/chat_models/chat_ollama_ai_test.exs | 1 + 2 files changed, 8 insertions(+) diff --git a/lib/chat_models/chat_ollama_ai.ex b/lib/chat_models/chat_ollama_ai.ex index 8b948eca..63725eff 100644 --- a/lib/chat_models/chat_ollama_ai.ex +++ b/lib/chat_models/chat_ollama_ai.ex @@ -49,6 +49,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do @create_fields [ :endpoint, + :keep_alive, :mirostat, :mirostat_eta, :mirostat_tau, @@ -78,6 +79,10 @@ defmodule LangChain.ChatModels.ChatOllamaAI do embedded_schema do field :endpoint, :string, default: "http://localhost:11434/api/chat" + # Change Keep Alive setting for unloading the model from memory. + # (Default: "5m", set to a negative interval to disable) + field :keep_alive, :string, default: "5m" + # Enable Mirostat sampling for controlling perplexity. 
# (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0) field :mirostat, :integer, default: 0 @@ -199,6 +204,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do num_predict: model.num_predict, repeat_last_n: model.repeat_last_n, repeat_penalty: model.repeat_penalty, + keep_alive: model.keep_alive, mirostat: model.mirostat, mirostat_eta: model.mirostat_eta, mirostat_tau: model.mirostat_tau, @@ -411,6 +417,7 @@ defmodule LangChain.ChatModels.ChatOllamaAI do model, [ :endpoint, + :keep_alive, :model, :mirostat, :mirostat_eta, diff --git a/test/chat_models/chat_ollama_ai_test.exs b/test/chat_models/chat_ollama_ai_test.exs index b1fbd3f1..68c702d4 100644 --- a/test/chat_models/chat_ollama_ai_test.exs +++ b/test/chat_models/chat_ollama_ai_test.exs @@ -257,6 +257,7 @@ defmodule ChatModels.ChatOllamaAITest do assert result == %{ "endpoint" => "http://localhost:11434/api/chat", + "keep_alive" => "5m", "mirostat" => 0, "mirostat_eta" => 0.1, "mirostat_tau" => 5.0,