Add prompt caching support for Claude. (#226)
- Both ChatGPT and Claude offer prefix-based prompt caching; however, unlike ChatGPT's, Claude's prompt caching is not automatic.
- Claude caches tokens up to and including the block marked with `cache_control`. This can include tools, system, and messages, in that order.
- `cache_control` can now be set at the ContentPart level by setting the `:cache_control` option to `true` or `%{type: "ephemeral"}`. See `ChatAnthropicTest` and the sketch below for an example.
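
For illustration, a minimal usage sketch (mirroring the new test in `ChatAnthropicTest`; the text contents are placeholders):

```elixir
alias LangChain.Message
alias LangChain.Message.ContentPart

# Put the large, stable prefix in a system message and flag the last block
# to cache; `cache_control: true` is shorthand for `%{type: "ephemeral"}`.
Message.new_system!([
  ContentPart.text!("You are an AI assistant tasked with analyzing literary works.\n"),
  ContentPart.text!("<the entire contents of Pride and Prejudice>", cache_control: true)
])
```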

Co-authored-by: Mark Ericksen <[email protected]>
montebrown and brainlid authored Jan 22, 2025
1 parent 0c10c33 commit c2b22c4
Showing 6 changed files with 70 additions and 18 deletions.
8 changes: 8 additions & 0 deletions README.md
@@ -122,6 +122,14 @@ A list of models to use:
- [OpenAI models on Azure](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models)
- [Gemini AI models](https://ai.google.dev/gemini-api/docs/models/gemini)

## Prompt caching

ChatGPT and Claude both offer prefix-based prompt caching, which can provide cost and performance benefits for longer prompts. Gemini offers a similar feature called context caching.

- [ChatGPT's prompt caching](https://openai.com/index/api-prompt-caching/) is automatic for prompts longer than 1024 tokens, caching the longest common prefix.
- [Claude's prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) is not automatic. Its prefix caching covers tools, system, and then messages, in that order, up to and including the block designated with `{"cache_control": {"type": "ephemeral"}}`. See `LangChain.ChatModels.ChatAnthropicTest` and the sketch below for an example.
- [Gemini's context caching](https://ai.google.dev/gemini-api/docs/caching?lang=python) requires a separate API call, which LangChain does not currently support.
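
For example, here is a minimal sketch of marking a system-message prefix for caching with the Anthropic model (adapted from the test added in this commit; the text contents are placeholders):

```elixir
alias LangChain.ChatModels.ChatAnthropic
alias LangChain.Message
alias LangChain.Message.ContentPart

{:ok, anthropic} = ChatAnthropic.new()

# Everything up to and including the part flagged with `cache_control: true`
# becomes the cached prefix; the flagged part is serialized with
# "cache_control" => %{"type" => "ephemeral"}.
data =
  ChatAnthropic.for_api(
    anthropic,
    [
      Message.new_system!([
        ContentPart.text!("You are an AI assistant analyzing literary works.\n"),
        ContentPart.text!("<large reference text>", cache_control: true)
      ])
    ],
    []
  )
```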

## Usage

The central module in this library is `LangChain.Chains.LLMChain`. Most other pieces are either inputs to this, or structures used by it. For understanding how to use the library, start there.
31 changes: 22 additions & 9 deletions lib/chat_models/chat_anthropic.ex
@@ -220,13 +220,19 @@ defmodule LangChain.ChatModels.ChatAnthropic do
def for_api(%ChatAnthropic{} = anthropic, messages, tools) do
# separate the system message from the rest. Handled separately.
{system, messages} =
-  Utils.split_system_message(messages, "Anthropic only supports a single System message")
+  Utils.split_system_message(
+    messages,
+    "Anthropic only supports a single System message, however, you may use multiple ContentParts for the System message to indicate where prompt caching should be used."
+  )

system_text =
case system do
nil ->
nil

+  %Message{role: :system, content: [_ | _]} = message ->
+    for_api(message)

%Message{role: :system, content: content} ->
content
end
@@ -806,7 +812,7 @@ defmodule LangChain.ChatModels.ChatAnthropic do
end

@doc """
- Convert a LangChain structure to the expected map of data for the OpenAI API.
+ Convert a LangChain structure to the expected map of data for the Anthropic API.
"""
@spec for_api(Message.t() | ContentPart.t() | Function.t()) ::
%{String.t() => any()} | no_return()
@@ -859,8 +865,19 @@ defmodule LangChain.ChatModels.ChatAnthropic do
}
end

+ def for_api(%Message{role: :system, content: content}) when is_list(content) do
+   Enum.map(content, &for_api(&1))
+ end

def for_api(%ContentPart{type: :text} = part) do
%{"type" => "text", "text" => part.content}
case Keyword.fetch(part.options || [], :cache_control) do
:error ->
%{"type" => "text", "text" => part.content}

{:ok, setting} ->
setting = if setting == true, do: %{"type" => "ephemeral"}, else: setting
%{"type" => "text", "text" => part.content, "cache_control" => setting}
end
end

def for_api(%ContentPart{type: :image} = part) do
@@ -1013,12 +1030,8 @@ defmodule LangChain.ChatModels.ChatAnthropic do
end

defp get_token_usage(%{"usage" => usage} = _response_body) do
-   # extract out the reported response token usage
-   #
-   # defp get_token_usage(%{"usage" => usage} = _response_body) do
-   # extract out the reported response token usage
-   #
-   # https://platform.openai.com/docs/api-reference/chat/object#chat/object-usage
+   # if prompt caching has been used the response will also contain
+   # "cache_creation_input_tokens" and "cache_read_input_tokens"
TokenUsage.new!(%{
input: Map.get(usage, "input_tokens"),
output: Map.get(usage, "output_tokens")
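
As the new comment notes, responses that used prompt caching also include `cache_creation_input_tokens` and `cache_read_input_tokens`, which `get_token_usage/1` does not yet surface. A hypothetical helper (not part of this diff) could read them out of the same map:

```elixir
# Hypothetical sketch: extract Anthropic's cache counters from the raw
# "usage" map. Field names come from the comment above; defaulting the
# counters to 0 is an assumption.
defp get_cache_token_usage(%{"usage" => usage} = _response_body) do
  %{
    cache_creation_input_tokens: Map.get(usage, "cache_creation_input_tokens", 0),
    cache_read_input_tokens: Map.get(usage, "cache_read_input_tokens", 0)
  }
end
```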
4 changes: 2 additions & 2 deletions lib/message.ex
@@ -191,15 +191,15 @@ defmodule LangChain.Message do
changeset

{:ok, content} when is_list(content) ->
- if role in [:user, :assistant] do
+ if role in [:user, :assistant, :system] do
# if a list, verify all elements are a ContentPart or PromptTemplate
if Enum.all?(content, &(match?(%ContentPart{}, &1) or match?(%PromptTemplate{}, &1))) do
changeset
else
add_error(changeset, :content, "must be text or a list of ContentParts")
end
else
- # only a user message can have ContentParts
+ # only a user message can have ContentParts (except for ChatAnthropic system messages)
Logger.error(
"Invalid message content #{inspect(get_field(changeset, :content))} for role #{role}"
)
4 changes: 2 additions & 2 deletions lib/message/content_part.ex
@@ -89,8 +89,8 @@ defmodule LangChain.Message.ContentPart do
Create a new ContentPart that contains text. Raises an exception if not valid.
"""
@spec text!(String.t()) :: t() | no_return()
- def text!(content) do
-   new!(%{type: :text, content: content})
+ def text!(content, opts \\ []) do
+   new!(%{type: :text, content: content, options: opts})
end

@doc """
35 changes: 35 additions & 0 deletions test/chat_models/chat_anthropic_test.exs
@@ -104,6 +104,41 @@ defmodule LangChain.ChatModels.ChatAnthropicTest do
assert "You are my helpful hero." == data[:system]
end

test "supports prompt caching in the system message" do
{:ok, anthropic} = ChatAnthropic.new()

# this example is from https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching.
data =
ChatAnthropic.for_api(
anthropic,
[
Message.new_system!([
ContentPart.text!(
"You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n"
),
ContentPart.text!("<the entire contents of Pride and Prejudice>",
cache_control: true
)
])
],
[]
)

assert data.system ==
[
%{
"text" =>
"You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n",
"type" => "text"
},
%{
"cache_control" => %{"type" => "ephemeral"},
"text" => "<the entire contents of Pride and Prejudice>",
"type" => "text"
}
]
end

test "generates a map for an API call with max_tokens set" do
{:ok, anthropic} =
ChatAnthropic.new(%{
6 changes: 1 addition & 5 deletions test/message_test.exs
@@ -94,10 +94,6 @@ defmodule LangChain.MessageTest do
{:ok, message} = Message.new_user("Hi")
assert message.content == "Hi"

- # content parts not allowed for other role types
- {:error, changeset} = Message.new_system([part])
- assert {"is invalid for role system", _} = changeset.errors[:content]

{:error, changeset} =
Message.new(%{
role: :tool,
@@ -162,7 +158,7 @@
assert msg.role == :user

assert msg.content == [
%ContentPart{type: :text, content: "Describe what is in this image:"},
%ContentPart{type: :text, content: "Describe what is in this image:", options: []},
%ContentPart{type: :image, content: "ZmFrZV9pbWFnZV9kYXRh", options: []}
]
end
