Add prompt caching support for Claude. (#226)
- Both ChatGPT and Claude offer prefix-based prompt caching; however, unlike ChatGPT's, Claude's prompt caching is not automatic.
- Claude caches tokens up to and including the block marked with `cache_control`. This can include tools, system, and messages, in that order.
- `cache_control` can now be set at the ContentPart level by setting the `:cache_control` option to `true` or `%{type: "ephemeral"}`. See `ChatAnthropicTest` and the sketch below for an example.
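
For illustration, a minimal usage sketch (mirroring the new test in `ChatAnthropicTest`; the text contents are placeholders):

```elixir
alias LangChain.Message
alias LangChain.Message.ContentPart

# Put the large, stable prefix in a system message and flag the last block
# to cache; `cache_control: true` is shorthand for `%{type: "ephemeral"}`.
Message.new_system!([
  ContentPart.text!("You are an AI assistant tasked with analyzing literary works.\n"),
  ContentPart.text!("<the entire contents of Pride and Prejudice>", cache_control: true)
])
```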

Co-authored-by: Mark Ericksen <[email protected]>
montebrown and brainlid authored Jan 22, 2025
1 parent 0c10c33 commit c2b22c4
Showing 6 changed files with 70 additions and 18 deletions.
8 changes: 8 additions & 0 deletions README.md
@@ -122,6 +122,14 @@ A list of models to use:
- [OpenAI models on Azure](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models)
- [Gemini AI models](https://ai.google.dev/gemini-api/docs/models/gemini)

## Prompt caching

ChatGPT and Claude both offer prefix-based prompt caching, which can provide cost and performance benefits for longer prompts. Gemini offers a similar feature called context caching.

- [ChatGPT's prompt caching](https://openai.com/index/api-prompt-caching/) is automatic for prompts longer than 1024 tokens, caching the longest common prefix.
- [Claude's prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) is not automatic. Its prefix caching covers tools, system, and then messages, in that order, up to and including the block designated with `{"cache_control": {"type": "ephemeral"}}`. See `LangChain.ChatModels.ChatAnthropicTest` and the sketch below for an example.
- [Gemini's context caching](https://ai.google.dev/gemini-api/docs/caching?lang=python) requires a separate API call, which LangChain does not currently support.
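
For example, here is a minimal sketch of marking a system-message prefix for caching with the Anthropic model (adapted from the test added in this commit; the text contents are placeholders):

```elixir
alias LangChain.ChatModels.ChatAnthropic
alias LangChain.Message
alias LangChain.Message.ContentPart

{:ok, anthropic} = ChatAnthropic.new()

# Everything up to and including the part flagged with `cache_control: true`
# becomes the cached prefix; the flagged part is serialized with
# "cache_control" => %{"type" => "ephemeral"}.
data =
  ChatAnthropic.for_api(
    anthropic,
    [
      Message.new_system!([
        ContentPart.text!("You are an AI assistant analyzing literary works.\n"),
        ContentPart.text!("<large reference text>", cache_control: true)
      ])
    ],
    []
  )
```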

## Usage

The central module in this library is `LangChain.Chains.LLMChain`. Most other pieces are either inputs to this, or structures used by it. For understanding how to use the library, start there.
31 changes: 22 additions & 9 deletions lib/chat_models/chat_anthropic.ex
@@ -220,13 +220,19 @@ defmodule LangChain.ChatModels.ChatAnthropic do
def for_api(%ChatAnthropic{} = anthropic, messages, tools) do
# separate the system message from the rest. Handled separately.
{system, messages} =
-  Utils.split_system_message(messages, "Anthropic only supports a single System message")
+  Utils.split_system_message(
+    messages,
+    "Anthropic only supports a single System message, however, you may use multiple ContentParts for the System message to indicate where prompt caching should be used."
+  )

system_text =
case system do
nil ->
nil

+  %Message{role: :system, content: [_ | _]} = message ->
+    for_api(message)

%Message{role: :system, content: content} ->
content
end
@@ -806,7 +812,7 @@ defmodule LangChain.ChatModels.ChatAnthropic do
end

@doc """
- Convert a LangChain structure to the expected map of data for the OpenAI API.
+ Convert a LangChain structure to the expected map of data for the Anthropic API.
"""
@spec for_api(Message.t() | ContentPart.t() | Function.t()) ::
%{String.t() => any()} | no_return()
@@ -859,8 +865,19 @@ defmodule LangChain.ChatModels.ChatAnthropic do
}
end

+ def for_api(%Message{role: :system, content: content}) when is_list(content) do
+   Enum.map(content, &for_api(&1))
+ end

def for_api(%ContentPart{type: :text} = part) do
%{"type" => "text", "text" => part.content}
case Keyword.fetch(part.options || [], :cache_control) do
:error ->
%{"type" => "text", "text" => part.content}

{:ok, setting} ->
setting = if setting == true, do: %{"type" => "ephemeral"}, else: setting
%{"type" => "text", "text" => part.content, "cache_control" => setting}
end
end

def for_api(%ContentPart{type: :image} = part) do
@@ -1013,12 +1030,8 @@ defmodule LangChain.ChatModels.ChatAnthropic do
end

defp get_token_usage(%{"usage" => usage} = _response_body) do
-   # extract out the reported response token usage
-   #
-   # defp get_token_usage(%{"usage" => usage} = _response_body) do
-   # extract out the reported response token usage
-   #
-   # https://platform.openai.com/docs/api-reference/chat/object#chat/object-usage
+   # if prompt caching has been used the response will also contain
+   # "cache_creation_input_tokens" and "cache_read_input_tokens"
TokenUsage.new!(%{
input: Map.get(usage, "input_tokens"),
output: Map.get(usage, "output_tokens")
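
As the new comment notes, responses that used prompt caching also include `cache_creation_input_tokens` and `cache_read_input_tokens`, which `get_token_usage/1` does not yet surface. A hypothetical helper (not part of this diff) could read them out of the same map:

```elixir
# Hypothetical sketch: extract Anthropic's cache counters from the raw
# "usage" map. Field names come from the comment above; defaulting the
# counters to 0 is an assumption.
defp get_cache_token_usage(%{"usage" => usage} = _response_body) do
  %{
    cache_creation_input_tokens: Map.get(usage, "cache_creation_input_tokens", 0),
    cache_read_input_tokens: Map.get(usage, "cache_read_input_tokens", 0)
  }
end
```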
4 changes: 2 additions & 2 deletions lib/message.ex
@@ -191,15 +191,15 @@ defmodule LangChain.Message do
changeset

{:ok, content} when is_list(content) ->
- if role in [:user, :assistant] do
+ if role in [:user, :assistant, :system] do
# if a list, verify all elements are a ContentPart or PromptTemplate
if Enum.all?(content, &(match?(%ContentPart{}, &1) or match?(%PromptTemplate{}, &1))) do
changeset
else
add_error(changeset, :content, "must be text or a list of ContentParts")
end
else
- # only a user message can have ContentParts
+ # only a user message can have ContentParts (except for ChatAnthropic system messages)
Logger.error(
"Invalid message content #{inspect(get_field(changeset, :content))} for role #{role}"
)
4 changes: 2 additions & 2 deletions lib/message/content_part.ex
@@ -89,8 +89,8 @@ defmodule LangChain.Message.ContentPart do
Create a new ContentPart that contains text. Raises an exception if not valid.
"""
@spec text!(String.t()) :: t() | no_return()
- def text!(content) do
-   new!(%{type: :text, content: content})
+ def text!(content, opts \\ []) do
+   new!(%{type: :text, content: content, options: opts})
end

@doc """
35 changes: 35 additions & 0 deletions test/chat_models/chat_anthropic_test.exs
@@ -104,6 +104,41 @@ defmodule LangChain.ChatModels.ChatAnthropicTest do
assert "You are my helpful hero." == data[:system]
end

test "supports prompt caching in the system message" do
{:ok, anthropic} = ChatAnthropic.new()

# this example is from https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching.
data =
ChatAnthropic.for_api(
anthropic,
[
Message.new_system!([
ContentPart.text!(
"You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n"
),
ContentPart.text!("<the entire contents of Pride and Prejudice>",
cache_control: true
)
])
],
[]
)

assert data.system ==
[
%{
"text" =>
"You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n",
"type" => "text"
},
%{
"cache_control" => %{"type" => "ephemeral"},
"text" => "<the entire contents of Pride and Prejudice>",
"type" => "text"
}
]
end

test "generates a map for an API call with max_tokens set" do
{:ok, anthropic} =
ChatAnthropic.new(%{
6 changes: 1 addition & 5 deletions test/message_test.exs
@@ -94,10 +94,6 @@ defmodule LangChain.MessageTest do
{:ok, message} = Message.new_user("Hi")
assert message.content == "Hi"

- # content parts not allowed for other role types
- {:error, changeset} = Message.new_system([part])
- assert {"is invalid for role system", _} = changeset.errors[:content]

{:error, changeset} =
Message.new(%{
role: :tool,
@@ -162,7 +158,7 @@
assert msg.role == :user

assert msg.content == [
%ContentPart{type: :text, content: "Describe what is in this image:"},
%ContentPart{type: :text, content: "Describe what is in this image:", options: []},
%ContentPart{type: :image, content: "ZmFrZV9pbWFnZV9kYXRh", options: []}
]
end
