diff --git a/README.md b/README.md
index d91bb05a..bd23e170 100644
--- a/README.md
+++ b/README.md
@@ -122,6 +122,14 @@ A list of models to use:
 - [OpenAI models on Azure](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models)
 - [Gemini AI models](https://ai.google.dev/gemini-api/docs/models/gemini)
 
+## Prompt caching
+
+ChatGPT and Claude both offer prefix-based prompt caching, which can provide cost and performance benefits for longer prompts. Gemini offers context caching, which is similar.
+
+- [ChatGPT's prompt caching](https://openai.com/index/api-prompt-caching/) is automatic for prompts longer than 1024 tokens, caching the longest common prefix.
+- [Claude's prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) is not automatic. Its prefix processing covers tools, system, and then messages, in that order, up to and including the block designated with `{"cache_control": {"type": "ephemeral"}}`. See `LangChain.ChatModels.ChatAnthropicTest` for an example.
+- [Gemini's context caching](https://ai.google.dev/gemini-api/docs/caching?lang=python) requires a separate call, which is not supported by LangChain.
+
 ## Usage
 
 The central module in this library is `LangChain.Chains.LLMChain`. Most other pieces are either inputs to this, or structures used by it. For understanding how to use the library, start there.
diff --git a/lib/chat_models/chat_anthropic.ex b/lib/chat_models/chat_anthropic.ex
index 40c1ff17..e00932de 100644
--- a/lib/chat_models/chat_anthropic.ex
+++ b/lib/chat_models/chat_anthropic.ex
@@ -220,13 +220,19 @@ defmodule LangChain.ChatModels.ChatAnthropic do
   def for_api(%ChatAnthropic{} = anthropic, messages, tools) do
     # separate the system message from the rest. Handled separately.
     {system, messages} =
-      Utils.split_system_message(messages, "Anthropic only supports a single System message")
+      Utils.split_system_message(
+        messages,
+        "Anthropic only supports a single System message. However, you may use multiple ContentParts in the System message to indicate where prompt caching should be used."
+      )
 
     system_text =
       case system do
         nil ->
           nil
 
+        %Message{role: :system, content: [_ | _]} = message ->
+          for_api(message)
+
         %Message{role: :system, content: content} ->
           content
       end
@@ -806,7 +812,7 @@ defmodule LangChain.ChatModels.ChatAnthropic do
   end
 
   @doc """
-  Convert a LangChain structure to the expected map of data for the OpenAI API.
+  Convert a LangChain structure to the expected map of data for the Anthropic API.
""" @spec for_api(Message.t() | ContentPart.t() | Function.t()) :: %{String.t() => any()} | no_return() @@ -859,8 +865,19 @@ defmodule LangChain.ChatModels.ChatAnthropic do } end + def for_api(%Message{role: :system, content: content}) when is_list(content) do + Enum.map(content, &for_api(&1)) + end + def for_api(%ContentPart{type: :text} = part) do - %{"type" => "text", "text" => part.content} + case Keyword.fetch(part.options || [], :cache_control) do + :error -> + %{"type" => "text", "text" => part.content} + + {:ok, setting} -> + setting = if setting == true, do: %{"type" => "ephemeral"}, else: setting + %{"type" => "text", "text" => part.content, "cache_control" => setting} + end end def for_api(%ContentPart{type: :image} = part) do @@ -1013,12 +1030,8 @@ defmodule LangChain.ChatModels.ChatAnthropic do end defp get_token_usage(%{"usage" => usage} = _response_body) do - # extract out the reported response token usage - # - # defp get_token_usage(%{"usage" => usage} = _response_body) do - # extract out the reported response token usage - # - # https://platform.openai.com/docs/api-reference/chat/object#chat/object-usage + # if prompt caching has been used the response will also contain + # "cache_creation_input_tokens" and "cache_read_input_tokens" TokenUsage.new!(%{ input: Map.get(usage, "input_tokens"), output: Map.get(usage, "output_tokens") diff --git a/lib/message.ex b/lib/message.ex index 2360f483..9d72cd6d 100644 --- a/lib/message.ex +++ b/lib/message.ex @@ -191,7 +191,7 @@ defmodule LangChain.Message do changeset {:ok, content} when is_list(content) -> - if role in [:user, :assistant] do + if role in [:user, :assistant, :system] do # if a list, verify all elements are a ContentPart or PromptTemplate if Enum.all?(content, &(match?(%ContentPart{}, &1) or match?(%PromptTemplate{}, &1))) do changeset @@ -199,7 +199,7 @@ defmodule LangChain.Message do add_error(changeset, :content, "must be text or a list of ContentParts") end else - # only a user message can have ContentParts + # only a user message can have ContentParts (except for ChatAnthropic system messages) Logger.error( "Invalid message content #{inspect(get_field(changeset, :content))} for role #{role}" ) diff --git a/lib/message/content_part.ex b/lib/message/content_part.ex index 412dfc4b..3f44a1e2 100644 --- a/lib/message/content_part.ex +++ b/lib/message/content_part.ex @@ -89,8 +89,8 @@ defmodule LangChain.Message.ContentPart do Create a new ContentPart that contains text. Raises an exception if not valid. """ @spec text!(String.t()) :: t() | no_return() - def text!(content) do - new!(%{type: :text, content: content}) + def text!(content, opts \\ []) do + new!(%{type: :text, content: content, options: opts}) end @doc """ diff --git a/test/chat_models/chat_anthropic_test.exs b/test/chat_models/chat_anthropic_test.exs index c7304ae3..035ecd38 100644 --- a/test/chat_models/chat_anthropic_test.exs +++ b/test/chat_models/chat_anthropic_test.exs @@ -104,6 +104,41 @@ defmodule LangChain.ChatModels.ChatAnthropicTest do assert "You are my helpful hero." == data[:system] end + test "supports prompt caching in the system message" do + {:ok, anthropic} = ChatAnthropic.new() + + # this example is from https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching. + data = + ChatAnthropic.for_api( + anthropic, + [ + Message.new_system!([ + ContentPart.text!( + "You are an AI assistant tasked with analyzing literary works. 
Your goal is to provide insightful commentary on themes, characters, and writing style.\n" + ), + ContentPart.text!("", + cache_control: true + ) + ]) + ], + [] + ) + + assert data.system == + [ + %{ + "text" => + "You are an AI assistant tasked with analyzing literary works. Your goal is to provide insightful commentary on themes, characters, and writing style.\n", + "type" => "text" + }, + %{ + "cache_control" => %{"type" => "ephemeral"}, + "text" => "", + "type" => "text" + } + ] + end + test "generates a map for an API call with max_tokens set" do {:ok, anthropic} = ChatAnthropic.new(%{ diff --git a/test/message_test.exs b/test/message_test.exs index 2f7c66bd..ba3f7d86 100644 --- a/test/message_test.exs +++ b/test/message_test.exs @@ -94,10 +94,6 @@ defmodule LangChain.MessageTest do {:ok, message} = Message.new_user("Hi") assert message.content == "Hi" - # content parts not allowed for other role types - {:error, changeset} = Message.new_system([part]) - assert {"is invalid for role system", _} = changeset.errors[:content] - {:error, changeset} = Message.new(%{ role: :tool, @@ -162,7 +158,7 @@ defmodule LangChain.MessageTest do assert msg.role == :user assert msg.content == [ - %ContentPart{type: :text, content: "Describe what is in this image:"}, + %ContentPart{type: :text, content: "Describe what is in this image:", options: []}, %ContentPart{type: :image, content: "ZmFrZV9pbWFnZV9kYXRh", options: []} ] end
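
For reference, a minimal usage sketch of what this change enables, mirroring the new test above: the tail of a multi-part system message is flagged with `cache_control: true`, and `ChatAnthropic.for_api/3` then emits Anthropic's `"cache_control"` block for that part. The model name and the cached document text below are illustrative placeholders, not values taken from this changeset.

```elixir
alias LangChain.ChatModels.ChatAnthropic
alias LangChain.Message
alias LangChain.Message.ContentPart

# Illustrative model name; any Anthropic model that supports prompt caching applies.
{:ok, chat} = ChatAnthropic.new(%{model: "claude-3-5-sonnet-20240620"})

# Multi-part system message: everything up to and including the part flagged with
# `cache_control: true` becomes eligible for Anthropic's prompt cache.
system =
  Message.new_system!([
    ContentPart.text!("You are an AI assistant tasked with analyzing literary works.\n"),
    ContentPart.text!("<large, stable context such as the full book text>", cache_control: true)
  ])

# Build the request body; the flagged part carries %{"cache_control" => %{"type" => "ephemeral"}}.
data =
  ChatAnthropic.for_api(chat, [system, Message.new_user!("Analyze the major themes.")], [])
```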