diff --git a/MODEL_BEHAVIORS.md b/MODEL_BEHAVIORS.md index 84f92b9d..dced858f 100644 --- a/MODEL_BEHAVIORS.md +++ b/MODEL_BEHAVIORS.md @@ -8,4 +8,12 @@ This is to document some of the differences between model behaviors. This is spe - 2024-04-17 - Anthropic's Claude 3 does respond well to pre-filled assistant responses and it is officially encouraged. - - 2024-04-17 \ No newline at end of file + - 2024-04-17 + +## Image content parts + +- Anthropic will not work if the media type is included with the base64 data, i.e. `"data:image/jpeg;base64," <> "..."` + - Requires providing the media type as a separate option. +- ChatGPT - + - When `base64` data is given, requires the image data to be prefixed with the media type information. + - Ex: `"data:image/jpeg;base64," <> "..."` diff --git a/lib/chat_models/chat_open_ai.ex b/lib/chat_models/chat_open_ai.ex index 210755dd..6d67d1b3 100644 --- a/lib/chat_models/chat_open_ai.ex +++ b/lib/chat_models/chat_open_ai.ex @@ -239,7 +239,21 @@ defmodule LangChain.ChatModels.ChatOpenAI do end def for_api(%ContentPart{type: image} = part) when image in [:image, :image_url] do - %{"type" => "image_url", "image_url" => %{"url" => part.content}} + media_prefix = + case Keyword.get(part.options || [], :media, nil) do + nil -> + "" + + type when is_binary(type) -> + "data:#{type};base64," + + other -> + message = "Received unsupported media type for ContentPart: #{inspect(other)}" + Logger.error(message) + raise LangChainError, message + end + + %{"type" => "image_url", "image_url" => %{"url" => media_prefix <> part.content}} end # ToolCall support diff --git a/lib/message/content_part.ex b/lib/message/content_part.ex index a9c1897a..849de936 100644 --- a/lib/message/content_part.ex +++ b/lib/message/content_part.ex @@ -84,6 +84,14 @@ defmodule LangChain.Message.ContentPart do - `:media` - Provide the "media type" for the image. Examples: "image/jpeg", "image/png", etc. ChatGPT does not require this but other LLMs may. 
+ + ChatGPT requires media type information to prefix the base64 content. Setting + the `media: "image/jpeg"` option adds that prefix automatically. Otherwise the + data must be provided with the required prefix already included. + + Anthropic requires the media type information to be submitted as separate + information with the JSON request. This media option provides an abstraction + to normalize the behavior. """ @spec image!(String.t(), Keyword.t()) :: t() | no_return() def image!(content, opts \\ []) do diff --git a/test/chat_models/chat_open_ai_test.exs b/test/chat_models/chat_open_ai_test.exs index 590afd7a..e76ea0c8 100644 --- a/test/chat_models/chat_open_ai_test.exs +++ b/test/chat_models/chat_open_ai_test.exs @@ -177,6 +177,24 @@ defmodule LangChain.ChatModels.ChatOpenAITest do assert result == expected end + test "turns an image ContentPart with base64 media into the expected JSON format" do + expected = %{ + "type" => "image_url", + "image_url" => %{"url" => "data:image/jpeg;base64,image_base64_data"} + } + + result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: "image/jpeg")) + assert result == expected + + expected = %{ + "type" => "image_url", + "image_url" => %{"url" => "data:image/png;base64,image_base64_data"} + } + + result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: "image/png")) + assert result == expected + end + test "turns an image_url ContentPart into the expected JSON format" do expected = %{"type" => "image_url", "image_url" => %{"url" => "url-to-image"}} result = ChatOpenAI.for_api(ContentPart.image_url!("url-to-image"))