From 7f434e3431e35ebe819fa936e37ec0eda487eed3 Mon Sep 17 00:00:00 2001 From: Facundo Santiago Date: Wed, 15 Jan 2025 16:49:35 +0000 Subject: [PATCH] feat: image embeddings --- .../GetChatCompletions_MaximumSet_Gen.json | 1 + .../GetImageEmbeddings_MaximumSet_Gen.json | 53 +++++++ .../GetImageEmbeddings_MinimumSet_Gen.json | 47 +++++++ .../models/image_embeddings.tsp | 61 ++++++++ specification/ai/ModelInference/routes.tsp | 15 ++ .../GetChatCompletions_MaximumSet_Gen.json | 1 + .../GetImageEmbeddings_MaximumSet_Gen.json | 53 +++++++ .../GetImageEmbeddings_MinimumSet_Gen.json | 47 +++++++ .../preview/2024-05-01-preview/openapi.json | 133 ++++++++++++++++++ .../preview/2024-05-01-preview/openapi.yaml | 93 ++++++++++++ 10 files changed, 504 insertions(+) create mode 100644 specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json create mode 100644 specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MinimumSet_Gen.json create mode 100644 specification/ai/ModelInference/models/image_embeddings.tsp create mode 100644 specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json create mode 100644 specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json diff --git a/specification/ai/ModelInference/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json index 2dff1c3b6470..eaf087423616 100644 --- a/specification/ai/ModelInference/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json +++ b/specification/ai/ModelInference/examples/2024-05-01-preview/GetChatCompletions_MaximumSet_Gen.json @@ -5,6 +5,7 @@ "api-version": "2024-05-01-preview", "extra-parameters": "error", "body": { + "modalities": [ "text" ], "messages": [ { "role": "system", diff --git 
a/specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json new file mode 100644 index 000000000000..f952674b6e3f --- /dev/null +++ b/specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MaximumSet_Gen.json @@ -0,0 +1,53 @@ +{ + "title": "maximum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "extra-parameters": "error", + "body": { + "input": [ + { + "image": "puqkvvlvgcjyzughesnkena", + "text": "azrzyjsmnuefqpowpvfmyobeehqsni" + } + ], + "dimensions": 1024, + "encoding_format": "float", + "input_type": "text", + "model": "my-model-name" + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 0, + "object": "embedding", + "embedding": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + } + ], + "object": "list", + "model": "my-model-name", + "usage": { + "prompt_tokens": 15, + "total_tokens": 15 + } + } + } + } +} diff --git a/specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MinimumSet_Gen.json new file mode 100644 index 000000000000..b90e35374308 --- /dev/null +++ b/specification/ai/ModelInference/examples/2024-05-01-preview/GetImageEmbeddings_MinimumSet_Gen.json @@ -0,0 +1,47 @@ +{ + "title": "minimum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "input": [ + { + "image": "gvmojtfooxixxzayrditjlyymg" + } + ] + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 0, + "object": "embedding", + "embedding": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + } + ], + "object": "list", + "model": 
"my-model-name", + "usage": { + "prompt_tokens": 15, + "total_tokens": 15 + } + } + } + } +} diff --git a/specification/ai/ModelInference/models/image_embeddings.tsp b/specification/ai/ModelInference/models/image_embeddings.tsp new file mode 100644 index 000000000000..28f16c5090c5 --- /dev/null +++ b/specification/ai/ModelInference/models/image_embeddings.tsp @@ -0,0 +1,61 @@ +import "@typespec/rest"; +import "@typespec/http"; + +import "./embeddings.tsp"; + +using TypeSpec.Rest; +using TypeSpec.Http; + +namespace ModelInference; + +@doc(""" + The configuration information for an image embeddings request. + """) +model ImageEmbeddingsOptions { + @doc(""" + Input image to embed. To embed multiple inputs in a single request, pass an array. + The input must not exceed the max input tokens for the model. + """) + input: ImageEmbeddingInput[]; + + @doc(""" + Optional. The number of dimensions the resulting output embeddings should have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. + """) + dimensions?: int32; + + @doc(""" + Optional. The desired format for the returned embeddings. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. + """) + encoding_format?: EmbeddingEncodingFormat; + + @doc(""" + Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. + """) + input_type?: EmbeddingInputType; + + @doc(""" + ID of the specific AI model to use, if more than one model is available on the endpoint. + """) + `model`?: string; + + ...Record; +} + +@doc("Represents an image with optional text.") +model ImageEmbeddingInput { + @doc(""" + The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`. + """) + image: string; + + @doc(""" + Optional. 
The text input to feed into the model (like DINO, CLIP). + Returns a 422 error if the model doesn't support the value or parameter. + """) + text?: string; +} diff --git a/specification/ai/ModelInference/routes.tsp b/specification/ai/ModelInference/routes.tsp index 7c7eb6c7903c..b5f2e5d02407 100644 --- a/specification/ai/ModelInference/routes.tsp +++ b/specification/ai/ModelInference/routes.tsp @@ -6,6 +6,7 @@ import "@typespec/versioning"; import "./models/common.tsp"; import "./models/chat_completions.tsp"; import "./models/embeddings.tsp"; +import "./models/image_embeddings.tsp"; using TypeSpec.Rest; using TypeSpec.Http; @@ -44,6 +45,20 @@ op getEmbeddings is Azure.Core.RpcOperation< EmbeddingsResult >; +@doc(""" + Return the embedding vectors for given images. + The method makes a REST API call to the `/images/embeddings` route on the given endpoint. + """) +@actionSeparator("/") +@route("images/embeddings") +op getImageEmbeddings is Azure.Core.RpcOperation< + { + ...ImageEmbeddingsOptions; + ...AdditionalRequestHeaders; + }, + EmbeddingsResult +>; + @doc(""" Returns information about the AI model. The method makes a REST API call to the `/info` route on the given endpoint. 
diff --git a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json index 2dff1c3b6470..eaf087423616 100644 --- a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json +++ b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetChatCompletions_MaximumSet_Gen.json @@ -5,6 +5,7 @@ "api-version": "2024-05-01-preview", "extra-parameters": "error", "body": { + "modalities": [ "text" ], "messages": [ { "role": "system", diff --git a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json new file mode 100644 index 000000000000..f952674b6e3f --- /dev/null +++ b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MaximumSet_Gen.json @@ -0,0 +1,53 @@ +{ + "title": "maximum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "extra-parameters": "error", + "body": { + "input": [ + { + "image": "puqkvvlvgcjyzughesnkena", + "text": "azrzyjsmnuefqpowpvfmyobeehqsni" + } + ], + "dimensions": 1024, + "encoding_format": "float", + "input_type": "text", + "model": "my-model-name" + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 0, + "object": "embedding", + "embedding": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + } + ], + "object": "list", + "model": "my-model-name", + "usage": { + "prompt_tokens": 15, + "total_tokens": 15 + } + } + } + } +} diff --git 
a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json new file mode 100644 index 000000000000..b90e35374308 --- /dev/null +++ b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/examples/GetImageEmbeddings_MinimumSet_Gen.json @@ -0,0 +1,47 @@ +{ + "title": "minimum set image embeddings", + "operationId": "GetImageEmbeddings", + "parameters": { + "api-version": "2024-05-01-preview", + "body": { + "input": [ + { + "image": "gvmojtfooxixxzayrditjlyymg" + } + ] + } + }, + "responses": { + "200": { + "body": { + "id": "cknxthfa", + "data": [ + { + "index": 0, + "object": "embedding", + "embedding": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ] + } + ], + "object": "list", + "model": "my-model-name", + "usage": { + "prompt_tokens": 15, + "total_tokens": 15 + } + } + } + } +} diff --git a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json index f79c11054e2c..ba1e6818a098 100644 --- a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json +++ b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.json @@ -207,6 +207,87 @@ } } }, + "/images/embeddings": { + "post": { + "operationId": "GetImageEmbeddings", + "description": "Return the embedding vectors for given images.\nThe method makes a REST API call to the `/images/embeddings` route on the given endpoint.", + "parameters": [ + { + "$ref": "#/parameters/Azure.Core.Foundations.ApiVersionParameter" + }, + { + "name": "extra-parameters", + "in": "header", + "description": "Controls what happens if extra parameters, undefined by the REST API,\nare passed in the JSON request payload.\nThis sets the HTTP request header `extra-parameters`.", 
+ "required": false, + "type": "string", + "enum": [ + "error", + "drop", + "pass-through" + ], + "x-ms-enum": { + "name": "ExtraParameters", + "modelAsString": true, + "values": [ + { + "name": "error", + "value": "error", + "description": "The service will error if it detected extra parameters in the request payload. This is the service default." + }, + { + "name": "drop", + "value": "drop", + "description": "The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to the back-end AI model." + }, + { + "name": "pass_through", + "value": "pass-through", + "description": "The service will pass extra parameters to the back-end AI model." + } + ] + }, + "x-ms-client-name": "extra_params" + }, + { + "name": "body", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/ImageEmbeddingsOptions" + } + } + ], + "responses": { + "200": { + "description": "The request has succeeded.", + "schema": { + "$ref": "#/definitions/EmbeddingsResult" + } + }, + "default": { + "description": "An unexpected error response.", + "schema": { + "$ref": "#/definitions/Azure.Core.Foundations.ErrorResponse" + }, + "headers": { + "x-ms-error-code": { + "type": "string", + "description": "String error code indicating what went wrong." + } + } + } + }, + "x-ms-examples": { + "maximum set image embeddings": { + "$ref": "./examples/GetImageEmbeddings_MaximumSet_Gen.json" + }, + "minimum set image embeddings": { + "$ref": "./examples/GetImageEmbeddings_MinimumSet_Gen.json" + } + } + } + }, "/info": { "get": { "operationId": "GetModelInfo", @@ -1455,6 +1536,58 @@ "name" ] }, + "ImageEmbeddingInput": { + "type": "object", + "description": "Represents an image with optional text.", + "properties": { + "image": { + "type": "string", + "description": "The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`." + }, + "text": { + "type": "string", + "description": "Optional. 
The text input to feed into the model (like DINO, CLIP).\nReturns a 422 error if the model doesn't support the value or parameter." + } + }, + "required": [ + "image" + ] + }, + "ImageEmbeddingsOptions": { + "type": "object", + "description": "The configuration information for an image embeddings request.", + "properties": { + "input": { + "type": "array", + "description": "Input image to embed. To embed multiple inputs in a single request, pass an array.\nThe input must not exceed the max input tokens for the model.", + "items": { + "$ref": "#/definitions/ImageEmbeddingInput" + }, + "x-ms-identifiers": [] + }, + "dimensions": { + "type": "integer", + "format": "int32", + "description": "Optional. The number of dimensions the resulting output embeddings should have.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "encoding_format": { + "$ref": "#/definitions/EmbeddingEncodingFormat", + "description": "Optional. The desired format for the returned embeddings.\nPassing null causes the model to use its default value.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "input_type": { + "$ref": "#/definitions/EmbeddingInputType", + "description": "Optional. The type of the input.\nReturns a 422 error if the model doesn't support the value or parameter." + }, + "model": { + "type": "string", + "description": "ID of the specific AI model to use, if more than one model is available on the endpoint." 
+ } + }, + "required": [ + "input" + ], + "additionalProperties": {} + }, "ModelInfo": { "type": "object", "description": "Represents some basic information about the AI model.", diff --git a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.yaml b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.yaml index 1f72cb0abda8..ef69ad847651 100644 --- a/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.yaml +++ b/specification/ai/data-plane/ModelInference/preview/2024-05-01-preview/openapi.yaml @@ -90,6 +90,48 @@ paths: application/json: schema: $ref: '#/components/schemas/EmbeddingsOptions' + /images/embeddings: + post: + operationId: getImageEmbeddings + description: |- + Return the embedding vectors for given images. + The method makes a REST API call to the `/images/embeddings` route on the given endpoint. + parameters: + - $ref: '#/components/parameters/Azure.Core.Foundations.ApiVersionParameter' + - name: extra-parameters + in: header + required: false + description: |- + Controls what happens if extra parameters, undefined by the REST API, + are passed in the JSON request payload. + This sets the HTTP request header `extra-parameters`. + schema: + $ref: '#/components/schemas/ExtraParameters' + responses: + '200': + description: The request has succeeded. + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingsResult' + default: + description: An unexpected error response. + headers: + x-ms-error-code: + required: false + description: String error code indicating what went wrong. 
+ schema: + type: string + content: + application/json: + schema: + $ref: '#/components/schemas/Azure.Core.Foundations.ErrorResponse' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/ImageEmbeddingsOptions' /info: get: operationId: getModelInfo @@ -1067,6 +1109,57 @@ components: additionalProperties: {} description: The parameters the function accepts, described as a JSON Schema object. description: The definition of a caller-specified function that chat completions may invoke in response to matching user input. + ImageEmbeddingInput: + type: object + required: + - image + properties: + image: + type: string + description: 'The input image encoded in base64 string as a data URL. Example: `data:image/{format};base64,{data}`.' + text: + type: string + description: |- + Optional. The text input to feed into the model (like DINO, CLIP). + Returns a 422 error if the model doesn't support the value or parameter. + description: Represents an image with optional text. + ImageEmbeddingsOptions: + type: object + required: + - input + properties: + input: + type: array + items: + $ref: '#/components/schemas/ImageEmbeddingInput' + description: |- + Input image to embed. To embed multiple inputs in a single request, pass an array. + The input must not exceed the max input tokens for the model. + dimensions: + type: integer + format: int32 + description: |- + Optional. The number of dimensions the resulting output embeddings should have. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. + encoding_format: + allOf: + - $ref: '#/components/schemas/EmbeddingEncodingFormat' + description: |- + Optional. The desired format for the returned embeddings. + Passing null causes the model to use its default value. + Returns a 422 error if the model doesn't support the value or parameter. 
+ input_type: + allOf: + - $ref: '#/components/schemas/EmbeddingInputType' + description: |- + Optional. The type of the input. + Returns a 422 error if the model doesn't support the value or parameter. + model: + type: string + description: ID of the specific AI model to use, if more than one model is available on the endpoint. + additionalProperties: {} + description: The configuration information for an image embeddings request. ModelInfo: type: object required: