diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceClientBuilderExtensions.cs b/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceClientBuilderExtensions.cs
index 5ef054431019..3348bd6a13c2 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceClientBuilderExtensions.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceClientBuilderExtensions.cs
@@ -12,62 +12,35 @@
 
 namespace Microsoft.Extensions.Azure
 {
-    /// <summary> Extension methods to add <see cref="ChatCompletionsClient"/>, <see cref="EmbeddingsClient"/> to client builder. </summary>
+    /// <summary> Extension methods to add <see cref="ChatCompletionsClient"/> to client builder. </summary>
     public static partial class AIInferenceClientBuilderExtensions
     {
         /// <summary> Registers a <see cref="ChatCompletionsClient"/> instance. </summary>
         /// <param name="builder"> The builder to register with. </param>
         /// <param name="endpoint"> Service host. </param>
         /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
-        public static IAzureClientBuilder<ChatCompletionsClient, AzureAIInferenceClientOptions> AddChatCompletionsClient<TBuilder>(this TBuilder builder, Uri endpoint, AzureKeyCredential credential)
+        public static IAzureClientBuilder<ChatCompletionsClient, ChatCompletionsClientOptions> AddChatCompletionsClient<TBuilder>(this TBuilder builder, Uri endpoint, AzureKeyCredential credential)
         where TBuilder : IAzureClientFactoryBuilder
         {
-            return builder.RegisterClientFactory<ChatCompletionsClient, AzureAIInferenceClientOptions>((options) => new ChatCompletionsClient(endpoint, credential, options));
+            return builder.RegisterClientFactory<ChatCompletionsClient, ChatCompletionsClientOptions>((options) => new ChatCompletionsClient(endpoint, credential, options));
         }
 
         /// <summary> Registers a <see cref="ChatCompletionsClient"/> instance. </summary>
         /// <param name="builder"> The builder to register with. </param>
         /// <param name="endpoint"> Service host. </param>
-        public static IAzureClientBuilder<ChatCompletionsClient, AzureAIInferenceClientOptions> AddChatCompletionsClient<TBuilder>(this TBuilder builder, Uri endpoint)
+        public static IAzureClientBuilder<ChatCompletionsClient, ChatCompletionsClientOptions> AddChatCompletionsClient<TBuilder>(this TBuilder builder, Uri endpoint)
         where TBuilder : IAzureClientFactoryBuilderWithCredential
         {
-            return builder.RegisterClientFactory<ChatCompletionsClient, AzureAIInferenceClientOptions>((options, cred) => new ChatCompletionsClient(endpoint, cred, options));
-        }
-
-        /// <summary> Registers a <see cref="EmbeddingsClient"/> instance. </summary>
-        /// <param name="builder"> The builder to register with. </param>
-        /// <param name="endpoint"> Service host. </param>
-        /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
-        public static IAzureClientBuilder<EmbeddingsClient, AzureAIInferenceClientOptions> AddEmbeddingsClient<TBuilder>(this TBuilder builder, Uri endpoint, AzureKeyCredential credential)
-        where TBuilder : IAzureClientFactoryBuilder
-        {
-            return builder.RegisterClientFactory<EmbeddingsClient, AzureAIInferenceClientOptions>((options) => new EmbeddingsClient(endpoint, credential, options));
-        }
-
-        /// <summary> Registers a <see cref="EmbeddingsClient"/> instance. </summary>
-        /// <param name="builder"> The builder to register with. </param>
-        /// <param name="endpoint"> Service host. </param>
-        public static IAzureClientBuilder<EmbeddingsClient, AzureAIInferenceClientOptions> AddEmbeddingsClient<TBuilder>(this TBuilder builder, Uri endpoint)
-        where TBuilder : IAzureClientFactoryBuilderWithCredential
-        {
-            return builder.RegisterClientFactory<EmbeddingsClient, AzureAIInferenceClientOptions>((options, cred) => new EmbeddingsClient(endpoint, cred, options));
+            return builder.RegisterClientFactory<ChatCompletionsClient, ChatCompletionsClientOptions>((options, cred) => new ChatCompletionsClient(endpoint, cred, options));
         }
 
         /// <summary> Registers a <see cref="ChatCompletionsClient"/> instance. </summary>
         /// <param name="builder"> The builder to register with. </param>
         /// <param name="configuration"> The configuration values. </param>
-        public static IAzureClientBuilder<ChatCompletionsClient, AzureAIInferenceClientOptions> AddChatCompletionsClient<TBuilder, TConfiguration>(this TBuilder builder, TConfiguration configuration)
-        where TBuilder : IAzureClientFactoryBuilderWithConfiguration<TConfiguration>
-        {
-            return builder.RegisterClientFactory<ChatCompletionsClient, AzureAIInferenceClientOptions>(configuration);
-        }
-        /// <summary> Registers a <see cref="EmbeddingsClient"/> instance. </summary>
-        /// <param name="builder"> The builder to register with. </param>
-        /// <param name="configuration"> The configuration values. </param>
-        public static IAzureClientBuilder<EmbeddingsClient, AzureAIInferenceClientOptions> AddEmbeddingsClient<TBuilder, TConfiguration>(this TBuilder builder, TConfiguration configuration)
+        public static IAzureClientBuilder<ChatCompletionsClient, ChatCompletionsClientOptions> AddChatCompletionsClient<TBuilder, TConfiguration>(this TBuilder builder, TConfiguration configuration)
         where TBuilder : IAzureClientFactoryBuilderWithConfiguration<TConfiguration>
         {
-            return builder.RegisterClientFactory<EmbeddingsClient, AzureAIInferenceClientOptions>(configuration);
+            return builder.RegisterClientFactory<ChatCompletionsClient, ChatCompletionsClientOptions>(configuration);
         }
     }
 }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs b/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs
index b69b4ab3c432..947d086078db 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/AIInferenceModelFactory.cs
@@ -30,6 +30,14 @@ public static ChatMessageTextContentItem ChatMessageTextContentItem(string text
             return new ChatMessageTextContentItem("text", serializedAdditionalRawData: null, text);
         }
 
+        /// <summary> Initializes a new instance of <see cref="Inference.ChatMessageAudioContentItem"/>. </summary>
+        /// <param name="inputAudio"> The details of the input audio. </param>
+        /// <returns> A new <see cref="Inference.ChatMessageAudioContentItem"/> instance for mocking. </returns>
+        public static ChatMessageAudioContentItem ChatMessageAudioContentItem(ChatMessageInputAudio inputAudio = null)
+        {
+            return new ChatMessageAudioContentItem("input_audio", serializedAdditionalRawData: null, inputAudio);
+        }
+
         /// <summary> Initializes a new instance of <see cref="Inference.ChatCompletionsToolCall"/>. </summary>
         /// <param name="id"> The ID of the tool call. </param>
         /// <param name="type"> The type of tool call. Currently, only `function` is supported. </param>
@@ -67,16 +75,6 @@ public static ChatCompletionsNamedToolChoice ChatCompletionsNamedToolChoice(Chat
             return new ChatCompletionsNamedToolChoice(type, function, serializedAdditionalRawData: null);
         }
 
-        /// <summary> Initializes a new instance of <see cref="Inference.CompletionsUsage"/>. </summary>
-        /// <param name="completionTokens"> The number of tokens generated across all completions emissions. </param>
-        /// <param name="promptTokens"> The number of tokens in the provided prompts for the completions request. </param>
-        /// <param name="totalTokens"> The total number of tokens processed for the completions request and response. </param>
-        /// <returns> A new <see cref="Inference.CompletionsUsage"/> instance for mocking. </returns>
-        public static CompletionsUsage CompletionsUsage(int completionTokens = default, int promptTokens = default, int totalTokens = default)
-        {
-            return new CompletionsUsage(completionTokens, promptTokens, totalTokens, serializedAdditionalRawData: null);
-        }
-
         /// <summary> Initializes a new instance of <see cref="Inference.ChatChoice"/>. </summary>
         /// <param name="index"> The ordered index associated with this chat completions choice. </param>
         /// <param name="finishReason"> The reason that this chat completions choice completed its generated. </param>
@@ -102,6 +100,16 @@ public static ChatResponseMessage ChatResponseMessage(ChatRole role = default, s
             return new ChatResponseMessage(role, content, toolCalls?.ToList(), serializedAdditionalRawData: null);
         }
 
+        /// <summary> Initializes a new instance of <see cref="Inference.CompletionsUsage"/>. </summary>
+        /// <param name="completionTokens"> The number of tokens generated across all completions emissions. </param>
+        /// <param name="promptTokens"> The number of tokens in the provided prompts for the completions request. </param>
+        /// <param name="totalTokens"> The total number of tokens processed for the completions request and response. </param>
+        /// <returns> A new <see cref="Inference.CompletionsUsage"/> instance for mocking. </returns>
+        public static CompletionsUsage CompletionsUsage(int completionTokens = default, int promptTokens = default, int totalTokens = default)
+        {
+            return new CompletionsUsage(completionTokens, promptTokens, totalTokens, serializedAdditionalRawData: null);
+        }
+
         /// <summary> Initializes a new instance of <see cref="Inference.ModelInfo"/>. </summary>
         /// <param name="modelName"> The name of the AI model. For example: `Phi21`. </param>
         /// <param name="modelType"> The type of the AI model. A Unique identifier for the profile. </param>
@@ -112,43 +120,6 @@ public static ModelInfo ModelInfo(string modelName = null, ModelType modelType =
             return new ModelInfo(modelName, modelType, modelProviderName, serializedAdditionalRawData: null);
         }
 
-        /// <summary> Initializes a new instance of <see cref="Inference.EmbeddingsResult"/>. </summary>
-        /// <param name="id"> Unique identifier for the embeddings result. </param>
-        /// <param name="data"> Embedding values for the prompts submitted in the request. </param>
-        /// <param name="usage"> Usage counts for tokens input using the embeddings API. </param>
-        /// <param name="model"> The model ID used to generate this result. </param>
-        /// <returns> A new <see cref="Inference.EmbeddingsResult"/> instance for mocking. </returns>
-        public static EmbeddingsResult EmbeddingsResult(string id = null, IEnumerable<EmbeddingItem> data = null, EmbeddingsUsage usage = null, string model = null)
-        {
-            data ??= new List<EmbeddingItem>();
-
-            return new EmbeddingsResult(id, data?.ToList(), usage, model, serializedAdditionalRawData: null);
-        }
-
-        /// <summary> Initializes a new instance of <see cref="Inference.EmbeddingItem"/>. </summary>
-        /// <param name="embedding">
-        /// List of embedding values for the input prompt. These represent a measurement of the
-        /// vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
-        /// </param>
-        /// <param name="index"> Index of the prompt to which the EmbeddingItem corresponds. </param>
-        /// <returns> A new <see cref="Inference.EmbeddingItem"/> instance for mocking. </returns>
-        public static EmbeddingItem EmbeddingItem(BinaryData embedding = null, int index = default)
-        {
-            return new EmbeddingItem(embedding, index, serializedAdditionalRawData: null);
-        }
-
-        /// <summary> Initializes a new instance of <see cref="Inference.EmbeddingsUsage"/>. </summary>
-        /// <param name="promptTokens"> Number of tokens in the request. </param>
-        /// <param name="totalTokens">
-        /// Total number of tokens transacted in this request/response. Should equal the
-        /// number of tokens in the request.
-        /// </param>
-        /// <returns> A new <see cref="Inference.EmbeddingsUsage"/> instance for mocking. </returns>
-        public static EmbeddingsUsage EmbeddingsUsage(int promptTokens = default, int totalTokens = default)
-        {
-            return new EmbeddingsUsage(promptTokens, totalTokens, serializedAdditionalRawData: null);
-        }
-
         /// <summary> Initializes a new instance of <see cref="Inference.StreamingChatCompletionsUpdate"/>. </summary>
         /// <param name="id"> A unique identifier associated with this chat completions response. </param>
         /// <param name="created">
@@ -156,14 +127,14 @@ public static EmbeddingsUsage EmbeddingsUsage(int promptTokens = default, int to
         /// represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
         /// </param>
         /// <param name="model"> The model used for the chat completion. </param>
-        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <param name="choices">
         /// An update to the collection of completion choices associated with this completions response.
         /// Generally, `n` choices are generated per provided prompt with a default value of 1.
         /// Token limits and other settings may limit the number of choices generated.
         /// </param>
+        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <returns> A new <see cref="Inference.StreamingChatCompletionsUpdate"/> instance for mocking. </returns>
-        public static StreamingChatCompletionsUpdate StreamingChatCompletionsUpdate(string id = null, DateTimeOffset created = default, string model = null, CompletionsUsage usage = null, IEnumerable<StreamingChatChoiceUpdate> choices = null)
+        public static StreamingChatCompletionsUpdate StreamingChatCompletionsUpdate(string id = null, DateTimeOffset created = default, string model = null, IEnumerable<StreamingChatChoiceUpdate> choices = null, CompletionsUsage usage = null)
         {
             choices ??= new List<StreamingChatChoiceUpdate>();
 
@@ -171,8 +142,8 @@ public static StreamingChatCompletionsUpdate StreamingChatCompletionsUpdate(stri
                 id,
                 created,
                 model,
-                usage,
                 choices?.ToList(),
+                usage,
                 serializedAdditionalRawData: null);
         }
 
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/AudioContentFormat.cs b/sdk/ai/Azure.AI.Inference/src/Generated/AudioContentFormat.cs
new file mode 100644
index 000000000000..3723b8aa68c8
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/AudioContentFormat.cs
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.ComponentModel;
+
+namespace Azure.AI.Inference
+{
+    /// <summary> A representation of the possible formats for audio content. </summary>
+    public readonly partial struct AudioContentFormat : IEquatable<AudioContentFormat>
+    {
+        private readonly string _value;
+
+        /// <summary> Initializes a new instance of <see cref="AudioContentFormat"/>. </summary>
+        /// <exception cref="ArgumentNullException"> <paramref name="value"/> is null. </exception>
+        public AudioContentFormat(string value)
+        {
+            _value = value ?? throw new ArgumentNullException(nameof(value));
+        }
+
+        private const string WavValue = "wav";
+        private const string Mp3Value = "mp3";
+
+        /// <summary> Specifies audio in WAV format. </summary>
+        public static AudioContentFormat Wav { get; } = new AudioContentFormat(WavValue);
+        /// <summary> Specifies audio in MP3 format. </summary>
+        public static AudioContentFormat Mp3 { get; } = new AudioContentFormat(Mp3Value);
+        /// <summary> Determines if two <see cref="AudioContentFormat"/> values are the same (compared case-insensitively). </summary>
+        public static bool operator ==(AudioContentFormat left, AudioContentFormat right) => left.Equals(right);
+        /// <summary> Determines if two <see cref="AudioContentFormat"/> values are not the same (compared case-insensitively). </summary>
+        public static bool operator !=(AudioContentFormat left, AudioContentFormat right) => !left.Equals(right);
+        /// <summary> Converts a <see cref="string"/> to an <see cref="AudioContentFormat"/>. </summary>
+        public static implicit operator AudioContentFormat(string value) => new AudioContentFormat(value);
+
+        /// <inheritdoc />
+        [EditorBrowsable(EditorBrowsableState.Never)]
+        public override bool Equals(object obj) => obj is AudioContentFormat other && Equals(other);
+        /// <inheritdoc />
+        public bool Equals(AudioContentFormat other) => string.Equals(_value, other._value, StringComparison.InvariantCultureIgnoreCase);
+
+        /// <inheritdoc />
+        [EditorBrowsable(EditorBrowsableState.Never)]
+        public override int GetHashCode() => _value != null ? StringComparer.InvariantCultureIgnoreCase.GetHashCode(_value) : 0;
+        /// <inheritdoc />
+        public override string ToString() => _value;
+    }
+}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.Serialization.cs
index 1fd5ab2dd912..8ad5ae184eda 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.Serialization.cs
@@ -40,8 +40,6 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
             writer.WriteNumberValue(Created, "U");
             writer.WritePropertyName("model"u8);
             writer.WriteStringValue(Model);
-            writer.WritePropertyName("usage"u8);
-            writer.WriteObjectValue(Usage, options);
             writer.WritePropertyName("choices"u8);
             writer.WriteStartArray();
             foreach (var item in Choices)
@@ -49,6 +47,8 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
                 writer.WriteObjectValue<ChatChoice>(item, options);
             }
             writer.WriteEndArray();
+            writer.WritePropertyName("usage"u8);
+            writer.WriteObjectValue(Usage, options);
             if (options.Format != "W" && _serializedAdditionalRawData != null)
             {
                 foreach (var item in _serializedAdditionalRawData)
@@ -89,8 +89,8 @@ internal static ChatCompletions DeserializeChatCompletions(JsonElement element,
             string id = default;
             DateTimeOffset created = default;
             string model = default;
-            CompletionsUsage usage = default;
             IReadOnlyList<ChatChoice> choices = default;
+            CompletionsUsage usage = default;
             IDictionary<string, BinaryData> serializedAdditionalRawData = default;
             Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
             foreach (var property in element.EnumerateObject())
@@ -110,11 +110,6 @@ internal static ChatCompletions DeserializeChatCompletions(JsonElement element,
                     model = property.Value.GetString();
                     continue;
                 }
-                if (property.NameEquals("usage"u8))
-                {
-                    usage = CompletionsUsage.DeserializeCompletionsUsage(property.Value, options);
-                    continue;
-                }
                 if (property.NameEquals("choices"u8))
                 {
                     List<ChatChoice> array = new List<ChatChoice>();
@@ -125,6 +120,11 @@ internal static ChatCompletions DeserializeChatCompletions(JsonElement element,
                     choices = array;
                     continue;
                 }
+                if (property.NameEquals("usage"u8))
+                {
+                    usage = CompletionsUsage.DeserializeCompletionsUsage(property.Value, options);
+                    continue;
+                }
                 if (options.Format != "W")
                 {
                     rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
@@ -135,8 +135,8 @@ internal static ChatCompletions DeserializeChatCompletions(JsonElement element,
                 id,
                 created,
                 model,
-                usage,
                 choices,
+                usage,
                 serializedAdditionalRawData);
         }
 
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.cs
index 1861a3df65f8..7af202a83311 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletions.cs
@@ -57,25 +57,25 @@ public partial class ChatCompletions
         /// represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
         /// </param>
         /// <param name="model"> The model used for the chat completion. </param>
-        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <param name="choices">
         /// The collection of completions choices associated with this completions response.
         /// Generally, `n` choices are generated per provided prompt with a default value of 1.
         /// Token limits and other settings may limit the number of choices generated.
         /// </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="id"/>, <paramref name="model"/>, <paramref name="usage"/> or <paramref name="choices"/> is null. </exception>
-        internal ChatCompletions(string id, DateTimeOffset created, string model, CompletionsUsage usage, IEnumerable<ChatChoice> choices)
+        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="id"/>, <paramref name="model"/>, <paramref name="choices"/> or <paramref name="usage"/> is null. </exception>
+        internal ChatCompletions(string id, DateTimeOffset created, string model, IEnumerable<ChatChoice> choices, CompletionsUsage usage)
         {
             Argument.AssertNotNull(id, nameof(id));
             Argument.AssertNotNull(model, nameof(model));
-            Argument.AssertNotNull(usage, nameof(usage));
             Argument.AssertNotNull(choices, nameof(choices));
+            Argument.AssertNotNull(usage, nameof(usage));
 
             Id = id;
             Created = created;
             Model = model;
-            Usage = usage;
             Choices = choices.ToList();
+            Usage = usage;
         }
 
         /// <summary> Initializes a new instance of <see cref="ChatCompletions"/>. </summary>
@@ -85,20 +85,20 @@ internal ChatCompletions(string id, DateTimeOffset created, string model, Comple
         /// represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
         /// </param>
         /// <param name="model"> The model used for the chat completion. </param>
-        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <param name="choices">
         /// The collection of completions choices associated with this completions response.
         /// Generally, `n` choices are generated per provided prompt with a default value of 1.
         /// Token limits and other settings may limit the number of choices generated.
         /// </param>
+        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal ChatCompletions(string id, DateTimeOffset created, string model, CompletionsUsage usage, IReadOnlyList<ChatChoice> choices, IDictionary<string, BinaryData> serializedAdditionalRawData)
+        internal ChatCompletions(string id, DateTimeOffset created, string model, IReadOnlyList<ChatChoice> choices, CompletionsUsage usage, IDictionary<string, BinaryData> serializedAdditionalRawData)
         {
             Id = id;
             Created = created;
             Model = model;
-            Usage = usage;
             Choices = choices;
+            Usage = usage;
             _serializedAdditionalRawData = serializedAdditionalRawData;
         }
 
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs
index 89ce6729a893..1d0c1bab7768 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClient.cs
@@ -7,6 +7,7 @@
 
 using System;
 using System.Collections.Generic;
+using System.Linq;
 using System.Threading;
 using System.Threading.Tasks;
 using Azure.Core;
@@ -18,9 +19,8 @@ namespace Azure.AI.Inference
     /// <summary> The ChatCompletions service client. </summary>
     public partial class ChatCompletionsClient
     {
-        private const string AuthorizationHeader = "Authorization";
+        private const string AuthorizationHeader = "api-key";
         private readonly AzureKeyCredential _keyCredential;
-        private const string AuthorizationApiKeyPrefix = "Bearer";
         private static readonly string[] AuthorizationScopes = new string[] { "https://ml.azure.com/.default" };
         private readonly TokenCredential _tokenCredential;
         private readonly HttpPipeline _pipeline;
@@ -42,7 +42,7 @@ protected ChatCompletionsClient()
         /// <param name="endpoint"> Service host. </param>
         /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
         /// <exception cref="ArgumentNullException"> <paramref name="endpoint"/> or <paramref name="credential"/> is null. </exception>
-        public ChatCompletionsClient(Uri endpoint, AzureKeyCredential credential) : this(endpoint, credential, new AzureAIInferenceClientOptions())
+        public ChatCompletionsClient(Uri endpoint, AzureKeyCredential credential) : this(endpoint, credential, new ChatCompletionsClientOptions())
         {
         }
 
@@ -50,7 +50,7 @@ protected ChatCompletionsClient()
         /// <param name="endpoint"> Service host. </param>
         /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
         /// <exception cref="ArgumentNullException"> <paramref name="endpoint"/> or <paramref name="credential"/> is null. </exception>
-        public ChatCompletionsClient(Uri endpoint, TokenCredential credential) : this(endpoint, credential, new AzureAIInferenceClientOptions())
+        public ChatCompletionsClient(Uri endpoint, TokenCredential credential) : this(endpoint, credential, new ChatCompletionsClientOptions())
         {
         }
 
@@ -59,11 +59,29 @@ protected ChatCompletionsClient()
         /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
         /// <param name="options"> The options for configuring the client. </param>
         /// <exception cref="ArgumentNullException"> <paramref name="endpoint"/> or <paramref name="credential"/> is null. </exception>
-        public ChatCompletionsClient(Uri endpoint, TokenCredential credential, AzureAIInferenceClientOptions options)
+        public ChatCompletionsClient(Uri endpoint, AzureKeyCredential credential, ChatCompletionsClientOptions options)
         {
             Argument.AssertNotNull(endpoint, nameof(endpoint));
             Argument.AssertNotNull(credential, nameof(credential));
-            options ??= new AzureAIInferenceClientOptions();
+            options ??= new ChatCompletionsClientOptions();
+
+            ClientDiagnostics = new ClientDiagnostics(options, true);
+            _keyCredential = credential;
+            _pipeline = HttpPipelineBuilder.Build(options, Array.Empty<HttpPipelinePolicy>(), new HttpPipelinePolicy[] { new AzureKeyCredentialPolicy(_keyCredential, AuthorizationHeader) }, new ResponseClassifier());
+            _endpoint = endpoint;
+            _apiVersion = options.Version;
+        }
+
+        /// <summary> Initializes a new instance of ChatCompletionsClient. </summary>
+        /// <param name="endpoint"> Service host. </param>
+        /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
+        /// <param name="options"> The options for configuring the client. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="endpoint"/> or <paramref name="credential"/> is null. </exception>
+        public ChatCompletionsClient(Uri endpoint, TokenCredential credential, ChatCompletionsClientOptions options)
+        {
+            Argument.AssertNotNull(endpoint, nameof(endpoint));
+            Argument.AssertNotNull(credential, nameof(credential));
+            options ??= new ChatCompletionsClientOptions();
 
             ClientDiagnostics = new ClientDiagnostics(options, true);
             _tokenCredential = credential;
@@ -72,6 +90,198 @@ public ChatCompletionsClient(Uri endpoint, TokenCredential credential, AzureAIIn
             _apiVersion = options.Version;
         }
 
+        /// <summary>
+        /// Gets chat completions for the provided chat messages.
+        /// Completions support a wide variety of tasks and generate text that continues from or "completes"
+        /// provided prompt data. The method makes a REST API call to the `/chat/completions` route
+        /// on the given endpoint.
+        /// </summary>
+        /// <param name="messages">
+        /// The collection of context messages associated with this chat completions request.
+        /// Typical usage begins with a chat message for the System role that provides instructions for
+        /// the behavior of the assistant, followed by alternating messages between the User and
+        /// Assistant roles.
+        /// </param>
+        /// <param name="frequencyPenalty">
+        /// A value that influences the probability of generated tokens appearing based on their cumulative
+        /// frequency in generated text.
+        /// Positive values will make tokens less likely to appear as their frequency increases and
+        /// decrease the likelihood of the model repeating the same statements verbatim.
+        /// Supported range is [-2, 2].
+        /// </param>
+        /// <param name="internalShouldStreamResponse"> A value indicating whether chat completions should be streamed for this request. </param>
+        /// <param name="presencePenalty">
+        /// A value that influences the probability of generated tokens appearing based on their existing
+        /// presence in generated text.
+        /// Positive values will make tokens less likely to appear when they already exist and increase the
+        /// model's likelihood to output new topics.
+        /// Supported range is [-2, 2].
+        /// </param>
+        /// <param name="temperature">
+        /// The sampling temperature to use that controls the apparent creativity of generated completions.
+        /// Higher values will make output more random while lower values will make results more focused
+        /// and deterministic.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </param>
+        /// <param name="nucleusSamplingFactor">
+        /// An alternative to sampling with temperature called nucleus sampling. This value causes the
+        /// model to consider the results of tokens with the provided probability mass. As an example, a
+        /// value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+        /// considered.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </param>
+        /// <param name="maxTokens"> The maximum number of tokens to generate. </param>
+        /// <param name="responseFormat">
+        /// An object specifying the format that the model must output.
+        ///
+        /// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
+        ///
+        /// Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
+        ///
+        /// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
+        /// </param>
+        /// <param name="stopSequences"> A collection of textual sequences that will end completions generation. </param>
+        /// <param name="tools">
+        /// A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
+        /// may respond with a function call request and provide the input arguments in JSON format for that function.
+        /// </param>
+        /// <param name="toolChoice"> If specified, the model will configure which of the provided tools it can use for the chat completions response. </param>
+        /// <param name="seed">
+        /// If specified, the system will make a best effort to sample deterministically such that repeated requests with the
+        /// same seed and parameters should return the same result. Determinism is not guaranteed.
+        /// </param>
+        /// <param name="model"> ID of the specific AI model to use, if more than one model is available on the endpoint. </param>
+        /// <param name="extraParams">
+        /// Controls what happens if extra parameters, undefined by the REST API,
+        /// are passed in the JSON request payload.
+        /// This sets the HTTP request header `extra-parameters`.
+        /// </param>
+        /// <param name="cancellationToken"> The cancellation token to use. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="messages"/> is null. </exception>
+        internal virtual async Task<Response<ChatCompletions>> CompleteAsync(IEnumerable<ChatRequestMessage> messages, float? frequencyPenalty = null, bool? internalShouldStreamResponse = null, float? presencePenalty = null, float? temperature = null, float? nucleusSamplingFactor = null, int? maxTokens = null, ChatCompletionsResponseFormat responseFormat = null, IEnumerable<string> stopSequences = null, IEnumerable<ChatCompletionsToolDefinition> tools = null, BinaryData toolChoice = null, long? seed = null, string model = null, ExtraParameters? extraParams = null, CancellationToken cancellationToken = default)
+        {
+            Argument.AssertNotNull(messages, nameof(messages));
+
+            CompleteRequest completeRequest = new CompleteRequest(
+                messages.ToList(),
+                frequencyPenalty,
+                internalShouldStreamResponse,
+                presencePenalty,
+                temperature,
+                nucleusSamplingFactor,
+                maxTokens,
+                responseFormat,
+                stopSequences?.ToList() as IReadOnlyList<string> ?? new ChangeTrackingList<string>(),
+                tools?.ToList() as IReadOnlyList<ChatCompletionsToolDefinition> ?? new ChangeTrackingList<ChatCompletionsToolDefinition>(),
+                toolChoice,
+                seed,
+                model,
+                null);
+            RequestContext context = FromCancellationToken(cancellationToken);
+            Response response = await CompleteAsync(completeRequest.ToRequestContent(), extraParams?.ToString(), context).ConfigureAwait(false);
+            return Response.FromValue(ChatCompletions.FromResponse(response), response);
+        }
+
+        /// <summary>
+        /// Gets chat completions for the provided chat messages.
+        /// Completions support a wide variety of tasks and generate text that continues from or "completes"
+        /// provided prompt data. The method makes a REST API call to the `/chat/completions` route
+        /// on the given endpoint.
+        /// </summary>
+        /// <param name="messages">
+        /// The collection of context messages associated with this chat completions request.
+        /// Typical usage begins with a chat message for the System role that provides instructions for
+        /// the behavior of the assistant, followed by alternating messages between the User and
+        /// Assistant roles.
+        /// </param>
+        /// <param name="frequencyPenalty">
+        /// A value that influences the probability of generated tokens appearing based on their cumulative
+        /// frequency in generated text.
+        /// Positive values will make tokens less likely to appear as their frequency increases and
+        /// decrease the likelihood of the model repeating the same statements verbatim.
+        /// Supported range is [-2, 2].
+        /// </param>
+        /// <param name="internalShouldStreamResponse"> A value indicating whether chat completions should be streamed for this request. </param>
+        /// <param name="presencePenalty">
+        /// A value that influences the probability of generated tokens appearing based on their existing
+        /// presence in generated text.
+        /// Positive values will make tokens less likely to appear when they already exist and increase the
+        /// model's likelihood to output new topics.
+        /// Supported range is [-2, 2].
+        /// </param>
+        /// <param name="temperature">
+        /// The sampling temperature to use that controls the apparent creativity of generated completions.
+        /// Higher values will make output more random while lower values will make results more focused
+        /// and deterministic.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </param>
+        /// <param name="nucleusSamplingFactor">
+        /// An alternative to sampling with temperature called nucleus sampling. This value causes the
+        /// model to consider the results of tokens with the provided probability mass. As an example, a
+        /// value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+        /// considered.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </param>
+        /// <param name="maxTokens"> The maximum number of tokens to generate. </param>
+        /// <param name="responseFormat">
+        /// An object specifying the format that the model must output.
+        ///
+        /// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
+        ///
+        /// Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
+        ///
+        /// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
+        /// </param>
+        /// <param name="stopSequences"> A collection of textual sequences that will end completions generation. </param>
+        /// <param name="tools">
+        /// A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
+        /// may respond with a function call request and provide the input arguments in JSON format for that function.
+        /// </param>
+        /// <param name="toolChoice"> If specified, the model will configure which of the provided tools it can use for the chat completions response. </param>
+        /// <param name="seed">
+        /// If specified, the system will make a best effort to sample deterministically such that repeated requests with the
+        /// same seed and parameters should return the same result. Determinism is not guaranteed.
+        /// </param>
+        /// <param name="model"> ID of the specific AI model to use, if more than one model is available on the endpoint. </param>
+        /// <param name="extraParams">
+        /// Controls what happens if extra parameters, undefined by the REST API,
+        /// are passed in the JSON request payload.
+        /// This sets the HTTP request header `extra-parameters`.
+        /// </param>
+        /// <param name="cancellationToken"> The cancellation token to use. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="messages"/> is null. </exception>
+        internal virtual Response<ChatCompletions> Complete(IEnumerable<ChatRequestMessage> messages, float? frequencyPenalty = null, bool? internalShouldStreamResponse = null, float? presencePenalty = null, float? temperature = null, float? nucleusSamplingFactor = null, int? maxTokens = null, ChatCompletionsResponseFormat responseFormat = null, IEnumerable<string> stopSequences = null, IEnumerable<ChatCompletionsToolDefinition> tools = null, BinaryData toolChoice = null, long? seed = null, string model = null, ExtraParameters? extraParams = null, CancellationToken cancellationToken = default)
+        {
+            Argument.AssertNotNull(messages, nameof(messages));
+
+            CompleteRequest completeRequest = new CompleteRequest(
+                messages.ToList(),
+                frequencyPenalty,
+                internalShouldStreamResponse,
+                presencePenalty,
+                temperature,
+                nucleusSamplingFactor,
+                maxTokens,
+                responseFormat,
+                stopSequences?.ToList() as IReadOnlyList<string> ?? new ChangeTrackingList<string>(),
+                tools?.ToList() as IReadOnlyList<ChatCompletionsToolDefinition> ?? new ChangeTrackingList<ChatCompletionsToolDefinition>(),
+                toolChoice,
+                seed,
+                model,
+                null);
+            RequestContext context = FromCancellationToken(cancellationToken);
+            Response response = Complete(completeRequest.ToRequestContent(), extraParams?.ToString(), context);
+            return Response.FromValue(ChatCompletions.FromResponse(response), response);
+        }
+
         /// <summary>
         /// Returns information about the AI model.
         /// The method makes a REST API call to the `/info` route on the given endpoint.
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/AzureAIInferenceClientOptions.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClientOptions.cs
similarity index 70%
rename from sdk/ai/Azure.AI.Inference/src/Generated/AzureAIInferenceClientOptions.cs
rename to sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClientOptions.cs
index 46dcb4716cad..e68f1c4ec7fb 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/AzureAIInferenceClientOptions.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsClientOptions.cs
@@ -10,8 +10,8 @@
 
 namespace Azure.AI.Inference
 {
-    /// <summary> Client options for Azure.AI.Inference library clients. </summary>
-    public partial class AzureAIInferenceClientOptions : ClientOptions
+    /// <summary> Client options for ChatCompletionsClient. </summary>
+    public partial class ChatCompletionsClientOptions : ClientOptions
     {
         private const ServiceVersion LatestVersion = ServiceVersion.V2024_05_01_Preview;
 
@@ -24,8 +24,8 @@ public enum ServiceVersion
 
         internal string Version { get; }
 
-        /// <summary> Initializes new instance of AzureAIInferenceClientOptions. </summary>
-        public AzureAIInferenceClientOptions(ServiceVersion version = LatestVersion)
+        /// <summary> Initializes a new instance of ChatCompletionsClientOptions. </summary>
+        public ChatCompletionsClientOptions(ServiceVersion version = LatestVersion)
         {
             Version = version switch
             {
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsOptions.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsOptions.Serialization.cs
deleted file mode 100644
index 3d996776fc76..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsOptions.Serialization.cs
+++ /dev/null
@@ -1,356 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.ClientModel.Primitives;
-using System.Collections.Generic;
-using System.Text.Json;
-using Azure.Core;
-
-namespace Azure.AI.Inference
-{
-    public partial class ChatCompletionsOptions : IUtf8JsonSerializable, IJsonModel<ChatCompletionsOptions>
-    {
-        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatCompletionsOptions>)this).Write(writer, ModelSerializationExtensions.WireOptions);
-
-        /// <param name="writer"> The JSON writer. </param>
-        /// <param name="options"> The client options for reading and writing models. </param>
-        protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsOptions>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(ChatCompletionsOptions)} does not support writing '{format}' format.");
-            }
-
-            writer.WritePropertyName("messages"u8);
-            writer.WriteStartArray();
-            foreach (var item in Messages)
-            {
-                writer.WriteObjectValue<ChatRequestMessage>(item, options);
-            }
-            writer.WriteEndArray();
-            if (Optional.IsDefined(FrequencyPenalty))
-            {
-                writer.WritePropertyName("frequency_penalty"u8);
-                writer.WriteNumberValue(FrequencyPenalty.Value);
-            }
-            if (Optional.IsDefined(InternalShouldStreamResponse))
-            {
-                writer.WritePropertyName("stream"u8);
-                writer.WriteBooleanValue(InternalShouldStreamResponse.Value);
-            }
-            if (Optional.IsDefined(PresencePenalty))
-            {
-                writer.WritePropertyName("presence_penalty"u8);
-                writer.WriteNumberValue(PresencePenalty.Value);
-            }
-            if (Optional.IsDefined(Temperature))
-            {
-                writer.WritePropertyName("temperature"u8);
-                writer.WriteNumberValue(Temperature.Value);
-            }
-            if (Optional.IsDefined(NucleusSamplingFactor))
-            {
-                writer.WritePropertyName("top_p"u8);
-                writer.WriteNumberValue(NucleusSamplingFactor.Value);
-            }
-            if (Optional.IsDefined(MaxTokens))
-            {
-                writer.WritePropertyName("max_tokens"u8);
-                writer.WriteNumberValue(MaxTokens.Value);
-            }
-            if (Optional.IsDefined(ResponseFormat))
-            {
-                writer.WritePropertyName("response_format"u8);
-                writer.WriteObjectValue(ResponseFormat, options);
-            }
-            if (Optional.IsCollectionDefined(StopSequences))
-            {
-                writer.WritePropertyName("stop"u8);
-                writer.WriteStartArray();
-                foreach (var item in StopSequences)
-                {
-                    writer.WriteStringValue(item);
-                }
-                writer.WriteEndArray();
-            }
-            if (Optional.IsCollectionDefined(Tools))
-            {
-                writer.WritePropertyName("tools"u8);
-                writer.WriteStartArray();
-                foreach (var item in Tools)
-                {
-                    writer.WriteObjectValue(item, options);
-                }
-                writer.WriteEndArray();
-            }
-            if (Optional.IsDefined(InternalSuppressedToolChoice))
-            {
-                writer.WritePropertyName("tool_choice"u8);
-#if NET6_0_OR_GREATER
-				writer.WriteRawValue(InternalSuppressedToolChoice);
-#else
-                using (JsonDocument document = JsonDocument.Parse(InternalSuppressedToolChoice))
-                {
-                    JsonSerializer.Serialize(writer, document.RootElement);
-                }
-#endif
-            }
-            if (Optional.IsDefined(Seed))
-            {
-                writer.WritePropertyName("seed"u8);
-                writer.WriteNumberValue(Seed.Value);
-            }
-            if (Optional.IsDefined(Model))
-            {
-                writer.WritePropertyName("model"u8);
-                writer.WriteStringValue(Model);
-            }
-            foreach (var item in AdditionalProperties)
-            {
-                writer.WritePropertyName(item.Key);
-#if NET6_0_OR_GREATER
-				writer.WriteRawValue(item.Value);
-#else
-                using (JsonDocument document = JsonDocument.Parse(item.Value))
-                {
-                    JsonSerializer.Serialize(writer, document.RootElement);
-                }
-#endif
-            }
-        }
-
-        ChatCompletionsOptions IJsonModel<ChatCompletionsOptions>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsOptions>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(ChatCompletionsOptions)} does not support reading '{format}' format.");
-            }
-
-            using JsonDocument document = JsonDocument.ParseValue(ref reader);
-            return DeserializeChatCompletionsOptions(document.RootElement, options);
-        }
-
-        internal static ChatCompletionsOptions DeserializeChatCompletionsOptions(JsonElement element, ModelReaderWriterOptions options = null)
-        {
-            options ??= ModelSerializationExtensions.WireOptions;
-
-            if (element.ValueKind == JsonValueKind.Null)
-            {
-                return null;
-            }
-            IList<ChatRequestMessage> messages = default;
-            float? frequencyPenalty = default;
-            bool? stream = default;
-            float? presencePenalty = default;
-            float? temperature = default;
-            float? topP = default;
-            int? maxTokens = default;
-            ChatCompletionsResponseFormat responseFormat = default;
-            IList<string> stop = default;
-            IList<ChatCompletionsToolDefinition> tools = default;
-            BinaryData toolChoice = default;
-            long? seed = default;
-            string model = default;
-            IDictionary<string, BinaryData> additionalProperties = default;
-            Dictionary<string, BinaryData> additionalPropertiesDictionary = new Dictionary<string, BinaryData>();
-            foreach (var property in element.EnumerateObject())
-            {
-                if (property.NameEquals("messages"u8))
-                {
-                    List<ChatRequestMessage> array = new List<ChatRequestMessage>();
-                    foreach (var item in property.Value.EnumerateArray())
-                    {
-                        array.Add(ChatRequestMessage.DeserializeChatRequestMessage(item, options));
-                    }
-                    messages = array;
-                    continue;
-                }
-                if (property.NameEquals("frequency_penalty"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    frequencyPenalty = property.Value.GetSingle();
-                    continue;
-                }
-                if (property.NameEquals("stream"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    stream = property.Value.GetBoolean();
-                    continue;
-                }
-                if (property.NameEquals("presence_penalty"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    presencePenalty = property.Value.GetSingle();
-                    continue;
-                }
-                if (property.NameEquals("temperature"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    temperature = property.Value.GetSingle();
-                    continue;
-                }
-                if (property.NameEquals("top_p"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    topP = property.Value.GetSingle();
-                    continue;
-                }
-                if (property.NameEquals("max_tokens"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    maxTokens = property.Value.GetInt32();
-                    continue;
-                }
-                if (property.NameEquals("response_format"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    responseFormat = ChatCompletionsResponseFormat.DeserializeChatCompletionsResponseFormat(property.Value, options);
-                    continue;
-                }
-                if (property.NameEquals("stop"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    List<string> array = new List<string>();
-                    foreach (var item in property.Value.EnumerateArray())
-                    {
-                        array.Add(item.GetString());
-                    }
-                    stop = array;
-                    continue;
-                }
-                if (property.NameEquals("tools"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    List<ChatCompletionsToolDefinition> array = new List<ChatCompletionsToolDefinition>();
-                    foreach (var item in property.Value.EnumerateArray())
-                    {
-                        array.Add(ChatCompletionsToolDefinition.DeserializeChatCompletionsToolDefinition(item, options));
-                    }
-                    tools = array;
-                    continue;
-                }
-                if (property.NameEquals("tool_choice"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    toolChoice = BinaryData.FromString(property.Value.GetRawText());
-                    continue;
-                }
-                if (property.NameEquals("seed"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    seed = property.Value.GetInt64();
-                    continue;
-                }
-                if (property.NameEquals("model"u8))
-                {
-                    model = property.Value.GetString();
-                    continue;
-                }
-                additionalPropertiesDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
-            }
-            additionalProperties = additionalPropertiesDictionary;
-            return new ChatCompletionsOptions(
-                messages,
-                frequencyPenalty,
-                stream,
-                presencePenalty,
-                temperature,
-                topP,
-                maxTokens,
-                responseFormat,
-                stop ?? new ChangeTrackingList<string>(),
-                tools ?? new ChangeTrackingList<ChatCompletionsToolDefinition>(),
-                toolChoice,
-                seed,
-                model,
-                additionalProperties);
-        }
-
-        BinaryData IPersistableModel<ChatCompletionsOptions>.Write(ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsOptions>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    return ModelReaderWriter.Write(this, options);
-                default:
-                    throw new FormatException($"The model {nameof(ChatCompletionsOptions)} does not support writing '{options.Format}' format.");
-            }
-        }
-
-        ChatCompletionsOptions IPersistableModel<ChatCompletionsOptions>.Create(BinaryData data, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsOptions>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    {
-                        using JsonDocument document = JsonDocument.Parse(data);
-                        return DeserializeChatCompletionsOptions(document.RootElement, options);
-                    }
-                default:
-                    throw new FormatException($"The model {nameof(ChatCompletionsOptions)} does not support reading '{options.Format}' format.");
-            }
-        }
-
-        string IPersistableModel<ChatCompletionsOptions>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
-
-        /// <summary> Deserializes the model from a raw response. </summary>
-        /// <param name="response"> The response to deserialize the model from. </param>
-        internal static ChatCompletionsOptions FromResponse(Response response)
-        {
-            using var document = JsonDocument.Parse(response.Content);
-            return DeserializeChatCompletionsOptions(document.RootElement);
-        }
-
-        /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
-        internal virtual RequestContent ToRequestContent()
-        {
-            var content = new Utf8JsonRequestContent();
-            content.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
-            return content;
-        }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsOptions.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsOptions.cs
deleted file mode 100644
index 55b2bf833d24..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsOptions.cs
+++ /dev/null
@@ -1,210 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace Azure.AI.Inference
-{
-    /// <summary>
-    /// The configuration information for a chat completions request.
-    /// Completions support a wide variety of tasks and generate text that continues from or "completes"
-    /// provided prompt data.
-    /// </summary>
-    public partial class ChatCompletionsOptions
-    {
-        /// <summary> Initializes a new instance of <see cref="ChatCompletionsOptions"/>. </summary>
-        /// <param name="messages">
-        /// The collection of context messages associated with this chat completions request.
-        /// Typical usage begins with a chat message for the System role that provides instructions for
-        /// the behavior of the assistant, followed by alternating messages between the User and
-        /// Assistant roles.
-        /// Please note <see cref="ChatRequestMessage"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be casted to one of the possible derived classes.
-        /// The available derived classes include <see cref="ChatRequestAssistantMessage"/>, <see cref="ChatRequestSystemMessage"/>, <see cref="ChatRequestToolMessage"/> and <see cref="ChatRequestUserMessage"/>.
-        /// </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="messages"/> is null. </exception>
-        public ChatCompletionsOptions(IEnumerable<ChatRequestMessage> messages)
-        {
-            Argument.AssertNotNull(messages, nameof(messages));
-
-            Messages = messages.ToList();
-            StopSequences = new ChangeTrackingList<string>();
-            Tools = new ChangeTrackingList<ChatCompletionsToolDefinition>();
-            AdditionalProperties = new ChangeTrackingDictionary<string, BinaryData>();
-        }
-
-        /// <summary> Initializes a new instance of <see cref="ChatCompletionsOptions"/>. </summary>
-        /// <param name="messages">
-        /// The collection of context messages associated with this chat completions request.
-        /// Typical usage begins with a chat message for the System role that provides instructions for
-        /// the behavior of the assistant, followed by alternating messages between the User and
-        /// Assistant roles.
-        /// Please note <see cref="ChatRequestMessage"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be casted to one of the possible derived classes.
-        /// The available derived classes include <see cref="ChatRequestAssistantMessage"/>, <see cref="ChatRequestSystemMessage"/>, <see cref="ChatRequestToolMessage"/> and <see cref="ChatRequestUserMessage"/>.
-        /// </param>
-        /// <param name="frequencyPenalty">
-        /// A value that influences the probability of generated tokens appearing based on their cumulative
-        /// frequency in generated text.
-        /// Positive values will make tokens less likely to appear as their frequency increases and
-        /// decrease the likelihood of the model repeating the same statements verbatim.
-        /// Supported range is [-2, 2].
-        /// </param>
-        /// <param name="internalShouldStreamResponse"> A value indicating whether chat completions should be streamed for this request. </param>
-        /// <param name="presencePenalty">
-        /// A value that influences the probability of generated tokens appearing based on their existing
-        /// presence in generated text.
-        /// Positive values will make tokens less likely to appear when they already exist and increase the
-        /// model's likelihood to output new topics.
-        /// Supported range is [-2, 2].
-        /// </param>
-        /// <param name="temperature">
-        /// The sampling temperature to use that controls the apparent creativity of generated completions.
-        /// Higher values will make output more random while lower values will make results more focused
-        /// and deterministic.
-        /// It is not recommended to modify temperature and top_p for the same completions request as the
-        /// interaction of these two settings is difficult to predict.
-        /// Supported range is [0, 1].
-        /// </param>
-        /// <param name="nucleusSamplingFactor">
-        /// An alternative to sampling with temperature called nucleus sampling. This value causes the
-        /// model to consider the results of tokens with the provided probability mass. As an example, a
-        /// value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
-        /// considered.
-        /// It is not recommended to modify temperature and top_p for the same completions request as the
-        /// interaction of these two settings is difficult to predict.
-        /// Supported range is [0, 1].
-        /// </param>
-        /// <param name="maxTokens"> The maximum number of tokens to generate. </param>
-        /// <param name="responseFormat">
-        /// The format that the model must output. Use this to enable JSON mode instead of the default text mode.
-        /// Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        /// via a system or user message.
-        /// Please note <see cref="ChatCompletionsResponseFormat"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be casted to one of the possible derived classes.
-        /// The available derived classes include <see cref="ChatCompletionsResponseFormatJSON"/> and <see cref="ChatCompletionsResponseFormatText"/>.
-        /// </param>
-        /// <param name="stopSequences"> A collection of textual sequences that will end completions generation. </param>
-        /// <param name="tools">
-        /// A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
-        /// may response with a function call request and provide the input arguments in JSON format for that function.
-        /// </param>
-        /// <param name="internalSuppressedToolChoice"> If specified, the model will configure which of the provided tools it can use for the chat completions response. </param>
-        /// <param name="seed">
-        /// If specified, the system will make a best effort to sample deterministically such that repeated requests with the
-        /// same seed and parameters should return the same result. Determinism is not guaranteed.
-        /// </param>
-        /// <param name="model"> ID of the specific AI model to use, if more than one model is available on the endpoint. </param>
-        /// <param name="additionalProperties"> Additional Properties. </param>
-        internal ChatCompletionsOptions(IList<ChatRequestMessage> messages, float? frequencyPenalty, bool? internalShouldStreamResponse, float? presencePenalty, float? temperature, float? nucleusSamplingFactor, int? maxTokens, ChatCompletionsResponseFormat responseFormat, IList<string> stopSequences, IList<ChatCompletionsToolDefinition> tools, BinaryData internalSuppressedToolChoice, long? seed, string model, IDictionary<string, BinaryData> additionalProperties)
-        {
-            Messages = messages;
-            FrequencyPenalty = frequencyPenalty;
-            InternalShouldStreamResponse = internalShouldStreamResponse;
-            PresencePenalty = presencePenalty;
-            Temperature = temperature;
-            NucleusSamplingFactor = nucleusSamplingFactor;
-            MaxTokens = maxTokens;
-            ResponseFormat = responseFormat;
-            StopSequences = stopSequences;
-            Tools = tools;
-            InternalSuppressedToolChoice = internalSuppressedToolChoice;
-            Seed = seed;
-            Model = model;
-            AdditionalProperties = additionalProperties;
-        }
-        /// <summary>
-        /// A value that influences the probability of generated tokens appearing based on their cumulative
-        /// frequency in generated text.
-        /// Positive values will make tokens less likely to appear as their frequency increases and
-        /// decrease the likelihood of the model repeating the same statements verbatim.
-        /// Supported range is [-2, 2].
-        /// </summary>
-        public float? FrequencyPenalty { get; set; }
-        /// <summary>
-        /// A value that influences the probability of generated tokens appearing based on their existing
-        /// presence in generated text.
-        /// Positive values will make tokens less likely to appear when they already exist and increase the
-        /// model's likelihood to output new topics.
-        /// Supported range is [-2, 2].
-        /// </summary>
-        public float? PresencePenalty { get; set; }
-        /// <summary>
-        /// The sampling temperature to use that controls the apparent creativity of generated completions.
-        /// Higher values will make output more random while lower values will make results more focused
-        /// and deterministic.
-        /// It is not recommended to modify temperature and top_p for the same completions request as the
-        /// interaction of these two settings is difficult to predict.
-        /// Supported range is [0, 1].
-        /// </summary>
-        public float? Temperature { get; set; }
-        /// <summary>
-        /// An alternative to sampling with temperature called nucleus sampling. This value causes the
-        /// model to consider the results of tokens with the provided probability mass. As an example, a
-        /// value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
-        /// considered.
-        /// It is not recommended to modify temperature and top_p for the same completions request as the
-        /// interaction of these two settings is difficult to predict.
-        /// Supported range is [0, 1].
-        /// </summary>
-        public float? NucleusSamplingFactor { get; set; }
-        /// <summary> The maximum number of tokens to generate. </summary>
-        public int? MaxTokens { get; set; }
-        /// <summary>
-        /// The format that the model must output. Use this to enable JSON mode instead of the default text mode.
-        /// Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
-        /// via a system or user message.
-        /// Please note <see cref="ChatCompletionsResponseFormat"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be casted to one of the possible derived classes.
-        /// The available derived classes include <see cref="ChatCompletionsResponseFormatJSON"/> and <see cref="ChatCompletionsResponseFormatText"/>.
-        /// </summary>
-        public ChatCompletionsResponseFormat ResponseFormat { get; set; }
-        /// <summary> A collection of textual sequences that will end completions generation. </summary>
-        public IList<string> StopSequences { get; }
-        /// <summary>
-        /// A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
-        /// may response with a function call request and provide the input arguments in JSON format for that function.
-        /// </summary>
-        public IList<ChatCompletionsToolDefinition> Tools { get; }
-        /// <summary>
-        /// If specified, the system will make a best effort to sample deterministically such that repeated requests with the
-        /// same seed and parameters should return the same result. Determinism is not guaranteed.
-        /// </summary>
-        public long? Seed { get; set; }
-        /// <summary> ID of the specific AI model to use, if more than one model is available on the endpoint. </summary>
-        public string Model { get; set; }
-        /// <summary>
-        /// Additional Properties
-        /// <para>
-        /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
-        /// </para>
-        /// <para>
-        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
-        /// </para>
-        /// <para>
-        /// Examples:
-        /// <list type="bullet">
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson("foo")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("\"foo\"")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// </list>
-        /// </para>
-        /// </summary>
-        public IDictionary<string, BinaryData> AdditionalProperties { get; }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.Serialization.cs
index 19320a2d360f..76c5cc54cbe8 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.Serialization.cs
@@ -13,7 +13,7 @@
 namespace Azure.AI.Inference
 {
     [PersistableModelProxy(typeof(UnknownChatCompletionsResponseFormat))]
-    public partial class ChatCompletionsResponseFormat : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormat>
+    internal partial class ChatCompletionsResponseFormat : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormat>
     {
         void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatCompletionsResponseFormat>)this).Write(writer, ModelSerializationExtensions.WireOptions);
 
@@ -77,7 +77,8 @@ internal static ChatCompletionsResponseFormat DeserializeChatCompletionsResponse
             {
                 switch (discriminator.GetString())
                 {
-                    case "json_object": return ChatCompletionsResponseFormatJSON.DeserializeChatCompletionsResponseFormatJSON(element, options);
+                    case "json_object": return ChatCompletionsResponseFormatJsonObject.DeserializeChatCompletionsResponseFormatJsonObject(element, options);
+                    case "json_schema": return ChatCompletionsResponseFormatJsonSchema.DeserializeChatCompletionsResponseFormatJsonSchema(element, options);
                     case "text": return ChatCompletionsResponseFormatText.DeserializeChatCompletionsResponseFormatText(element, options);
                 }
             }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.cs
index 93d9fa9b6c0d..65eee831814d 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormat.cs
@@ -15,9 +15,9 @@ namespace Azure.AI.Inference
     /// Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
     /// via a system or user message.
     /// Please note <see cref="ChatCompletionsResponseFormat"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be casted to one of the possible derived classes.
-    /// The available derived classes include <see cref="ChatCompletionsResponseFormatJSON"/> and <see cref="ChatCompletionsResponseFormatText"/>.
+    /// The available derived classes include <see cref="ChatCompletionsResponseFormatJsonObject"/>, <see cref="ChatCompletionsResponseFormatJsonSchema"/> and <see cref="ChatCompletionsResponseFormatText"/>.
     /// </summary>
-    public abstract partial class ChatCompletionsResponseFormat
+    internal abstract partial class ChatCompletionsResponseFormat
     {
         /// <summary>
         /// Keeps track of any properties unknown to the library.
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJSON.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonObject.Serialization.cs
similarity index 61%
rename from sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJSON.Serialization.cs
rename to sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonObject.Serialization.cs
index c86d5a9969bd..d2148969d227 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJSON.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonObject.Serialization.cs
@@ -13,11 +13,11 @@
 
 namespace Azure.AI.Inference
 {
-    public partial class ChatCompletionsResponseFormatJSON : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormatJSON>
+    internal partial class ChatCompletionsResponseFormatJsonObject : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormatJsonObject>
     {
-        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatCompletionsResponseFormatJSON>)this).Write(writer, ModelSerializationExtensions.WireOptions);
+        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatCompletionsResponseFormatJsonObject>)this).Write(writer, ModelSerializationExtensions.WireOptions);
 
-        void IJsonModel<ChatCompletionsResponseFormatJSON>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        void IJsonModel<ChatCompletionsResponseFormatJsonObject>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
         {
             writer.WriteStartObject();
             JsonModelWriteCore(writer, options);
@@ -28,28 +28,28 @@ void IJsonModel<ChatCompletionsResponseFormatJSON>.Write(Utf8JsonWriter writer,
         /// <param name="options"> The client options for reading and writing models. </param>
         protected override void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJSON>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJsonObject>)this).GetFormatFromOptions(options) : options.Format;
             if (format != "J")
             {
-                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJSON)} does not support writing '{format}' format.");
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonObject)} does not support writing '{format}' format.");
             }
 
             base.JsonModelWriteCore(writer, options);
         }
 
-        ChatCompletionsResponseFormatJSON IJsonModel<ChatCompletionsResponseFormatJSON>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
+        ChatCompletionsResponseFormatJsonObject IJsonModel<ChatCompletionsResponseFormatJsonObject>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJSON>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJsonObject>)this).GetFormatFromOptions(options) : options.Format;
             if (format != "J")
             {
-                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJSON)} does not support reading '{format}' format.");
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonObject)} does not support reading '{format}' format.");
             }
 
             using JsonDocument document = JsonDocument.ParseValue(ref reader);
-            return DeserializeChatCompletionsResponseFormatJSON(document.RootElement, options);
+            return DeserializeChatCompletionsResponseFormatJsonObject(document.RootElement, options);
         }
 
-        internal static ChatCompletionsResponseFormatJSON DeserializeChatCompletionsResponseFormatJSON(JsonElement element, ModelReaderWriterOptions options = null)
+        internal static ChatCompletionsResponseFormatJsonObject DeserializeChatCompletionsResponseFormatJsonObject(JsonElement element, ModelReaderWriterOptions options = null)
         {
             options ??= ModelSerializationExtensions.WireOptions;
 
@@ -73,46 +73,46 @@ internal static ChatCompletionsResponseFormatJSON DeserializeChatCompletionsResp
                 }
             }
             serializedAdditionalRawData = rawDataDictionary;
-            return new ChatCompletionsResponseFormatJSON(type, serializedAdditionalRawData);
+            return new ChatCompletionsResponseFormatJsonObject(type, serializedAdditionalRawData);
         }
 
-        BinaryData IPersistableModel<ChatCompletionsResponseFormatJSON>.Write(ModelReaderWriterOptions options)
+        BinaryData IPersistableModel<ChatCompletionsResponseFormatJsonObject>.Write(ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJSON>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJsonObject>)this).GetFormatFromOptions(options) : options.Format;
 
             switch (format)
             {
                 case "J":
                     return ModelReaderWriter.Write(this, options);
                 default:
-                    throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJSON)} does not support writing '{options.Format}' format.");
+                    throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonObject)} does not support writing '{options.Format}' format.");
             }
         }
 
-        ChatCompletionsResponseFormatJSON IPersistableModel<ChatCompletionsResponseFormatJSON>.Create(BinaryData data, ModelReaderWriterOptions options)
+        ChatCompletionsResponseFormatJsonObject IPersistableModel<ChatCompletionsResponseFormatJsonObject>.Create(BinaryData data, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJSON>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJsonObject>)this).GetFormatFromOptions(options) : options.Format;
 
             switch (format)
             {
                 case "J":
                     {
                         using JsonDocument document = JsonDocument.Parse(data);
-                        return DeserializeChatCompletionsResponseFormatJSON(document.RootElement, options);
+                        return DeserializeChatCompletionsResponseFormatJsonObject(document.RootElement, options);
                     }
                 default:
-                    throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJSON)} does not support reading '{options.Format}' format.");
+                    throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonObject)} does not support reading '{options.Format}' format.");
             }
         }
 
-        string IPersistableModel<ChatCompletionsResponseFormatJSON>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
+        string IPersistableModel<ChatCompletionsResponseFormatJsonObject>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
 
         /// <summary> Deserializes the model from a raw response. </summary>
         /// <param name="response"> The response to deserialize the model from. </param>
-        internal static new ChatCompletionsResponseFormatJSON FromResponse(Response response)
+        internal static new ChatCompletionsResponseFormatJsonObject FromResponse(Response response)
         {
             using var document = JsonDocument.Parse(response.Content);
-            return DeserializeChatCompletionsResponseFormatJSON(document.RootElement);
+            return DeserializeChatCompletionsResponseFormatJsonObject(document.RootElement);
         }
 
         /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJSON.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonObject.cs
similarity index 68%
rename from sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJSON.cs
rename to sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonObject.cs
index e52f9c3529f0..21669dc2fb87 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJSON.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonObject.cs
@@ -15,18 +15,18 @@ namespace Azure.AI.Inference
     /// Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
     /// via a system or user message.
     /// </summary>
-    public partial class ChatCompletionsResponseFormatJSON : ChatCompletionsResponseFormat
+    internal partial class ChatCompletionsResponseFormatJsonObject : ChatCompletionsResponseFormat
     {
-        /// <summary> Initializes a new instance of <see cref="ChatCompletionsResponseFormatJSON"/>. </summary>
-        public ChatCompletionsResponseFormatJSON()
+        /// <summary> Initializes a new instance of <see cref="ChatCompletionsResponseFormatJsonObject"/>. </summary>
+        public ChatCompletionsResponseFormatJsonObject()
         {
             Type = "json_object";
         }
 
-        /// <summary> Initializes a new instance of <see cref="ChatCompletionsResponseFormatJSON"/>. </summary>
+        /// <summary> Initializes a new instance of <see cref="ChatCompletionsResponseFormatJsonObject"/>. </summary>
         /// <param name="type"> The response format type to use for chat completions. </param>
         /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal ChatCompletionsResponseFormatJSON(string type, IDictionary<string, BinaryData> serializedAdditionalRawData) : base(type, serializedAdditionalRawData)
+        internal ChatCompletionsResponseFormatJsonObject(string type, IDictionary<string, BinaryData> serializedAdditionalRawData) : base(type, serializedAdditionalRawData)
         {
         }
     }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchema.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchema.Serialization.cs
new file mode 100644
index 000000000000..c830d922a9cf
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchema.Serialization.cs
@@ -0,0 +1,176 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.ClientModel.Primitives;
+using System.Collections.Generic;
+using System.Text.Json;
+using Azure.Core;
+
+namespace Azure.AI.Inference
+{
+    internal partial class ChatCompletionsResponseFormatJsonSchema : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormatJsonSchema>
+    {
+        /// <summary> Writes this model to <paramref name="writer"/> using <c>ModelSerializationExtensions.WireOptions</c>. </summary>
+        /// <param name="writer"> The JSON writer. </param>
+        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer)
+        {
+            IJsonModel<ChatCompletionsResponseFormatJsonSchema> jsonModel = this;
+            jsonModel.Write(writer, ModelSerializationExtensions.WireOptions);
+        }
+
+        /// <summary> Writes this model as a single JSON object. </summary>
+        /// <param name="writer"> The JSON writer. </param>
+        /// <param name="options"> The client options for reading and writing models. </param>
+        void IJsonModel<ChatCompletionsResponseFormatJsonSchema>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        {
+            // The object delimiters are written here, around the members emitted by JsonModelWriteCore.
+            writer.WriteStartObject();
+            JsonModelWriteCore(writer, options);
+            writer.WriteEndObject();
+        }
+
+        /// <summary> Writes the base members followed by the <c>json_schema</c> property. </summary>
+        /// <param name="writer"> The JSON writer. </param>
+        /// <param name="options"> The client options for reading and writing models. </param>
+        /// <exception cref="FormatException"> The resolved format is not "J". </exception>
+        protected override void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        {
+            string format = options.Format;
+            if (format == "W")
+            {
+                // "W" defers to the format this model reports for itself.
+                format = ((IPersistableModel<ChatCompletionsResponseFormatJsonSchema>)this).GetFormatFromOptions(options);
+            }
+            if (format != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchema)} does not support writing '{format}' format.");
+            }
+
+            base.JsonModelWriteCore(writer, options);
+            writer.WritePropertyName("json_schema"u8);
+            writer.WriteObjectValue(JsonSchema, options);
+        }
+
+        /// <summary> Reads one JSON value from <paramref name="reader"/> and deserializes it into a new instance. </summary>
+        /// <param name="reader"> The JSON reader to consume the value from. </param>
+        /// <param name="options"> The client options for reading and writing models. </param>
+        /// <exception cref="FormatException"> The resolved format is not "J". </exception>
+        ChatCompletionsResponseFormatJsonSchema IJsonModel<ChatCompletionsResponseFormatJsonSchema>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
+        {
+            string format = options.Format;
+            if (format == "W")
+            {
+                format = ((IPersistableModel<ChatCompletionsResponseFormatJsonSchema>)this).GetFormatFromOptions(options);
+            }
+            if (format != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchema)} does not support reading '{format}' format.");
+            }
+
+            using JsonDocument parsed = JsonDocument.ParseValue(ref reader);
+            return DeserializeChatCompletionsResponseFormatJsonSchema(parsed.RootElement, options);
+        }
+
+        /// <summary> Builds a <see cref="ChatCompletionsResponseFormatJsonSchema"/> from a JSON element. </summary>
+        /// <param name="element"> The JSON element to read. </param>
+        /// <param name="options"> The client options for reading and writing models; <c>ModelSerializationExtensions.WireOptions</c> is used when null. </param>
+        /// <returns> The deserialized model, or null when <paramref name="element"/> is a JSON null. </returns>
+        internal static ChatCompletionsResponseFormatJsonSchema DeserializeChatCompletionsResponseFormatJsonSchema(JsonElement element, ModelReaderWriterOptions options = null)
+        {
+            options ??= ModelSerializationExtensions.WireOptions;
+
+            if (element.ValueKind == JsonValueKind.Null)
+            {
+                return null;
+            }
+
+            string discriminator = default;
+            ChatCompletionsResponseFormatJsonSchemaDefinition schemaDefinition = default;
+            var unrecognized = new Dictionary<string, BinaryData>();
+            foreach (JsonProperty member in element.EnumerateObject())
+            {
+                if (member.NameEquals("json_schema"u8))
+                {
+                    schemaDefinition = ChatCompletionsResponseFormatJsonSchemaDefinition.DeserializeChatCompletionsResponseFormatJsonSchemaDefinition(member.Value, options);
+                }
+                else if (member.NameEquals("type"u8))
+                {
+                    discriminator = member.Value.GetString();
+                }
+                else if (options.Format != "W")
+                {
+                    // Properties this model does not recognize are kept as raw JSON unless the format is "W".
+                    unrecognized.Add(member.Name, BinaryData.FromString(member.Value.GetRawText()));
+                }
+            }
+
+            return new ChatCompletionsResponseFormatJsonSchema(discriminator, unrecognized, schemaDefinition);
+        }
+
+        /// <summary> Serializes this model to <see cref="BinaryData"/>. </summary>
+        /// <param name="options"> The client options for reading and writing models. </param>
+        /// <exception cref="FormatException"> The resolved format is not "J". </exception>
+        BinaryData IPersistableModel<ChatCompletionsResponseFormatJsonSchema>.Write(ModelReaderWriterOptions options)
+        {
+            string format = options.Format;
+            if (format == "W")
+            {
+                format = ((IPersistableModel<ChatCompletionsResponseFormatJsonSchema>)this).GetFormatFromOptions(options);
+            }
+            if (format != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchema)} does not support writing '{options.Format}' format.");
+            }
+
+            return ModelReaderWriter.Write(this, options);
+        }
+
+        /// <summary> Deserializes a new instance from <paramref name="data"/>. </summary>
+        /// <param name="data"> The JSON payload to parse. </param>
+        /// <param name="options"> The client options for reading and writing models. </param>
+        /// <exception cref="FormatException"> The resolved format is not "J". </exception>
+        ChatCompletionsResponseFormatJsonSchema IPersistableModel<ChatCompletionsResponseFormatJsonSchema>.Create(BinaryData data, ModelReaderWriterOptions options)
+        {
+            string format = options.Format;
+            if (format == "W")
+            {
+                format = ((IPersistableModel<ChatCompletionsResponseFormatJsonSchema>)this).GetFormatFromOptions(options);
+            }
+            if (format != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchema)} does not support reading '{options.Format}' format.");
+            }
+
+            using JsonDocument parsed = JsonDocument.Parse(data);
+            return DeserializeChatCompletionsResponseFormatJsonSchema(parsed.RootElement, options);
+        }
+
+        /// <summary> Reports "J" as the format for this model, whatever options are supplied. </summary>
+        string IPersistableModel<ChatCompletionsResponseFormatJsonSchema>.GetFormatFromOptions(ModelReaderWriterOptions options)
+        {
+            return "J";
+        }
+
+        /// <summary> Deserializes the model from a raw response. </summary>
+        /// <param name="response"> The response to deserialize the model from. </param>
+        internal static new ChatCompletionsResponseFormatJsonSchema FromResponse(Response response)
+        {
+            using JsonDocument parsed = JsonDocument.Parse(response.Content);
+            JsonElement root = parsed.RootElement;
+            return DeserializeChatCompletionsResponseFormatJsonSchema(root);
+        }
+
+        /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
+        internal override RequestContent ToRequestContent()
+        {
+            Utf8JsonRequestContent requestBody = new Utf8JsonRequestContent();
+            requestBody.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
+            return requestBody;
+        }
+    }
+}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchema.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchema.cs
new file mode 100644
index 000000000000..f6932d2b4955
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchema.cs
@@ -0,0 +1,47 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+
+namespace Azure.AI.Inference
+{
+    /// <summary>
+    /// Chat Completions response format that limits the output to valid JSON objects which follow
+    /// a JSON schema supplied by the caller.
+    /// </summary>
+    internal partial class ChatCompletionsResponseFormatJsonSchema : ChatCompletionsResponseFormat
+    {
+        /// <summary> Creates a <see cref="ChatCompletionsResponseFormatJsonSchema"/> whose type is set to "json_schema". </summary>
+        /// <param name="jsonSchema"> The required JSON schema definition for the response, with its associated metadata. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="jsonSchema"/> is null. </exception>
+        public ChatCompletionsResponseFormatJsonSchema(ChatCompletionsResponseFormatJsonSchemaDefinition jsonSchema)
+        {
+            Argument.AssertNotNull(jsonSchema, nameof(jsonSchema));
+
+            JsonSchema = jsonSchema;
+            Type = "json_schema";
+        }
+
+        /// <summary> Creates a <see cref="ChatCompletionsResponseFormatJsonSchema"/> from already-parsed values. </summary>
+        /// <param name="type"> The response format type to use for chat completions; forwarded to the base class. </param>
+        /// <param name="serializedAdditionalRawData"> Properties unknown to the library; forwarded to the base class. </param>
+        /// <param name="jsonSchema"> The required JSON schema definition for the response, with its associated metadata. </param>
+        internal ChatCompletionsResponseFormatJsonSchema(string type, IDictionary<string, BinaryData> serializedAdditionalRawData, ChatCompletionsResponseFormatJsonSchemaDefinition jsonSchema) : base(type, serializedAdditionalRawData)
+        {
+            JsonSchema = jsonSchema;
+        }
+
+        /// <summary> Parameterless constructor of <see cref="ChatCompletionsResponseFormatJsonSchema"/> used for deserialization; it assigns nothing. </summary>
+        internal ChatCompletionsResponseFormatJsonSchema()
+        {
+        }
+
+        /// <summary> The required JSON schema definition for the response, with its associated metadata. </summary>
+        public ChatCompletionsResponseFormatJsonSchemaDefinition JsonSchema { get; }
+    }
+}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchemaDefinition.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchemaDefinition.Serialization.cs
new file mode 100644
index 000000000000..0701b54653a0
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchemaDefinition.Serialization.cs
@@ -0,0 +1,206 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.ClientModel.Primitives;
+using System.Collections.Generic;
+using System.Text.Json;
+using Azure.Core;
+
+namespace Azure.AI.Inference
+{
+    internal partial class ChatCompletionsResponseFormatJsonSchemaDefinition : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormatJsonSchemaDefinition>
+    {
+        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatCompletionsResponseFormatJsonSchemaDefinition>)this).Write(writer, ModelSerializationExtensions.WireOptions); // Forwards to the IJsonModel writer using the shared wire options.
+
+        void IJsonModel<ChatCompletionsResponseFormatJsonSchemaDefinition>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        {
+            writer.WriteStartObject(); // open the JSON object that wraps this model
+            JsonModelWriteCore(writer, options); // emit the model's properties (also validates the requested format)
+            writer.WriteEndObject(); // close the wrapping object
+        }
+
+        /// <param name="writer"> The JSON writer that receives this model's properties; the IJsonModel Write method of this class opens and closes the enclosing object around this call. </param>
+        /// <param name="options"> The client options for reading and writing models; any format other than "J" (or "W", which resolves to "J") is rejected with a FormatException. </param>
+        protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        {
+            var format = options.Format == "W" ? ((IPersistableModel<ChatCompletionsResponseFormatJsonSchemaDefinition>)this).GetFormatFromOptions(options) : options.Format;
+            if (format != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchemaDefinition)} does not support writing '{format}' format.");
+            }
+
+            writer.WritePropertyName("name"u8);
+            writer.WriteStringValue(Name);
+            writer.WritePropertyName("schema"u8);
+            writer.WriteStartObject(); // "schema" is a nested object with one property per dictionary entry
+            foreach (var item in Schema)
+            {
+                writer.WritePropertyName(item.Key);
+                if (item.Value == null) // a null entry is emitted as JSON null
+                {
+                    writer.WriteNullValue();
+                    continue;
+                }
+#if NET6_0_OR_GREATER
+				writer.WriteRawValue(item.Value); // the stored bytes are written as raw JSON
+#else
+                using (JsonDocument document = JsonDocument.Parse(item.Value)) // when NET6_0_OR_GREATER is not defined: parse, then re-serialize
+                {
+                    JsonSerializer.Serialize(writer, document.RootElement);
+                }
+#endif
+            }
+            writer.WriteEndObject();
+            if (Optional.IsDefined(Description)) // "description" is omitted when not defined
+            {
+                writer.WritePropertyName("description"u8);
+                writer.WriteStringValue(Description);
+            }
+            if (Optional.IsDefined(Strict)) // "strict" is omitted when not defined
+            {
+                writer.WritePropertyName("strict"u8);
+                writer.WriteBooleanValue(Strict.Value);
+            }
+            if (options.Format != "W" && _serializedAdditionalRawData != null) // unknown properties are replayed only when the caller did not request "W"
+            {
+                foreach (var item in _serializedAdditionalRawData)
+                {
+                    writer.WritePropertyName(item.Key);
+#if NET6_0_OR_GREATER
+				writer.WriteRawValue(item.Value); // the stored bytes are written as raw JSON
+#else
+                    using (JsonDocument document = JsonDocument.Parse(item.Value)) // when NET6_0_OR_GREATER is not defined: parse, then re-serialize
+                    {
+                        JsonSerializer.Serialize(writer, document.RootElement);
+                    }
+#endif
+                }
+            }
+        }
+
+        ChatCompletionsResponseFormatJsonSchemaDefinition IJsonModel<ChatCompletionsResponseFormatJsonSchemaDefinition>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
+        {
+            IPersistableModel<ChatCompletionsResponseFormatJsonSchemaDefinition> self = this;
+            string resolved = options.Format == "W" ? self.GetFormatFromOptions(options) : options.Format;
+            if (resolved != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchemaDefinition)} does not support reading '{resolved}' format.");
+            }
+            using JsonDocument parsed = JsonDocument.ParseValue(ref reader);
+            return DeserializeChatCompletionsResponseFormatJsonSchemaDefinition(parsed.RootElement, options);
+        }
+
+        /// <summary> Builds a <see cref="ChatCompletionsResponseFormatJsonSchemaDefinition"/> from a JSON element; a JSON null yields <c>null</c>. </summary>
+        /// <param name="element"> The JSON element to read. </param>
+        /// <param name="options"> Reader options; the wire options are used when this is null. </param>
+        internal static ChatCompletionsResponseFormatJsonSchemaDefinition DeserializeChatCompletionsResponseFormatJsonSchemaDefinition(JsonElement element, ModelReaderWriterOptions options = null)
+        {
+            options ??= ModelSerializationExtensions.WireOptions;
+
+            if (element.ValueKind == JsonValueKind.Null)
+            {
+                return null;
+            }
+
+            string parsedName = default;
+            IDictionary<string, BinaryData> parsedSchema = default;
+            string parsedDescription = default;
+            bool? parsedStrict = default;
+            Dictionary<string, BinaryData> unknownProperties = new Dictionary<string, BinaryData>();
+            bool keepUnknown = options.Format != "W";
+
+            foreach (JsonProperty member in element.EnumerateObject())
+            {
+                if (member.NameEquals("name"u8))
+                {
+                    parsedName = member.Value.GetString();
+                }
+                else if (member.NameEquals("schema"u8))
+                {
+                    // Each entry keeps its raw JSON text; an explicit null stays null.
+                    Dictionary<string, BinaryData> entries = new Dictionary<string, BinaryData>();
+                    foreach (JsonProperty entry in member.Value.EnumerateObject())
+                    {
+                        bool isNull = entry.Value.ValueKind == JsonValueKind.Null;
+                        BinaryData raw = isNull
+                            ? null
+                            : BinaryData.FromString(entry.Value.GetRawText());
+                        entries.Add(entry.Name, raw);
+                    }
+                    parsedSchema = entries;
+                }
+                else if (member.NameEquals("description"u8))
+                {
+                    parsedDescription = member.Value.GetString();
+                }
+                else if (member.NameEquals("strict"u8))
+                {
+                    // A null "strict" leaves the value unset.
+                    if (member.Value.ValueKind != JsonValueKind.Null)
+                    {
+                        parsedStrict = member.Value.GetBoolean();
+                    }
+                }
+                else if (keepUnknown)
+                {
+                    // Unrecognized properties are retained unless the "W" format was requested.
+                    unknownProperties.Add(member.Name, BinaryData.FromString(member.Value.GetRawText()));
+                }
+            }
+
+            return new ChatCompletionsResponseFormatJsonSchemaDefinition(parsedName, parsedSchema, parsedDescription, parsedStrict, unknownProperties);
+        }
+
+        BinaryData IPersistableModel<ChatCompletionsResponseFormatJsonSchemaDefinition>.Write(ModelReaderWriterOptions options)
+        {
+            IPersistableModel<ChatCompletionsResponseFormatJsonSchemaDefinition> self = this;
+            string resolved = options.Format == "W" ? self.GetFormatFromOptions(options) : options.Format;
+
+            // Only "J" can be produced; the message reports the format the caller originally asked for.
+            if (resolved != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchemaDefinition)} does not support writing '{options.Format}' format.");
+            }
+            return ModelReaderWriter.Write(this, options);
+        }
+
+        ChatCompletionsResponseFormatJsonSchemaDefinition IPersistableModel<ChatCompletionsResponseFormatJsonSchemaDefinition>.Create(BinaryData data, ModelReaderWriterOptions options)
+        {
+            IPersistableModel<ChatCompletionsResponseFormatJsonSchemaDefinition> self = this;
+            string resolved = options.Format == "W" ? self.GetFormatFromOptions(options) : options.Format;
+
+            // Anything other than "J" is rejected; the message echoes the format the caller originally asked for.
+            if (resolved != "J")
+            {
+                throw new FormatException($"The model {nameof(ChatCompletionsResponseFormatJsonSchemaDefinition)} does not support reading '{options.Format}' format.");
+            }
+
+            // Parse the payload and hand its root element to the deserializer.
+            using JsonDocument parsed = JsonDocument.Parse(data);
+            return DeserializeChatCompletionsResponseFormatJsonSchemaDefinition(parsed.RootElement, options);
+        }
+
+        string IPersistableModel<ChatCompletionsResponseFormatJsonSchemaDefinition>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J"; // The "W" placeholder resolves to "J", the only format the read and write paths of this class accept.
+
+        /// <summary> Parses the content of <paramref name="response"/> as JSON and deserializes its root element. </summary>
+        /// <param name="response"> The raw response whose content carries the JSON payload. </param>
+        internal static ChatCompletionsResponseFormatJsonSchemaDefinition FromResponse(Response response)
+        {
+            using JsonDocument payload = JsonDocument.Parse(response.Content);
+            return DeserializeChatCompletionsResponseFormatJsonSchemaDefinition(payload.RootElement);
+        }
+
+        /// <summary> Serializes this instance with the wire options into a new <see cref="RequestContent"/>. </summary>
+        internal virtual RequestContent ToRequestContent()
+        {
+            Utf8JsonRequestContent requestBody = new Utf8JsonRequestContent();
+            requestBody.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
+            return requestBody;
+        }
+    }
+}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchemaDefinition.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchemaDefinition.cs
new file mode 100644
index 000000000000..749179704791
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatJsonSchemaDefinition.cs
@@ -0,0 +1,142 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+
+namespace Azure.AI.Inference
+{
+    /// <summary>
+    /// Defines the response format for chat completions as JSON with a given schema.
+    /// The AI model will need to adhere to this schema when generating completions.
+    /// </summary>
+    internal partial class ChatCompletionsResponseFormatJsonSchemaDefinition
+    {
+        /// <summary>
+        /// Properties read from a JSON payload that this library does not recognize, keyed by property name.
+        /// <para>
+        /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
+        /// </para>
+        /// <para>
+        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
+        /// </para>
+        /// <para>
+        /// Examples:
+        /// <list type="bullet">
+        /// <item>
+        /// <term>BinaryData.FromObjectAsJson("foo")</term>
+        /// <description>Creates a payload of "foo".</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromString("\"foo\"")</term>
+        /// <description>Creates a payload of "foo".</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
+        /// <description>Creates a payload of { "key": "value" }.</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
+        /// <description>Creates a payload of { "key": "value" }.</description>
+        /// </item>
+        /// </list>
+        /// </para>
+        /// </summary>
+        private IDictionary<string, BinaryData> _serializedAdditionalRawData;
+
+        /// <summary> Creates a <see cref="ChatCompletionsResponseFormatJsonSchemaDefinition"/> from its two required values. </summary>
+        /// <param name="name"> A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. </param>
+        /// <param name="schema">
+        /// The JSON schema definition itself. See https://json-schema.org/overview/what-is-jsonschema.
+        /// AI models usually support only a subset of the keywords that JSON schema defines;
+        /// consult your AI model documentation to determine what is supported.
+        /// </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="name"/> or <paramref name="schema"/> is null. </exception>
+        public ChatCompletionsResponseFormatJsonSchemaDefinition(string name, IDictionary<string, BinaryData> schema)
+        {
+            Argument.AssertNotNull(name, nameof(name));
+            Argument.AssertNotNull(schema, nameof(schema));
+
+            Schema = schema;
+            Name = name;
+        }
+
+        /// <summary> Creates a <see cref="ChatCompletionsResponseFormatJsonSchemaDefinition"/> from already-parsed values, without validating them. </summary>
+        /// <param name="name"> A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. </param>
+        /// <param name="schema">
+        /// The JSON schema definition itself. See https://json-schema.org/overview/what-is-jsonschema.
+        /// AI models usually support only a subset of the keywords that JSON schema defines;
+        /// consult your AI model documentation to determine what is supported.
+        /// </param>
+        /// <param name="description"> A description of the response format, used by the AI model to determine how to generate responses in this format. </param>
+        /// <param name="strict">
+        /// When true, the service will error out if the provided JSON schema contains keywords
+        /// not supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.
+        /// When false, and the provided JSON schema contains keywords not supported by the AI model,
+        /// the AI model will not error out. Instead it will ignore the unsupported keywords.
+        /// </param>
+        /// <param name="serializedAdditionalRawData"> Properties unknown to the library, stored as given. </param>
+        internal ChatCompletionsResponseFormatJsonSchemaDefinition(string name, IDictionary<string, BinaryData> schema, string description, bool? strict, IDictionary<string, BinaryData> serializedAdditionalRawData)
+        {
+            _serializedAdditionalRawData = serializedAdditionalRawData;
+            Strict = strict;
+            Description = description;
+            Schema = schema;
+            Name = name;
+        }
+
+        /// <summary> Initializes a new instance of <see cref="ChatCompletionsResponseFormatJsonSchemaDefinition"/> for deserialization; the body is empty, so every member keeps its default value. </summary>
+        internal ChatCompletionsResponseFormatJsonSchemaDefinition()
+        {
+        }
+
+        /// <summary> A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. </summary>
+        public string Name { get; } // Get-only: assigned by the constructors; serialized as the "name" property.
+        /// <summary>
+        /// The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema.
+        /// Note that AI models usually only support a subset of the keywords defined by JSON schema.
+        /// Consult your AI model documentation to determine what is supported.
+        /// <para>
+        /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
+        /// </para>
+        /// <para>
+        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
+        /// </para>
+        /// <para>
+        /// Examples:
+        /// <list type="bullet">
+        /// <item>
+        /// <term>BinaryData.FromObjectAsJson("foo")</term>
+        /// <description>Creates a payload of "foo".</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromString("\"foo\"")</term>
+        /// <description>Creates a payload of "foo".</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
+        /// <description>Creates a payload of { "key": "value" }.</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
+        /// <description>Creates a payload of { "key": "value" }.</description>
+        /// </item>
+        /// </list>
+        /// </para>
+        /// </summary>
+        public IDictionary<string, BinaryData> Schema { get; } // Get-only: assigned by the constructors; each entry is serialized as one property of the "schema" object.
+        /// <summary> A description of the response format, used by the AI model to determine how to generate responses in this format. </summary>
+        public string Description { get; set; } // Optional: serialized as "description" only when Optional.IsDefined reports it as defined.
+        /// <summary>
+        /// If set to true, the service will error out if the provided JSON schema contains keywords
+        /// not supported by the AI model. An example of such keyword may be `maxLength` for JSON type `string`.
+        /// If false, and the provided JSON schema contains keywords not supported by the AI model,
+        /// the AI model will not error out. Instead it will ignore the unsupported keywords.
+        /// </summary>
+        public bool? Strict { get; set; } // Optional: serialized as "strict" only when Optional.IsDefined reports it as defined.
+    }
+}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.Serialization.cs
index 338a5e605290..30bfe39f4c0f 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.Serialization.cs
@@ -13,7 +13,7 @@
 
 namespace Azure.AI.Inference
 {
-    public partial class ChatCompletionsResponseFormatText : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormatText>
+    internal partial class ChatCompletionsResponseFormatText : IUtf8JsonSerializable, IJsonModel<ChatCompletionsResponseFormatText>
     {
         void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatCompletionsResponseFormatText>)this).Write(writer, ModelSerializationExtensions.WireOptions);
 
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.cs
index e4df6128fe6c..6ac05f84ce3e 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatCompletionsResponseFormatText.cs
@@ -11,7 +11,7 @@
 namespace Azure.AI.Inference
 {
     /// <summary> A response format for Chat Completions that emits text responses. This is the default response format. </summary>
-    public partial class ChatCompletionsResponseFormatText : ChatCompletionsResponseFormat
+    internal partial class ChatCompletionsResponseFormatText : ChatCompletionsResponseFormat
     {
         /// <summary> Initializes a new instance of <see cref="ChatCompletionsResponseFormatText"/>. </summary>
         public ChatCompletionsResponseFormatText()
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbedRequest.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageAudioContentItem.Serialization.cs
similarity index 50%
rename from sdk/ai/Azure.AI.Inference/src/Generated/EmbedRequest.Serialization.cs
rename to sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageAudioContentItem.Serialization.cs
index db940a8c92d0..bdc18645059e 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbedRequest.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageAudioContentItem.Serialization.cs
@@ -13,11 +13,11 @@
 
 namespace Azure.AI.Inference
 {
-    internal partial class EmbedRequest : IUtf8JsonSerializable, IJsonModel<EmbedRequest>
+    public partial class ChatMessageAudioContentItem : IUtf8JsonSerializable, IJsonModel<ChatMessageAudioContentItem>
     {
-        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<EmbedRequest>)this).Write(writer, ModelSerializationExtensions.WireOptions);
+        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatMessageAudioContentItem>)this).Write(writer, ModelSerializationExtensions.WireOptions);
 
-        void IJsonModel<EmbedRequest>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        void IJsonModel<ChatMessageAudioContentItem>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
         {
             writer.WriteStartObject();
             JsonModelWriteCore(writer, options);
@@ -26,46 +26,32 @@ void IJsonModel<EmbedRequest>.Write(Utf8JsonWriter writer, ModelReaderWriterOpti
 
         /// <param name="writer"> The JSON writer. </param>
         /// <param name="options"> The client options for reading and writing models. </param>
-        protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        protected override void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbedRequest>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageAudioContentItem>)this).GetFormatFromOptions(options) : options.Format;
             if (format != "J")
             {
-                throw new FormatException($"The model {nameof(EmbedRequest)} does not support writing '{format}' format.");
+                throw new FormatException($"The model {nameof(ChatMessageAudioContentItem)} does not support writing '{format}' format.");
             }
 
-            writer.WritePropertyName("embeddingsOptions"u8);
-            writer.WriteObjectValue(EmbeddingsOptions, options);
-            if (options.Format != "W" && _serializedAdditionalRawData != null)
-            {
-                foreach (var item in _serializedAdditionalRawData)
-                {
-                    writer.WritePropertyName(item.Key);
-#if NET6_0_OR_GREATER
-				writer.WriteRawValue(item.Value);
-#else
-                    using (JsonDocument document = JsonDocument.Parse(item.Value))
-                    {
-                        JsonSerializer.Serialize(writer, document.RootElement);
-                    }
-#endif
-                }
-            }
+            base.JsonModelWriteCore(writer, options);
+            writer.WritePropertyName("input_audio"u8);
+            writer.WriteObjectValue(InputAudio, options);
         }
 
-        EmbedRequest IJsonModel<EmbedRequest>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
+        ChatMessageAudioContentItem IJsonModel<ChatMessageAudioContentItem>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbedRequest>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageAudioContentItem>)this).GetFormatFromOptions(options) : options.Format;
             if (format != "J")
             {
-                throw new FormatException($"The model {nameof(EmbedRequest)} does not support reading '{format}' format.");
+                throw new FormatException($"The model {nameof(ChatMessageAudioContentItem)} does not support reading '{format}' format.");
             }
 
             using JsonDocument document = JsonDocument.ParseValue(ref reader);
-            return DeserializeEmbedRequest(document.RootElement, options);
+            return DeserializeChatMessageAudioContentItem(document.RootElement, options);
         }
 
-        internal static EmbedRequest DeserializeEmbedRequest(JsonElement element, ModelReaderWriterOptions options = null)
+        internal static ChatMessageAudioContentItem DeserializeChatMessageAudioContentItem(JsonElement element, ModelReaderWriterOptions options = null)
         {
             options ??= ModelSerializationExtensions.WireOptions;
 
@@ -73,14 +59,20 @@ internal static EmbedRequest DeserializeEmbedRequest(JsonElement element, ModelR
             {
                 return null;
             }
-            EmbeddingsOptions embeddingsOptions = default;
+            ChatMessageInputAudio inputAudio = default;
+            string type = default;
             IDictionary<string, BinaryData> serializedAdditionalRawData = default;
             Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
             foreach (var property in element.EnumerateObject())
             {
-                if (property.NameEquals("embeddingsOptions"u8))
+                if (property.NameEquals("input_audio"u8))
+                {
+                    inputAudio = ChatMessageInputAudio.DeserializeChatMessageInputAudio(property.Value, options);
+                    continue;
+                }
+                if (property.NameEquals("type"u8))
                 {
-                    embeddingsOptions = EmbeddingsOptions.DeserializeEmbeddingsOptions(property.Value, options);
+                    type = property.Value.GetString();
                     continue;
                 }
                 if (options.Format != "W")
@@ -89,50 +81,50 @@ internal static EmbedRequest DeserializeEmbedRequest(JsonElement element, ModelR
                 }
             }
             serializedAdditionalRawData = rawDataDictionary;
-            return new EmbedRequest(embeddingsOptions, serializedAdditionalRawData);
+            return new ChatMessageAudioContentItem(type, serializedAdditionalRawData, inputAudio);
         }
 
-        BinaryData IPersistableModel<EmbedRequest>.Write(ModelReaderWriterOptions options)
+        BinaryData IPersistableModel<ChatMessageAudioContentItem>.Write(ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbedRequest>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageAudioContentItem>)this).GetFormatFromOptions(options) : options.Format;
 
             switch (format)
             {
                 case "J":
                     return ModelReaderWriter.Write(this, options);
                 default:
-                    throw new FormatException($"The model {nameof(EmbedRequest)} does not support writing '{options.Format}' format.");
+                    throw new FormatException($"The model {nameof(ChatMessageAudioContentItem)} does not support writing '{options.Format}' format.");
             }
         }
 
-        EmbedRequest IPersistableModel<EmbedRequest>.Create(BinaryData data, ModelReaderWriterOptions options)
+        ChatMessageAudioContentItem IPersistableModel<ChatMessageAudioContentItem>.Create(BinaryData data, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbedRequest>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageAudioContentItem>)this).GetFormatFromOptions(options) : options.Format;
 
             switch (format)
             {
                 case "J":
                     {
                         using JsonDocument document = JsonDocument.Parse(data);
-                        return DeserializeEmbedRequest(document.RootElement, options);
+                        return DeserializeChatMessageAudioContentItem(document.RootElement, options);
                     }
                 default:
-                    throw new FormatException($"The model {nameof(EmbedRequest)} does not support reading '{options.Format}' format.");
+                    throw new FormatException($"The model {nameof(ChatMessageAudioContentItem)} does not support reading '{options.Format}' format.");
             }
         }
 
-        string IPersistableModel<EmbedRequest>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
+        string IPersistableModel<ChatMessageAudioContentItem>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
 
         /// <summary> Deserializes the model from a raw response. </summary>
         /// <param name="response"> The response to deserialize the model from. </param>
-        internal static EmbedRequest FromResponse(Response response)
+        internal static new ChatMessageAudioContentItem FromResponse(Response response)
         {
             using var document = JsonDocument.Parse(response.Content);
-            return DeserializeEmbedRequest(document.RootElement);
+            return DeserializeChatMessageAudioContentItem(document.RootElement);
         }
 
         /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
-        internal virtual RequestContent ToRequestContent()
+        internal override RequestContent ToRequestContent()
         {
             var content = new Utf8JsonRequestContent();
             content.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageAudioContentItem.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageAudioContentItem.cs
new file mode 100644
index 000000000000..c92893f6ee43
--- /dev/null
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageAudioContentItem.cs
@@ -0,0 +1,44 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+// <auto-generated/>
+
+#nullable disable
+
+using System;
+using System.Collections.Generic;
+
+namespace Azure.AI.Inference
+{
+    /// <summary> A structured chat content item containing audio content. </summary>
+    public partial class ChatMessageAudioContentItem : ChatMessageContentItem
+    {
+        /// <summary> Initializes a new instance of <see cref="ChatMessageAudioContentItem"/>. </summary>
+        /// <param name="inputAudio"> The details of the input audio. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="inputAudio"/> is null. </exception>
+        public ChatMessageAudioContentItem(ChatMessageInputAudio inputAudio)
+        {
+            Argument.AssertNotNull(inputAudio, nameof(inputAudio));
+
+            Type = "input_audio";
+            InputAudio = inputAudio;
+        }
+
+        /// <summary> Initializes a new instance of <see cref="ChatMessageAudioContentItem"/>. </summary>
+        /// <param name="type"> The discriminated object type. </param>
+        /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
+        /// <param name="inputAudio"> The details of the input audio. </param>
+        internal ChatMessageAudioContentItem(string type, IDictionary<string, BinaryData> serializedAdditionalRawData, ChatMessageInputAudio inputAudio) : base(type, serializedAdditionalRawData)
+        {
+            InputAudio = inputAudio;
+        }
+
+        /// <summary> Initializes a new instance of <see cref="ChatMessageAudioContentItem"/> for deserialization. </summary>
+        internal ChatMessageAudioContentItem()
+        {
+        }
+
+        /// <summary> The details of the input audio. </summary>
+        public ChatMessageInputAudio InputAudio { get; }
+    }
+}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.Serialization.cs
index 11e7c365f14e..3a02f4155303 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.Serialization.cs
@@ -78,6 +78,7 @@ internal static ChatMessageContentItem DeserializeChatMessageContentItem(JsonEle
                 switch (discriminator.GetString())
                 {
                     case "image_url": return ChatMessageImageContentItem.DeserializeChatMessageImageContentItem(element, options);
+                    case "input_audio": return ChatMessageAudioContentItem.DeserializeChatMessageAudioContentItem(element, options);
                     case "text": return ChatMessageTextContentItem.DeserializeChatMessageTextContentItem(element, options);
                 }
             }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.cs
index 3c1131052b9b..2c314658a47f 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageContentItem.cs
@@ -13,7 +13,7 @@ namespace Azure.AI.Inference
     /// <summary>
     /// An abstract representation of a structured content item within a chat message.
     /// Please note <see cref="ChatMessageContentItem"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be casted to one of the possible derived classes.
-    /// The available derived classes include <see cref="ChatMessageImageContentItem"/> and <see cref="ChatMessageTextContentItem"/>.
+    /// The available derived classes include <see cref="ChatMessageImageContentItem"/>, <see cref="ChatMessageAudioContentItem"/> and <see cref="ChatMessageTextContentItem"/>.
     /// </summary>
     public abstract partial class ChatMessageContentItem
     {
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsUsage.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageInputAudio.Serialization.cs
similarity index 59%
rename from sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsUsage.Serialization.cs
rename to sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageInputAudio.Serialization.cs
index 0a344e8a3e87..55c7f032a84b 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsUsage.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageInputAudio.Serialization.cs
@@ -13,11 +13,11 @@
 
 namespace Azure.AI.Inference
 {
-    public partial class EmbeddingsUsage : IUtf8JsonSerializable, IJsonModel<EmbeddingsUsage>
+    public partial class ChatMessageInputAudio : IUtf8JsonSerializable, IJsonModel<ChatMessageInputAudio>
     {
-        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<EmbeddingsUsage>)this).Write(writer, ModelSerializationExtensions.WireOptions);
+        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<ChatMessageInputAudio>)this).Write(writer, ModelSerializationExtensions.WireOptions);
 
-        void IJsonModel<EmbeddingsUsage>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
+        void IJsonModel<ChatMessageInputAudio>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
         {
             writer.WriteStartObject();
             JsonModelWriteCore(writer, options);
@@ -28,16 +28,16 @@ void IJsonModel<EmbeddingsUsage>.Write(Utf8JsonWriter writer, ModelReaderWriterO
         /// <param name="options"> The client options for reading and writing models. </param>
         protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsUsage>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageInputAudio>)this).GetFormatFromOptions(options) : options.Format;
             if (format != "J")
             {
-                throw new FormatException($"The model {nameof(EmbeddingsUsage)} does not support writing '{format}' format.");
+                throw new FormatException($"The model {nameof(ChatMessageInputAudio)} does not support writing '{format}' format.");
             }
 
-            writer.WritePropertyName("prompt_tokens"u8);
-            writer.WriteNumberValue(PromptTokens);
-            writer.WritePropertyName("total_tokens"u8);
-            writer.WriteNumberValue(TotalTokens);
+            writer.WritePropertyName("data"u8);
+            writer.WriteStringValue(Data);
+            writer.WritePropertyName("format"u8);
+            writer.WriteStringValue(Format.ToString());
             if (options.Format != "W" && _serializedAdditionalRawData != null)
             {
                 foreach (var item in _serializedAdditionalRawData)
@@ -55,19 +55,19 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
             }
         }
 
-        EmbeddingsUsage IJsonModel<EmbeddingsUsage>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
+        ChatMessageInputAudio IJsonModel<ChatMessageInputAudio>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsUsage>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageInputAudio>)this).GetFormatFromOptions(options) : options.Format;
             if (format != "J")
             {
-                throw new FormatException($"The model {nameof(EmbeddingsUsage)} does not support reading '{format}' format.");
+                throw new FormatException($"The model {nameof(ChatMessageInputAudio)} does not support reading '{format}' format.");
             }
 
             using JsonDocument document = JsonDocument.ParseValue(ref reader);
-            return DeserializeEmbeddingsUsage(document.RootElement, options);
+            return DeserializeChatMessageInputAudio(document.RootElement, options);
         }
 
-        internal static EmbeddingsUsage DeserializeEmbeddingsUsage(JsonElement element, ModelReaderWriterOptions options = null)
+        internal static ChatMessageInputAudio DeserializeChatMessageInputAudio(JsonElement element, ModelReaderWriterOptions options = null)
         {
             options ??= ModelSerializationExtensions.WireOptions;
 
@@ -75,20 +75,20 @@ internal static EmbeddingsUsage DeserializeEmbeddingsUsage(JsonElement element,
             {
                 return null;
             }
-            int promptTokens = default;
-            int totalTokens = default;
+            string data = default;
+            AudioContentFormat format = default;
             IDictionary<string, BinaryData> serializedAdditionalRawData = default;
             Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
             foreach (var property in element.EnumerateObject())
             {
-                if (property.NameEquals("prompt_tokens"u8))
+                if (property.NameEquals("data"u8))
                 {
-                    promptTokens = property.Value.GetInt32();
+                    data = property.Value.GetString();
                     continue;
                 }
-                if (property.NameEquals("total_tokens"u8))
+                if (property.NameEquals("format"u8))
                 {
-                    totalTokens = property.Value.GetInt32();
+                    format = new AudioContentFormat(property.Value.GetString());
                     continue;
                 }
                 if (options.Format != "W")
@@ -97,46 +97,46 @@ internal static EmbeddingsUsage DeserializeEmbeddingsUsage(JsonElement element,
                 }
             }
             serializedAdditionalRawData = rawDataDictionary;
-            return new EmbeddingsUsage(promptTokens, totalTokens, serializedAdditionalRawData);
+            return new ChatMessageInputAudio(data, format, serializedAdditionalRawData);
         }
 
-        BinaryData IPersistableModel<EmbeddingsUsage>.Write(ModelReaderWriterOptions options)
+        BinaryData IPersistableModel<ChatMessageInputAudio>.Write(ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsUsage>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageInputAudio>)this).GetFormatFromOptions(options) : options.Format;
 
             switch (format)
             {
                 case "J":
                     return ModelReaderWriter.Write(this, options);
                 default:
-                    throw new FormatException($"The model {nameof(EmbeddingsUsage)} does not support writing '{options.Format}' format.");
+                    throw new FormatException($"The model {nameof(ChatMessageInputAudio)} does not support writing '{options.Format}' format.");
             }
         }
 
-        EmbeddingsUsage IPersistableModel<EmbeddingsUsage>.Create(BinaryData data, ModelReaderWriterOptions options)
+        ChatMessageInputAudio IPersistableModel<ChatMessageInputAudio>.Create(BinaryData data, ModelReaderWriterOptions options)
         {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsUsage>)this).GetFormatFromOptions(options) : options.Format;
+            var format = options.Format == "W" ? ((IPersistableModel<ChatMessageInputAudio>)this).GetFormatFromOptions(options) : options.Format;
 
             switch (format)
             {
                 case "J":
                     {
                         using JsonDocument document = JsonDocument.Parse(data);
-                        return DeserializeEmbeddingsUsage(document.RootElement, options);
+                        return DeserializeChatMessageInputAudio(document.RootElement, options);
                     }
                 default:
-                    throw new FormatException($"The model {nameof(EmbeddingsUsage)} does not support reading '{options.Format}' format.");
+                    throw new FormatException($"The model {nameof(ChatMessageInputAudio)} does not support reading '{options.Format}' format.");
             }
         }
 
-        string IPersistableModel<EmbeddingsUsage>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
+        string IPersistableModel<ChatMessageInputAudio>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
 
         /// <summary> Deserializes the model from a raw response. </summary>
         /// <param name="response"> The response to deserialize the model from. </param>
-        internal static EmbeddingsUsage FromResponse(Response response)
+        internal static ChatMessageInputAudio FromResponse(Response response)
         {
             using var document = JsonDocument.Parse(response.Content);
-            return DeserializeEmbeddingsUsage(document.RootElement);
+            return DeserializeChatMessageInputAudio(document.RootElement);
         }
 
         /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbedRequest.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageInputAudio.cs
similarity index 57%
rename from sdk/ai/Azure.AI.Inference/src/Generated/EmbedRequest.cs
rename to sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageInputAudio.cs
index 4cb73d47f7cd..008c3db1d9f3 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbedRequest.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatMessageInputAudio.cs
@@ -10,8 +10,8 @@
 
 namespace Azure.AI.Inference
 {
-    /// <summary> The EmbedRequest. </summary>
-    internal partial class EmbedRequest
+    /// <summary> The details of an audio chat message content part. </summary>
+    public partial class ChatMessageInputAudio
     {
         /// <summary>
         /// Keeps track of any properties unknown to the library.
@@ -45,31 +45,37 @@ internal partial class EmbedRequest
         /// </summary>
         private IDictionary<string, BinaryData> _serializedAdditionalRawData;
 
-        /// <summary> Initializes a new instance of <see cref="EmbedRequest"/>. </summary>
-        /// <param name="embeddingsOptions"></param>
-        /// <exception cref="ArgumentNullException"> <paramref name="embeddingsOptions"/> is null. </exception>
-        internal EmbedRequest(EmbeddingsOptions embeddingsOptions)
+        /// <summary> Initializes a new instance of <see cref="ChatMessageInputAudio"/>. </summary>
+        /// <param name="data"> Base64 encoded audio data. </param>
+        /// <param name="format"> The audio format of the audio content. </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="data"/> is null. </exception>
+        public ChatMessageInputAudio(string data, AudioContentFormat format)
         {
-            Argument.AssertNotNull(embeddingsOptions, nameof(embeddingsOptions));
+            Argument.AssertNotNull(data, nameof(data));
 
-            EmbeddingsOptions = embeddingsOptions;
+            Data = data;
+            Format = format;
         }
 
-        /// <summary> Initializes a new instance of <see cref="EmbedRequest"/>. </summary>
-        /// <param name="embeddingsOptions"></param>
+        /// <summary> Initializes a new instance of <see cref="ChatMessageInputAudio"/>. </summary>
+        /// <param name="data"> Base64 encoded audio data. </param>
+        /// <param name="format"> The audio format of the audio content. </param>
         /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal EmbedRequest(EmbeddingsOptions embeddingsOptions, IDictionary<string, BinaryData> serializedAdditionalRawData)
+        internal ChatMessageInputAudio(string data, AudioContentFormat format, IDictionary<string, BinaryData> serializedAdditionalRawData)
         {
-            EmbeddingsOptions = embeddingsOptions;
+            Data = data;
+            Format = format;
             _serializedAdditionalRawData = serializedAdditionalRawData;
         }
 
-        /// <summary> Initializes a new instance of <see cref="EmbedRequest"/> for deserialization. </summary>
-        internal EmbedRequest()
+        /// <summary> Initializes a new instance of <see cref="ChatMessageInputAudio"/> for deserialization. </summary>
+        internal ChatMessageInputAudio()
         {
         }
 
-        /// <summary> Gets the embeddings options. </summary>
-        public EmbeddingsOptions EmbeddingsOptions { get; }
+        /// <summary> Base64 encoded audio data. </summary>
+        public string Data { get; }
+        /// <summary> The audio format of the audio content. </summary>
+        public AudioContentFormat Format { get; }
     }
 }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestAssistantMessage.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestAssistantMessage.Serialization.cs
index 76ab70699d71..eb2393dae82f 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestAssistantMessage.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestAssistantMessage.Serialization.cs
@@ -37,15 +37,8 @@ protected override void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWri
             base.JsonModelWriteCore(writer, options);
             if (Optional.IsDefined(Content))
             {
-                if (Content != null)
-                {
-                    writer.WritePropertyName("content"u8);
-                    writer.WriteStringValue(Content);
-                }
-                else
-                {
-                    writer.WriteNull("content");
-                }
+                writer.WritePropertyName("content"u8);
+                writer.WriteStringValue(Content);
             }
             if (Optional.IsCollectionDefined(ToolCalls))
             {
@@ -88,11 +81,6 @@ internal static ChatRequestAssistantMessage DeserializeChatRequestAssistantMessa
             {
                 if (property.NameEquals("content"u8))
                 {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        content = null;
-                        continue;
-                    }
                     content = property.Value.GetString();
                     continue;
                 }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.Serialization.cs
index cb137da445a7..8bc691ac2a0c 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.Serialization.cs
@@ -35,15 +35,11 @@ protected override void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWri
             }
 
             base.JsonModelWriteCore(writer, options);
-            if (Content != null)
+            if (Optional.IsDefined(Content))
             {
                 writer.WritePropertyName("content"u8);
                 writer.WriteStringValue(Content);
             }
-            else
-            {
-                writer.WriteNull("content");
-            }
             writer.WritePropertyName("tool_call_id"u8);
             writer.WriteStringValue(ToolCallId);
         }
@@ -77,11 +73,6 @@ internal static ChatRequestToolMessage DeserializeChatRequestToolMessage(JsonEle
             {
                 if (property.NameEquals("content"u8))
                 {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        content = null;
-                        continue;
-                    }
                     content = property.Value.GetString();
                     continue;
                 }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.cs
index 0d848616d078..d496a231572f 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ChatRequestToolMessage.cs
@@ -14,15 +14,13 @@ namespace Azure.AI.Inference
     public partial class ChatRequestToolMessage : ChatRequestMessage
     {
         /// <summary> Initializes a new instance of <see cref="ChatRequestToolMessage"/>. </summary>
-        /// <param name="content"> The content of the message. </param>
         /// <param name="toolCallId"> The ID of the tool call resolved by the provided content. </param>
         /// <exception cref="ArgumentNullException"> <paramref name="toolCallId"/> is null. </exception>
-        public ChatRequestToolMessage(string content, string toolCallId)
+        public ChatRequestToolMessage(string toolCallId)
         {
             Argument.AssertNotNull(toolCallId, nameof(toolCallId));
 
             Role = ChatRole.Tool;
-            Content = content;
             ToolCallId = toolCallId;
         }
 
@@ -43,7 +41,7 @@ internal ChatRequestToolMessage()
         }
 
         /// <summary> The content of the message. </summary>
-        public string Content { get; }
+        public string Content { get; set; }
         /// <summary> The ID of the tool call resolved by the provided content. </summary>
         public string ToolCallId { get; }
     }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.Serialization.cs
index 5b75cc0ab4da..f5a9dc2847bf 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.Serialization.cs
@@ -34,22 +34,101 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
                 throw new FormatException($"The model {nameof(CompleteRequest)} does not support writing '{format}' format.");
             }
 
-            writer.WritePropertyName("chatCompletionsOptions"u8);
-            writer.WriteObjectValue(ChatCompletionsOptions, options);
-            if (options.Format != "W" && _serializedAdditionalRawData != null)
+            writer.WritePropertyName("messages"u8);
+            writer.WriteStartArray();
+            foreach (var item in Messages)
             {
-                foreach (var item in _serializedAdditionalRawData)
+                writer.WriteObjectValue(item, options);
+            }
+            writer.WriteEndArray();
+            if (Optional.IsDefined(FrequencyPenalty))
+            {
+                writer.WritePropertyName("frequency_penalty"u8);
+                writer.WriteNumberValue(FrequencyPenalty.Value);
+            }
+            if (Optional.IsDefined(InternalShouldStreamResponse))
+            {
+                writer.WritePropertyName("stream"u8);
+                writer.WriteBooleanValue(InternalShouldStreamResponse.Value);
+            }
+            if (Optional.IsDefined(PresencePenalty))
+            {
+                writer.WritePropertyName("presence_penalty"u8);
+                writer.WriteNumberValue(PresencePenalty.Value);
+            }
+            if (Optional.IsDefined(Temperature))
+            {
+                writer.WritePropertyName("temperature"u8);
+                writer.WriteNumberValue(Temperature.Value);
+            }
+            if (Optional.IsDefined(NucleusSamplingFactor))
+            {
+                writer.WritePropertyName("top_p"u8);
+                writer.WriteNumberValue(NucleusSamplingFactor.Value);
+            }
+            if (Optional.IsDefined(MaxTokens))
+            {
+                writer.WritePropertyName("max_tokens"u8);
+                writer.WriteNumberValue(MaxTokens.Value);
+            }
+            if (Optional.IsDefined(ResponseFormat))
+            {
+                writer.WritePropertyName("response_format"u8);
+                writer.WriteObjectValue(ResponseFormat, options);
+            }
+            if (Optional.IsCollectionDefined(StopSequences))
+            {
+                writer.WritePropertyName("stop"u8);
+                writer.WriteStartArray();
+                foreach (var item in StopSequences)
+                {
+                    writer.WriteStringValue(item);
+                }
+                writer.WriteEndArray();
+            }
+            if (Optional.IsCollectionDefined(Tools))
+            {
+                writer.WritePropertyName("tools"u8);
+                writer.WriteStartArray();
+                foreach (var item in Tools)
                 {
-                    writer.WritePropertyName(item.Key);
+                    writer.WriteObjectValue(item, options);
+                }
+                writer.WriteEndArray();
+            }
+            if (Optional.IsDefined(ToolChoice))
+            {
+                writer.WritePropertyName("tool_choice"u8);
 #if NET6_0_OR_GREATER
-				writer.WriteRawValue(item.Value);
+				writer.WriteRawValue(ToolChoice);
 #else
-                    using (JsonDocument document = JsonDocument.Parse(item.Value))
-                    {
-                        JsonSerializer.Serialize(writer, document.RootElement);
-                    }
+                using (JsonDocument document = JsonDocument.Parse(ToolChoice))
+                {
+                    JsonSerializer.Serialize(writer, document.RootElement);
+                }
 #endif
+            }
+            if (Optional.IsDefined(Seed))
+            {
+                writer.WritePropertyName("seed"u8);
+                writer.WriteNumberValue(Seed.Value);
+            }
+            if (Optional.IsDefined(Model))
+            {
+                writer.WritePropertyName("model"u8);
+                writer.WriteStringValue(Model);
+            }
+            foreach (var item in AdditionalProperties)
+            {
+                writer.WritePropertyName(item.Key);
+#if NET6_0_OR_GREATER
+				writer.WriteRawValue(item.Value);
+#else
+                using (JsonDocument document = JsonDocument.Parse(item.Value))
+                {
+                    JsonSerializer.Serialize(writer, document.RootElement);
                 }
+#endif
             }
         }
 
@@ -73,23 +152,165 @@ internal static CompleteRequest DeserializeCompleteRequest(JsonElement element,
             {
                 return null;
             }
-            ChatCompletionsOptions chatCompletionsOptions = default;
-            IDictionary<string, BinaryData> serializedAdditionalRawData = default;
-            Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
+            IReadOnlyList<ChatRequestMessage> messages = default;
+            float? frequencyPenalty = default;
+            bool? stream = default;
+            float? presencePenalty = default;
+            float? temperature = default;
+            float? topP = default;
+            int? maxTokens = default;
+            ChatCompletionsResponseFormat responseFormat = default;
+            IReadOnlyList<string> stop = default;
+            IReadOnlyList<ChatCompletionsToolDefinition> tools = default;
+            BinaryData toolChoice = default;
+            long? seed = default;
+            string model = default;
+            IReadOnlyDictionary<string, BinaryData> additionalProperties = default;
+            Dictionary<string, BinaryData> additionalPropertiesDictionary = new Dictionary<string, BinaryData>();
             foreach (var property in element.EnumerateObject())
             {
-                if (property.NameEquals("chatCompletionsOptions"u8))
+                if (property.NameEquals("messages"u8))
                 {
-                    chatCompletionsOptions = ChatCompletionsOptions.DeserializeChatCompletionsOptions(property.Value, options);
+                    List<ChatRequestMessage> array = new List<ChatRequestMessage>();
+                    foreach (var item in property.Value.EnumerateArray())
+                    {
+                        array.Add(ChatRequestMessage.DeserializeChatRequestMessage(item, options));
+                    }
+                    messages = array;
+                    continue;
+                }
+                if (property.NameEquals("frequency_penalty"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    frequencyPenalty = property.Value.GetSingle();
+                    continue;
+                }
+                if (property.NameEquals("stream"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    stream = property.Value.GetBoolean();
                     continue;
                 }
-                if (options.Format != "W")
+                if (property.NameEquals("presence_penalty"u8))
                 {
-                    rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    presencePenalty = property.Value.GetSingle();
+                    continue;
+                }
+                if (property.NameEquals("temperature"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    temperature = property.Value.GetSingle();
+                    continue;
+                }
+                if (property.NameEquals("top_p"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    topP = property.Value.GetSingle();
+                    continue;
+                }
+                if (property.NameEquals("max_tokens"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    maxTokens = property.Value.GetInt32();
+                    continue;
+                }
+                if (property.NameEquals("response_format"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    responseFormat = ChatCompletionsResponseFormat.DeserializeChatCompletionsResponseFormat(property.Value, options);
+                    continue;
+                }
+                if (property.NameEquals("stop"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    List<string> array = new List<string>();
+                    foreach (var item in property.Value.EnumerateArray())
+                    {
+                        array.Add(item.GetString());
+                    }
+                    stop = array;
+                    continue;
+                }
+                if (property.NameEquals("tools"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    List<ChatCompletionsToolDefinition> array = new List<ChatCompletionsToolDefinition>();
+                    foreach (var item in property.Value.EnumerateArray())
+                    {
+                        array.Add(ChatCompletionsToolDefinition.DeserializeChatCompletionsToolDefinition(item, options));
+                    }
+                    tools = array;
+                    continue;
+                }
+                if (property.NameEquals("tool_choice"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    toolChoice = BinaryData.FromString(property.Value.GetRawText());
+                    continue;
+                }
+                if (property.NameEquals("seed"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    seed = property.Value.GetInt64();
+                    continue;
+                }
+                if (property.NameEquals("model"u8))
+                {
+                    model = property.Value.GetString();
+                    continue;
                 }
+                additionalPropertiesDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
             }
-            serializedAdditionalRawData = rawDataDictionary;
-            return new CompleteRequest(chatCompletionsOptions, serializedAdditionalRawData);
+            additionalProperties = additionalPropertiesDictionary;
+            return new CompleteRequest(
+                messages,
+                frequencyPenalty,
+                stream,
+                presencePenalty,
+                temperature,
+                topP,
+                maxTokens,
+                responseFormat,
+                stop ?? new ChangeTrackingList<string>(),
+                tools ?? new ChangeTrackingList<ChatCompletionsToolDefinition>(),
+                toolChoice,
+                seed,
+                model,
+                additionalProperties);
         }
 
         BinaryData IPersistableModel<CompleteRequest>.Write(ModelReaderWriterOptions options)
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.cs b/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.cs
index 5e6b00ae756c..e723129160d6 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/CompleteRequest.cs
@@ -7,14 +7,239 @@
 
 using System;
 using System.Collections.Generic;
+using System.Linq;
 
 namespace Azure.AI.Inference
 {
     /// <summary> The CompleteRequest. </summary>
     internal partial class CompleteRequest
     {
+        /// <summary> Initializes a new instance of <see cref="CompleteRequest"/>. </summary>
+        /// <param name="messages">
+        /// The collection of context messages associated with this chat completions request.
+        /// Typical usage begins with a chat message for the System role that provides instructions for
+        /// the behavior of the assistant, followed by alternating messages between the User and
+        /// Assistant roles.
+        /// Please note <see cref="ChatRequestMessage"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be cast to one of the possible derived classes.
+        /// The available derived classes include <see cref="ChatRequestAssistantMessage"/>, <see cref="ChatRequestSystemMessage"/>, <see cref="ChatRequestToolMessage"/> and <see cref="ChatRequestUserMessage"/>.
+        /// </param>
+        /// <exception cref="ArgumentNullException"> <paramref name="messages"/> is null. </exception>
+        internal CompleteRequest(IEnumerable<ChatRequestMessage> messages)
+        {
+            Argument.AssertNotNull(messages, nameof(messages)); // Null guard; the XML doc above advertises ArgumentNullException for this case.
+
+            Messages = messages.ToList(); // Enumerates the input once and keeps the resulting list.
+            StopSequences = new ChangeTrackingList<string>(); // Collection-typed members get freshly constructed instances instead of staying null.
+            Tools = new ChangeTrackingList<ChatCompletionsToolDefinition>();
+            AdditionalProperties = new ChangeTrackingDictionary<string, BinaryData>();
+        } // The remaining properties are not assigned by this overload.
+
+        /// <summary> Initializes a new instance of <see cref="CompleteRequest"/>. </summary>
+        /// <param name="messages">
+        /// The collection of context messages associated with this chat completions request.
+        /// Typical usage begins with a chat message for the System role that provides instructions for
+        /// the behavior of the assistant, followed by alternating messages between the User and
+        /// Assistant roles.
+        /// Please note <see cref="ChatRequestMessage"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be cast to one of the possible derived classes.
+        /// The available derived classes include <see cref="ChatRequestAssistantMessage"/>, <see cref="ChatRequestSystemMessage"/>, <see cref="ChatRequestToolMessage"/> and <see cref="ChatRequestUserMessage"/>.
+        /// </param>
+        /// <param name="frequencyPenalty">
+        /// A value that influences the probability of generated tokens appearing based on their cumulative
+        /// frequency in generated text.
+        /// Positive values will make tokens less likely to appear as their frequency increases and
+        /// decrease the likelihood of the model repeating the same statements verbatim.
+        /// Supported range is [-2, 2].
+        /// </param>
+        /// <param name="internalShouldStreamResponse"> A value indicating whether chat completions should be streamed for this request. </param>
+        /// <param name="presencePenalty">
+        /// A value that influences the probability of generated tokens appearing based on their existing
+        /// presence in generated text.
+        /// Positive values will make tokens less likely to appear when they already exist and increase the
+        /// model's likelihood to output new topics.
+        /// Supported range is [-2, 2].
+        /// </param>
+        /// <param name="temperature">
+        /// The sampling temperature to use that controls the apparent creativity of generated completions.
+        /// Higher values will make output more random while lower values will make results more focused
+        /// and deterministic.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </param>
+        /// <param name="nucleusSamplingFactor">
+        /// An alternative to sampling with temperature called nucleus sampling. This value causes the
+        /// model to consider the results of tokens with the provided probability mass. As an example, a
+        /// value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+        /// considered.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </param>
+        /// <param name="maxTokens"> The maximum number of tokens to generate. </param>
+        /// <param name="responseFormat">
+        /// An object specifying the format that the model must output.
+        ///
+        /// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
+        ///
+        /// Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
+        ///
+        /// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
+        /// Please note <see cref="ChatCompletionsResponseFormat"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be cast to one of the possible derived classes.
+        /// The available derived classes include <see cref="ChatCompletionsResponseFormatJsonObject"/>, <see cref="ChatCompletionsResponseFormatJsonSchema"/> and <see cref="ChatCompletionsResponseFormatText"/>.
+        /// </param>
+        /// <param name="stopSequences"> A collection of textual sequences that will end completions generation. </param>
+        /// <param name="tools">
+        /// A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
+        /// may respond with a function call request and provide the input arguments in JSON format for that function.
+        /// </param>
+        /// <param name="toolChoice"> If specified, the model will configure which of the provided tools it can use for the chat completions response. </param>
+        /// <param name="seed">
+        /// If specified, the system will make a best effort to sample deterministically such that repeated requests with the
+        /// same seed and parameters should return the same result. Determinism is not guaranteed.
+        /// </param>
+        /// <param name="model"> ID of the specific AI model to use, if more than one model is available on the endpoint. </param>
+        /// <param name="additionalProperties"> Additional Properties. </param>
+        internal CompleteRequest(IReadOnlyList<ChatRequestMessage> messages, float? frequencyPenalty, bool? internalShouldStreamResponse, float? presencePenalty, float? temperature, float? nucleusSamplingFactor, int? maxTokens, ChatCompletionsResponseFormat responseFormat, IReadOnlyList<string> stopSequences, IReadOnlyList<ChatCompletionsToolDefinition> tools, BinaryData toolChoice, long? seed, string model, IReadOnlyDictionary<string, BinaryData> additionalProperties)
+        { // No validation or copying in this overload: each argument is stored exactly as passed.
+            Messages = messages;
+            FrequencyPenalty = frequencyPenalty;
+            InternalShouldStreamResponse = internalShouldStreamResponse; // The deserialization call site passes its local named "stream" in this position.
+            PresencePenalty = presencePenalty;
+            Temperature = temperature;
+            NucleusSamplingFactor = nucleusSamplingFactor; // The deserialization call site passes its local named "topP" in this position.
+            MaxTokens = maxTokens;
+            ResponseFormat = responseFormat;
+            StopSequences = stopSequences; // The deserialization call site substitutes a new ChangeTrackingList when its "stop" local is null.
+            Tools = tools; // Likewise replaced by a new ChangeTrackingList at the deserialization call site when null.
+            ToolChoice = toolChoice;
+            Seed = seed;
+            Model = model;
+            AdditionalProperties = additionalProperties;
+        }
+
+        /// <summary> Initializes a new instance of <see cref="CompleteRequest"/> for deserialization. </summary>
+        internal CompleteRequest()
+        { // Empty body: this overload assigns none of the properties.
+        }
+
+        /// <summary>
+        /// The collection of context messages associated with this chat completions request.
+        /// Typical usage begins with a chat message for the System role that provides instructions for
+        /// the behavior of the assistant, followed by alternating messages between the User and
+        /// Assistant roles.
+        /// Please note <see cref="ChatRequestMessage"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be cast to one of the possible derived classes.
+        /// The available derived classes include <see cref="ChatRequestAssistantMessage"/>, <see cref="ChatRequestSystemMessage"/>, <see cref="ChatRequestToolMessage"/> and <see cref="ChatRequestUserMessage"/>.
+        /// </summary>
+        public IReadOnlyList<ChatRequestMessage> Messages { get; }
+        /// <summary>
+        /// A value that influences the probability of generated tokens appearing based on their cumulative
+        /// frequency in generated text.
+        /// Positive values will make tokens less likely to appear as their frequency increases and
+        /// decrease the likelihood of the model repeating the same statements verbatim.
+        /// Supported range is [-2, 2].
+        /// </summary>
+        public float? FrequencyPenalty { get; }
+        /// <summary> A value indicating whether chat completions should be streamed for this request. </summary>
+        public bool? InternalShouldStreamResponse { get; }
+        /// <summary>
+        /// A value that influences the probability of generated tokens appearing based on their existing
+        /// presence in generated text.
+        /// Positive values will make tokens less likely to appear when they already exist and increase the
+        /// model's likelihood to output new topics.
+        /// Supported range is [-2, 2].
+        /// </summary>
+        public float? PresencePenalty { get; }
         /// <summary>
-        /// Keeps track of any properties unknown to the library.
+        /// The sampling temperature to use that controls the apparent creativity of generated completions.
+        /// Higher values will make output more random while lower values will make results more focused
+        /// and deterministic.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </summary>
+        public float? Temperature { get; }
+        /// <summary>
+        /// An alternative to sampling with temperature called nucleus sampling. This value causes the
+        /// model to consider the results of tokens with the provided probability mass. As an example, a
+        /// value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+        /// considered.
+        /// It is not recommended to modify temperature and top_p for the same completions request as the
+        /// interaction of these two settings is difficult to predict.
+        /// Supported range is [0, 1].
+        /// </summary>
+        public float? NucleusSamplingFactor { get; }
+        /// <summary> The maximum number of tokens to generate. </summary>
+        public int? MaxTokens { get; }
+        /// <summary>
+        /// An object specifying the format that the model must output.
+        ///
+        /// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which ensures the model will match your supplied JSON schema.
+        ///
+        /// Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON.
+        ///
+        /// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
+        /// Please note <see cref="ChatCompletionsResponseFormat"/> is the base class. According to the scenario, a derived class of the base class might need to be assigned here, or this property needs to be cast to one of the possible derived classes.
+        /// The available derived classes include <see cref="ChatCompletionsResponseFormatJsonObject"/>, <see cref="ChatCompletionsResponseFormatJsonSchema"/> and <see cref="ChatCompletionsResponseFormatText"/>.
+        /// </summary>
+        public ChatCompletionsResponseFormat ResponseFormat { get; }
+        /// <summary> A collection of textual sequences that will end completions generation. </summary>
+        public IReadOnlyList<string> StopSequences { get; }
+        /// <summary>
+        /// A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
+        /// may respond with a function call request and provide the input arguments in JSON format for that function.
+        /// </summary>
+        public IReadOnlyList<ChatCompletionsToolDefinition> Tools { get; }
+        /// <summary>
+        /// If specified, the model will configure which of the provided tools it can use for the chat completions response.
+        /// <para>
+        /// To assign an object to this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
+        /// </para>
+        /// <para>
+        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
+        /// </para>
+        /// <para>
+        /// <remarks>
+        /// Supported types:
+        /// <list type="bullet">
+        /// <item>
+        /// <description><see cref="ChatCompletionsToolChoicePreset"/></description>
+        /// </item>
+        /// <item>
+        /// <description><see cref="ChatCompletionsNamedToolChoice"/></description>
+        /// </item>
+        /// </list>
+        /// </remarks>
+        /// Examples:
+        /// <list type="bullet">
+        /// <item>
+        /// <term>BinaryData.FromObjectAsJson("foo")</term>
+        /// <description>Creates a payload of "foo".</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromString("\"foo\"")</term>
+        /// <description>Creates a payload of "foo".</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
+        /// <description>Creates a payload of { "key": "value" }.</description>
+        /// </item>
+        /// <item>
+        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
+        /// <description>Creates a payload of { "key": "value" }.</description>
+        /// </item>
+        /// </list>
+        /// </para>
+        /// </summary>
+        public BinaryData ToolChoice { get; }
+        /// <summary>
+        /// If specified, the system will make a best effort to sample deterministically such that repeated requests with the
+        /// same seed and parameters should return the same result. Determinism is not guaranteed.
+        /// </summary>
+        public long? Seed { get; }
+        /// <summary> ID of the specific AI model to use, if more than one model is available on the endpoint. </summary>
+        public string Model { get; }
+        /// <summary>
+        /// Additional Properties
         /// <para>
         /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
         /// </para>
@@ -43,33 +268,6 @@ internal partial class CompleteRequest
         /// </list>
         /// </para>
         /// </summary>
-        private IDictionary<string, BinaryData> _serializedAdditionalRawData;
-
-        /// <summary> Initializes a new instance of <see cref="CompleteRequest"/>. </summary>
-        /// <param name="chatCompletionsOptions"></param>
-        /// <exception cref="ArgumentNullException"> <paramref name="chatCompletionsOptions"/> is null. </exception>
-        internal CompleteRequest(ChatCompletionsOptions chatCompletionsOptions)
-        {
-            Argument.AssertNotNull(chatCompletionsOptions, nameof(chatCompletionsOptions));
-
-            ChatCompletionsOptions = chatCompletionsOptions;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="CompleteRequest"/>. </summary>
-        /// <param name="chatCompletionsOptions"></param>
-        /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal CompleteRequest(ChatCompletionsOptions chatCompletionsOptions, IDictionary<string, BinaryData> serializedAdditionalRawData)
-        {
-            ChatCompletionsOptions = chatCompletionsOptions;
-            _serializedAdditionalRawData = serializedAdditionalRawData;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="CompleteRequest"/> for deserialization. </summary>
-        internal CompleteRequest()
-        {
-        }
-
-        /// <summary> Gets the chat completions options. </summary>
-        public ChatCompletionsOptions ChatCompletionsOptions { get; }
+        public IReadOnlyDictionary<string, BinaryData> AdditionalProperties { get; }
     }
 }
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/EmbeddingsClient.xml b/sdk/ai/Azure.AI.Inference/src/Generated/Docs/EmbeddingsClient.xml
deleted file mode 100644
index 2f52491f2297..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/Docs/EmbeddingsClient.xml
+++ /dev/null
@@ -1,217 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<doc>
-  <members>
-    <member name="EmbedAsync(EmbeddingsOptions,ExtraParameters?,CancellationToken)">
-      <example>
-This sample shows how to call EmbedAsync.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-EmbeddingsOptions embeddingsOptions = null;
-Response<EmbeddingsResult> response = await client.EmbedAsync(embeddingsOptions);
-]]></code>
-This sample shows how to call EmbedAsync.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-EmbeddingsOptions embeddingsOptions = null;
-Response<EmbeddingsResult> response = await client.EmbedAsync(embeddingsOptions);
-]]></code></example>
-    </member>
-    <member name="Embed(EmbeddingsOptions,ExtraParameters?,CancellationToken)">
-      <example>
-This sample shows how to call Embed.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-EmbeddingsOptions embeddingsOptions = null;
-Response<EmbeddingsResult> response = client.Embed(embeddingsOptions);
-]]></code>
-This sample shows how to call Embed.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-EmbeddingsOptions embeddingsOptions = null;
-Response<EmbeddingsResult> response = client.Embed(embeddingsOptions);
-]]></code></example>
-    </member>
-    <member name="EmbedAsync(RequestContent,string,RequestContext)">
-      <example>
-This sample shows how to call EmbedAsync and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-using RequestContent content = RequestContent.Create(new object());
-Response response = await client.EmbedAsync(content);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("id").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-Console.WriteLine(result.GetProperty("model").ToString());
-]]></code>
-This sample shows how to call EmbedAsync and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-using RequestContent content = RequestContent.Create(new object());
-Response response = await client.EmbedAsync(content);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("id").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-Console.WriteLine(result.GetProperty("model").ToString());
-]]></code></example>
-    </member>
-    <member name="Embed(RequestContent,string,RequestContext)">
-      <example>
-This sample shows how to call Embed and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-using RequestContent content = RequestContent.Create(new object());
-Response response = client.Embed(content);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("id").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-Console.WriteLine(result.GetProperty("model").ToString());
-]]></code>
-This sample shows how to call Embed and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-using RequestContent content = RequestContent.Create(new object());
-Response response = client.Embed(content);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("id").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-Console.WriteLine(result.GetProperty("model").ToString());
-]]></code></example>
-    </member>
-    <member name="GetModelInfoAsync(CancellationToken)">
-      <example>
-This sample shows how to call GetModelInfoAsync.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response<ModelInfo> response = await client.GetModelInfoAsync();
-]]></code>
-This sample shows how to call GetModelInfoAsync.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response<ModelInfo> response = await client.GetModelInfoAsync();
-]]></code></example>
-    </member>
-    <member name="GetModelInfo(CancellationToken)">
-      <example>
-This sample shows how to call GetModelInfo.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response<ModelInfo> response = client.GetModelInfo();
-]]></code>
-This sample shows how to call GetModelInfo.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response<ModelInfo> response = client.GetModelInfo();
-]]></code></example>
-    </member>
-    <member name="GetModelInfoAsync(RequestContext)">
-      <example>
-This sample shows how to call GetModelInfoAsync and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response response = await client.GetModelInfoAsync(null);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("model_name").ToString());
-Console.WriteLine(result.GetProperty("model_type").ToString());
-Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-]]></code>
-This sample shows how to call GetModelInfoAsync and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response response = await client.GetModelInfoAsync(null);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("model_name").ToString());
-Console.WriteLine(result.GetProperty("model_type").ToString());
-Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-]]></code></example>
-    </member>
-    <member name="GetModelInfo(RequestContext)">
-      <example>
-This sample shows how to call GetModelInfo and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response response = client.GetModelInfo(null);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("model_name").ToString());
-Console.WriteLine(result.GetProperty("model_type").ToString());
-Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-]]></code>
-This sample shows how to call GetModelInfo and parse the result.
-<code><![CDATA[
-Uri endpoint = new Uri("<endpoint>");
-AzureKeyCredential credential = new AzureKeyCredential("<key>");
-EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-Response response = client.GetModelInfo(null);
-
-JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-Console.WriteLine(result.GetProperty("model_name").ToString());
-Console.WriteLine(result.GetProperty("model_type").ToString());
-Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-]]></code></example>
-    </member>
-  </members>
-</doc>
\ No newline at end of file
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingEncodingFormat.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingEncodingFormat.cs
deleted file mode 100644
index 968dfae30f12..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingEncodingFormat.cs
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.ComponentModel;
-
-namespace Azure.AI.Inference
-{
-    /// <summary>
-    /// The format of the embeddings result.
-    /// Returns a 422 error if the model doesn't support the value or parameter.
-    /// </summary>
-    public readonly partial struct EmbeddingEncodingFormat : IEquatable<EmbeddingEncodingFormat>
-    {
-        private readonly string _value;
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingEncodingFormat"/>. </summary>
-        /// <exception cref="ArgumentNullException"> <paramref name="value"/> is null. </exception>
-        public EmbeddingEncodingFormat(string value)
-        {
-            _value = value ?? throw new ArgumentNullException(nameof(value));
-        }
-
-        private const string Base64Value = "base64";
-        private const string BinaryValue = "binary";
-        private const string SingleValue = "float";
-        private const string SByteValue = "int8";
-        private const string UbinaryValue = "ubinary";
-        private const string ByteValue = "uint8";
-
-        /// <summary> Base64. </summary>
-        public static EmbeddingEncodingFormat Base64 { get; } = new EmbeddingEncodingFormat(Base64Value);
-        /// <summary> Binary. </summary>
-        public static EmbeddingEncodingFormat Binary { get; } = new EmbeddingEncodingFormat(BinaryValue);
-        /// <summary> ubinary. </summary>
-        public static EmbeddingEncodingFormat Ubinary { get; } = new EmbeddingEncodingFormat(UbinaryValue);
-        /// <summary> Determines if two <see cref="EmbeddingEncodingFormat"/> values are the same. </summary>
-        public static bool operator ==(EmbeddingEncodingFormat left, EmbeddingEncodingFormat right) => left.Equals(right);
-        /// <summary> Determines if two <see cref="EmbeddingEncodingFormat"/> values are not the same. </summary>
-        public static bool operator !=(EmbeddingEncodingFormat left, EmbeddingEncodingFormat right) => !left.Equals(right);
-        /// <summary> Converts a <see cref="string"/> to a <see cref="EmbeddingEncodingFormat"/>. </summary>
-        public static implicit operator EmbeddingEncodingFormat(string value) => new EmbeddingEncodingFormat(value);
-
-        /// <inheritdoc />
-        [EditorBrowsable(EditorBrowsableState.Never)]
-        public override bool Equals(object obj) => obj is EmbeddingEncodingFormat other && Equals(other);
-        /// <inheritdoc />
-        public bool Equals(EmbeddingEncodingFormat other) => string.Equals(_value, other._value, StringComparison.InvariantCultureIgnoreCase);
-
-        /// <inheritdoc />
-        [EditorBrowsable(EditorBrowsableState.Never)]
-        public override int GetHashCode() => _value != null ? StringComparer.InvariantCultureIgnoreCase.GetHashCode(_value) : 0;
-        /// <inheritdoc />
-        public override string ToString() => _value;
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingInputType.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingInputType.cs
deleted file mode 100644
index 9e75eba6dc3c..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingInputType.cs
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.ComponentModel;
-
-namespace Azure.AI.Inference
-{
-    /// <summary> Represents the input types used for embedding search. </summary>
-    public readonly partial struct EmbeddingInputType : IEquatable<EmbeddingInputType>
-    {
-        private readonly string _value;
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingInputType"/>. </summary>
-        /// <exception cref="ArgumentNullException"> <paramref name="value"/> is null. </exception>
-        public EmbeddingInputType(string value)
-        {
-            _value = value ?? throw new ArgumentNullException(nameof(value));
-        }
-
-        private const string TextValue = "text";
-        private const string QueryValue = "query";
-        private const string DocumentValue = "document";
-
-        /// <summary> Indicates the input is a general text input. </summary>
-        public static EmbeddingInputType Text { get; } = new EmbeddingInputType(TextValue);
-        /// <summary> Indicates the input represents a search query to find the most relevant documents in your vector database. </summary>
-        public static EmbeddingInputType Query { get; } = new EmbeddingInputType(QueryValue);
-        /// <summary> Indicates the input represents a document that is stored in a vector database. </summary>
-        public static EmbeddingInputType Document { get; } = new EmbeddingInputType(DocumentValue);
-        /// <summary> Determines if two <see cref="EmbeddingInputType"/> values are the same. </summary>
-        public static bool operator ==(EmbeddingInputType left, EmbeddingInputType right) => left.Equals(right);
-        /// <summary> Determines if two <see cref="EmbeddingInputType"/> values are not the same. </summary>
-        public static bool operator !=(EmbeddingInputType left, EmbeddingInputType right) => !left.Equals(right);
-        /// <summary> Converts a <see cref="string"/> to a <see cref="EmbeddingInputType"/>. </summary>
-        public static implicit operator EmbeddingInputType(string value) => new EmbeddingInputType(value);
-
-        /// <inheritdoc />
-        [EditorBrowsable(EditorBrowsableState.Never)]
-        public override bool Equals(object obj) => obj is EmbeddingInputType other && Equals(other);
-        /// <inheritdoc />
-        public bool Equals(EmbeddingInputType other) => string.Equals(_value, other._value, StringComparison.InvariantCultureIgnoreCase);
-
-        /// <inheritdoc />
-        [EditorBrowsable(EditorBrowsableState.Never)]
-        public override int GetHashCode() => _value != null ? StringComparer.InvariantCultureIgnoreCase.GetHashCode(_value) : 0;
-        /// <inheritdoc />
-        public override string ToString() => _value;
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingItem.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingItem.Serialization.cs
deleted file mode 100644
index d0a9f2e6bdb0..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingItem.Serialization.cs
+++ /dev/null
@@ -1,157 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.ClientModel.Primitives;
-using System.Collections.Generic;
-using System.Text.Json;
-using Azure.Core;
-
-namespace Azure.AI.Inference
-{
-    public partial class EmbeddingItem : IUtf8JsonSerializable, IJsonModel<EmbeddingItem>
-    {
-        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<EmbeddingItem>)this).Write(writer, ModelSerializationExtensions.WireOptions);
-
-        void IJsonModel<EmbeddingItem>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
-        {
-            writer.WriteStartObject();
-            JsonModelWriteCore(writer, options);
-            writer.WriteEndObject();
-        }
-
-        /// <param name="writer"> The JSON writer. </param>
-        /// <param name="options"> The client options for reading and writing models. </param>
-        protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingItem>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(EmbeddingItem)} does not support writing '{format}' format.");
-            }
-
-            writer.WritePropertyName("embedding"u8);
-#if NET6_0_OR_GREATER
-				writer.WriteRawValue(Embedding);
-#else
-            using (JsonDocument document = JsonDocument.Parse(Embedding))
-            {
-                JsonSerializer.Serialize(writer, document.RootElement);
-            }
-#endif
-            writer.WritePropertyName("index"u8);
-            writer.WriteNumberValue(Index);
-            if (options.Format != "W" && _serializedAdditionalRawData != null)
-            {
-                foreach (var item in _serializedAdditionalRawData)
-                {
-                    writer.WritePropertyName(item.Key);
-#if NET6_0_OR_GREATER
-				writer.WriteRawValue(item.Value);
-#else
-                    using (JsonDocument document = JsonDocument.Parse(item.Value))
-                    {
-                        JsonSerializer.Serialize(writer, document.RootElement);
-                    }
-#endif
-                }
-            }
-        }
-
-        EmbeddingItem IJsonModel<EmbeddingItem>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingItem>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(EmbeddingItem)} does not support reading '{format}' format.");
-            }
-
-            using JsonDocument document = JsonDocument.ParseValue(ref reader);
-            return DeserializeEmbeddingItem(document.RootElement, options);
-        }
-
-        internal static EmbeddingItem DeserializeEmbeddingItem(JsonElement element, ModelReaderWriterOptions options = null)
-        {
-            options ??= ModelSerializationExtensions.WireOptions;
-
-            if (element.ValueKind == JsonValueKind.Null)
-            {
-                return null;
-            }
-            BinaryData embedding = default;
-            int index = default;
-            IDictionary<string, BinaryData> serializedAdditionalRawData = default;
-            Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
-            foreach (var property in element.EnumerateObject())
-            {
-                if (property.NameEquals("embedding"u8))
-                {
-                    embedding = BinaryData.FromString(property.Value.GetRawText());
-                    continue;
-                }
-                if (property.NameEquals("index"u8))
-                {
-                    index = property.Value.GetInt32();
-                    continue;
-                }
-                if (options.Format != "W")
-                {
-                    rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
-                }
-            }
-            serializedAdditionalRawData = rawDataDictionary;
-            return new EmbeddingItem(embedding, index, serializedAdditionalRawData);
-        }
-
-        BinaryData IPersistableModel<EmbeddingItem>.Write(ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingItem>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    return ModelReaderWriter.Write(this, options);
-                default:
-                    throw new FormatException($"The model {nameof(EmbeddingItem)} does not support writing '{options.Format}' format.");
-            }
-        }
-
-        EmbeddingItem IPersistableModel<EmbeddingItem>.Create(BinaryData data, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingItem>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    {
-                        using JsonDocument document = JsonDocument.Parse(data);
-                        return DeserializeEmbeddingItem(document.RootElement, options);
-                    }
-                default:
-                    throw new FormatException($"The model {nameof(EmbeddingItem)} does not support reading '{options.Format}' format.");
-            }
-        }
-
-        string IPersistableModel<EmbeddingItem>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
-
-        /// <summary> Deserializes the model from a raw response. </summary>
-        /// <param name="response"> The response to deserialize the model from. </param>
-        internal static EmbeddingItem FromResponse(Response response)
-        {
-            using var document = JsonDocument.Parse(response.Content);
-            return DeserializeEmbeddingItem(document.RootElement);
-        }
-
-        /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
-        internal virtual RequestContent ToRequestContent()
-        {
-            var content = new Utf8JsonRequestContent();
-            content.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
-            return content;
-        }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingItem.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingItem.cs
deleted file mode 100644
index 676f0aba0bc7..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingItem.cs
+++ /dev/null
@@ -1,128 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.Collections.Generic;
-
-namespace Azure.AI.Inference
-{
-    /// <summary> Representation of a single embeddings relatedness comparison. </summary>
-    public partial class EmbeddingItem
-    {
-        /// <summary>
-        /// Keeps track of any properties unknown to the library.
-        /// <para>
-        /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
-        /// </para>
-        /// <para>
-        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
-        /// </para>
-        /// <para>
-        /// Examples:
-        /// <list type="bullet">
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson("foo")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("\"foo\"")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// </list>
-        /// </para>
-        /// </summary>
-        private IDictionary<string, BinaryData> _serializedAdditionalRawData;
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingItem"/>. </summary>
-        /// <param name="embedding">
-        /// List of embedding values for the input prompt. These represent a measurement of the
-        /// vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
-        /// </param>
-        /// <param name="index"> Index of the prompt to which the EmbeddingItem corresponds. </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="embedding"/> is null. </exception>
-        internal EmbeddingItem(BinaryData embedding, int index)
-        {
-            Argument.AssertNotNull(embedding, nameof(embedding));
-
-            Embedding = embedding;
-            Index = index;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingItem"/>. </summary>
-        /// <param name="embedding">
-        /// List of embedding values for the input prompt. These represent a measurement of the
-        /// vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
-        /// </param>
-        /// <param name="index"> Index of the prompt to which the EmbeddingItem corresponds. </param>
-        /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal EmbeddingItem(BinaryData embedding, int index, IDictionary<string, BinaryData> serializedAdditionalRawData)
-        {
-            Embedding = embedding;
-            Index = index;
-            _serializedAdditionalRawData = serializedAdditionalRawData;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingItem"/> for deserialization. </summary>
-        internal EmbeddingItem()
-        {
-        }
-
-        /// <summary>
-        /// List of embedding values for the input prompt. These represent a measurement of the
-        /// vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
-        /// <para>
-        /// To assign an object to this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
-        /// </para>
-        /// <para>
-        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
-        /// </para>
-        /// <para>
-        /// <remarks>
-        /// Supported types:
-        /// <list type="bullet">
-        /// <item>
-        /// <description><see cref="string"/></description>
-        /// </item>
-        /// <item>
-        /// <description><see cref="IList{T}"/> where <c>T</c> is of type <see cref="float"/></description>
-        /// </item>
-        /// </list>
-        /// </remarks>
-        /// Examples:
-        /// <list type="bullet">
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson("foo")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("\"foo\"")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// </list>
-        /// </para>
-        /// </summary>
-        public BinaryData Embedding { get; }
-        /// <summary> Index of the prompt to which the EmbeddingItem corresponds. </summary>
-        public int Index { get; }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsClient.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsClient.cs
deleted file mode 100644
index 1d747f7cedad..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsClient.cs
+++ /dev/null
@@ -1,229 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.Collections.Generic;
-using System.Threading;
-using System.Threading.Tasks;
-using Azure.Core;
-using Azure.Core.Pipeline;
-
-namespace Azure.AI.Inference
-{
-    // Data plane generated client.
-    /// <summary> The Embeddings service client. </summary>
-    public partial class EmbeddingsClient
-    {
-        private const string AuthorizationHeader = "Authorization";
-        private readonly AzureKeyCredential _keyCredential;
-        private const string AuthorizationApiKeyPrefix = "Bearer";
-        private static readonly string[] AuthorizationScopes = new string[] { "https://ml.azure.com/.default" };
-        private readonly TokenCredential _tokenCredential;
-        private readonly HttpPipeline _pipeline;
-        private readonly Uri _endpoint;
-        private readonly string _apiVersion;
-
-        /// <summary> The ClientDiagnostics is used to provide tracing support for the client library. </summary>
-        internal ClientDiagnostics ClientDiagnostics { get; }
-
-        /// <summary> The HTTP pipeline for sending and receiving REST requests and responses. </summary>
-        public virtual HttpPipeline Pipeline => _pipeline;
-
-        /// <summary> Initializes a new instance of EmbeddingsClient for mocking. </summary>
-        protected EmbeddingsClient()
-        {
-        }
-
-        /// <summary> Initializes a new instance of EmbeddingsClient. </summary>
-        /// <param name="endpoint"> Service host. </param>
-        /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="endpoint"/> or <paramref name="credential"/> is null. </exception>
-        public EmbeddingsClient(Uri endpoint, AzureKeyCredential credential) : this(endpoint, credential, new AzureAIInferenceClientOptions())
-        {
-        }
-
-        /// <summary> Initializes a new instance of EmbeddingsClient. </summary>
-        /// <param name="endpoint"> Service host. </param>
-        /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="endpoint"/> or <paramref name="credential"/> is null. </exception>
-        public EmbeddingsClient(Uri endpoint, TokenCredential credential) : this(endpoint, credential, new AzureAIInferenceClientOptions())
-        {
-        }
-
-        /// <summary> Initializes a new instance of EmbeddingsClient. </summary>
-        /// <param name="endpoint"> Service host. </param>
-        /// <param name="credential"> A credential used to authenticate to an Azure Service. </param>
-        /// <param name="options"> The options for configuring the client. </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="endpoint"/> or <paramref name="credential"/> is null. </exception>
-        public EmbeddingsClient(Uri endpoint, TokenCredential credential, AzureAIInferenceClientOptions options)
-        {
-            Argument.AssertNotNull(endpoint, nameof(endpoint));
-            Argument.AssertNotNull(credential, nameof(credential));
-            options ??= new AzureAIInferenceClientOptions();
-
-            ClientDiagnostics = new ClientDiagnostics(options, true);
-            _tokenCredential = credential;
-            _pipeline = HttpPipelineBuilder.Build(options, Array.Empty<HttpPipelinePolicy>(), new HttpPipelinePolicy[] { new BearerTokenAuthenticationPolicy(_tokenCredential, AuthorizationScopes) }, new ResponseClassifier());
-            _endpoint = endpoint;
-            _apiVersion = options.Version;
-        }
-
-        /// <summary>
-        /// Returns information about the AI model.
-        /// The method makes a REST API call to the `/info` route on the given endpoint.
-        /// This method will only work when using Serverless API or Managed Compute endpoint.
-        /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
-        /// </summary>
-        /// <param name="cancellationToken"> The cancellation token to use. </param>
-        /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name='GetModelInfoAsync(CancellationToken)']/*" />
-        public virtual async Task<Response<ModelInfo>> GetModelInfoAsync(CancellationToken cancellationToken = default)
-        {
-            RequestContext context = FromCancellationToken(cancellationToken);
-            Response response = await GetModelInfoAsync(context).ConfigureAwait(false);
-            return Response.FromValue(ModelInfo.FromResponse(response), response);
-        }
-
-        /// <summary>
-        /// Returns information about the AI model.
-        /// The method makes a REST API call to the `/info` route on the given endpoint.
-        /// This method will only work when using Serverless API or Managed Compute endpoint.
-        /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
-        /// </summary>
-        /// <param name="cancellationToken"> The cancellation token to use. </param>
-        /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name='GetModelInfo(CancellationToken)']/*" />
-        public virtual Response<ModelInfo> GetModelInfo(CancellationToken cancellationToken = default)
-        {
-            RequestContext context = FromCancellationToken(cancellationToken);
-            Response response = GetModelInfo(context);
-            return Response.FromValue(ModelInfo.FromResponse(response), response);
-        }
-
-        /// <summary>
-        /// [Protocol Method] Returns information about the AI model.
-        /// The method makes a REST API call to the `/info` route on the given endpoint.
-        /// This method will only work when using Serverless API or Managed Compute endpoint.
-        /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
-        /// <list type="bullet">
-        /// <item>
-        /// <description>
-        /// This <see href="https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/ProtocolMethods.md">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
-        /// </description>
-        /// </item>
-        /// <item>
-        /// <description>
-        /// Please try the simpler <see cref="GetModelInfoAsync(CancellationToken)"/> convenience overload with strongly typed models first.
-        /// </description>
-        /// </item>
-        /// </list>
-        /// </summary>
-        /// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
-        /// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
-        /// <returns> The response returned from the service. </returns>
-        /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name='GetModelInfoAsync(RequestContext)']/*" />
-        public virtual async Task<Response> GetModelInfoAsync(RequestContext context)
-        {
-            using var scope = ClientDiagnostics.CreateScope("EmbeddingsClient.GetModelInfo");
-            scope.Start();
-            try
-            {
-                using HttpMessage message = CreateGetModelInfoRequest(context);
-                return await _pipeline.ProcessMessageAsync(message, context).ConfigureAwait(false);
-            }
-            catch (Exception e)
-            {
-                scope.Failed(e);
-                throw;
-            }
-        }
-
-        /// <summary>
-        /// [Protocol Method] Returns information about the AI model.
-        /// The method makes a REST API call to the `/info` route on the given endpoint.
-        /// This method will only work when using Serverless API or Managed Compute endpoint.
-        /// It will not work for GitHub Models endpoint or Azure OpenAI endpoint.
-        /// <list type="bullet">
-        /// <item>
-        /// <description>
-        /// This <see href="https://github.com/Azure/azure-sdk-for-net/blob/main/sdk/core/Azure.Core/samples/ProtocolMethods.md">protocol method</see> allows explicit creation of the request and processing of the response for advanced scenarios.
-        /// </description>
-        /// </item>
-        /// <item>
-        /// <description>
-        /// Please try the simpler <see cref="GetModelInfo(CancellationToken)"/> convenience overload with strongly typed models first.
-        /// </description>
-        /// </item>
-        /// </list>
-        /// </summary>
-        /// <param name="context"> The request context, which can override default behaviors of the client pipeline on a per-call basis. </param>
-        /// <exception cref="RequestFailedException"> Service returned a non-success status code. </exception>
-        /// <returns> The response returned from the service. </returns>
-        /// <include file="Docs/EmbeddingsClient.xml" path="doc/members/member[@name='GetModelInfo(RequestContext)']/*" />
-        public virtual Response GetModelInfo(RequestContext context)
-        {
-            using var scope = ClientDiagnostics.CreateScope("EmbeddingsClient.GetModelInfo");
-            scope.Start();
-            try
-            {
-                using HttpMessage message = CreateGetModelInfoRequest(context);
-                return _pipeline.ProcessMessage(message, context);
-            }
-            catch (Exception e)
-            {
-                scope.Failed(e);
-                throw;
-            }
-        }
-
-        internal HttpMessage CreateEmbedRequest(RequestContent content, string extraParams, RequestContext context)
-        {
-            var message = _pipeline.CreateMessage(context, ResponseClassifier200);
-            var request = message.Request;
-            request.Method = RequestMethod.Post;
-            var uri = new RawRequestUriBuilder();
-            uri.Reset(_endpoint);
-            uri.AppendPath("/embeddings", false);
-            uri.AppendQuery("api-version", _apiVersion, true);
-            request.Uri = uri;
-            request.Headers.Add("Accept", "application/json");
-            if (extraParams != null)
-            {
-                request.Headers.Add("extra-parameters", extraParams);
-            }
-            request.Headers.Add("Content-Type", "application/json");
-            request.Content = content;
-            return message;
-        }
-
-        internal HttpMessage CreateGetModelInfoRequest(RequestContext context)
-        {
-            var message = _pipeline.CreateMessage(context, ResponseClassifier200);
-            var request = message.Request;
-            request.Method = RequestMethod.Get;
-            var uri = new RawRequestUriBuilder();
-            uri.Reset(_endpoint);
-            uri.AppendPath("/info", false);
-            uri.AppendQuery("api-version", _apiVersion, true);
-            request.Uri = uri;
-            request.Headers.Add("Accept", "application/json");
-            return message;
-        }
-
-        private static RequestContext DefaultRequestContext = new RequestContext();
-        internal static RequestContext FromCancellationToken(CancellationToken cancellationToken = default)
-        {
-            if (!cancellationToken.CanBeCanceled)
-            {
-                return DefaultRequestContext;
-            }
-
-            return new RequestContext() { CancellationToken = cancellationToken };
-        }
-
-        private static ResponseClassifier _responseClassifier200;
-        private static ResponseClassifier ResponseClassifier200 => _responseClassifier200 ??= new StatusCodeClassifier(stackalloc ushort[] { 200 });
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsOptions.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsOptions.Serialization.cs
deleted file mode 100644
index 5627de6a0da8..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsOptions.Serialization.cs
+++ /dev/null
@@ -1,208 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.ClientModel.Primitives;
-using System.Collections.Generic;
-using System.Text.Json;
-using Azure.Core;
-
-namespace Azure.AI.Inference
-{
-    public partial class EmbeddingsOptions : IUtf8JsonSerializable, IJsonModel<EmbeddingsOptions>
-    {
-        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<EmbeddingsOptions>)this).Write(writer, ModelSerializationExtensions.WireOptions);
-
-        void IJsonModel<EmbeddingsOptions>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
-        {
-            writer.WriteStartObject();
-            JsonModelWriteCore(writer, options);
-            writer.WriteEndObject();
-        }
-
-        /// <param name="writer"> The JSON writer. </param>
-        /// <param name="options"> The client options for reading and writing models. </param>
-        protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsOptions>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(EmbeddingsOptions)} does not support writing '{format}' format.");
-            }
-
-            writer.WritePropertyName("input"u8);
-            writer.WriteStartArray();
-            foreach (var item in Input)
-            {
-                writer.WriteStringValue(item);
-            }
-            writer.WriteEndArray();
-            if (Optional.IsDefined(Dimensions))
-            {
-                writer.WritePropertyName("dimensions"u8);
-                writer.WriteNumberValue(Dimensions.Value);
-            }
-            if (Optional.IsDefined(EncodingFormat))
-            {
-                writer.WritePropertyName("encoding_format"u8);
-                writer.WriteStringValue(EncodingFormat.Value.ToString());
-            }
-            if (Optional.IsDefined(InputType))
-            {
-                writer.WritePropertyName("input_type"u8);
-                writer.WriteStringValue(InputType.Value.ToString());
-            }
-            if (Optional.IsDefined(Model))
-            {
-                writer.WritePropertyName("model"u8);
-                writer.WriteStringValue(Model);
-            }
-            foreach (var item in AdditionalProperties)
-            {
-                writer.WritePropertyName(item.Key);
-#if NET6_0_OR_GREATER
-				writer.WriteRawValue(item.Value);
-#else
-                using (JsonDocument document = JsonDocument.Parse(item.Value))
-                {
-                    JsonSerializer.Serialize(writer, document.RootElement);
-                }
-#endif
-            }
-        }
-
-        EmbeddingsOptions IJsonModel<EmbeddingsOptions>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsOptions>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(EmbeddingsOptions)} does not support reading '{format}' format.");
-            }
-
-            using JsonDocument document = JsonDocument.ParseValue(ref reader);
-            return DeserializeEmbeddingsOptions(document.RootElement, options);
-        }
-
-        internal static EmbeddingsOptions DeserializeEmbeddingsOptions(JsonElement element, ModelReaderWriterOptions options = null)
-        {
-            options ??= ModelSerializationExtensions.WireOptions;
-
-            if (element.ValueKind == JsonValueKind.Null)
-            {
-                return null;
-            }
-            IList<string> input = default;
-            int? dimensions = default;
-            EmbeddingEncodingFormat? encodingFormat = default;
-            EmbeddingInputType? inputType = default;
-            string model = default;
-            IDictionary<string, BinaryData> additionalProperties = default;
-            Dictionary<string, BinaryData> additionalPropertiesDictionary = new Dictionary<string, BinaryData>();
-            foreach (var property in element.EnumerateObject())
-            {
-                if (property.NameEquals("input"u8))
-                {
-                    List<string> array = new List<string>();
-                    foreach (var item in property.Value.EnumerateArray())
-                    {
-                        array.Add(item.GetString());
-                    }
-                    input = array;
-                    continue;
-                }
-                if (property.NameEquals("dimensions"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    dimensions = property.Value.GetInt32();
-                    continue;
-                }
-                if (property.NameEquals("encoding_format"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    encodingFormat = new EmbeddingEncodingFormat(property.Value.GetString());
-                    continue;
-                }
-                if (property.NameEquals("input_type"u8))
-                {
-                    if (property.Value.ValueKind == JsonValueKind.Null)
-                    {
-                        continue;
-                    }
-                    inputType = new EmbeddingInputType(property.Value.GetString());
-                    continue;
-                }
-                if (property.NameEquals("model"u8))
-                {
-                    model = property.Value.GetString();
-                    continue;
-                }
-                additionalPropertiesDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
-            }
-            additionalProperties = additionalPropertiesDictionary;
-            return new EmbeddingsOptions(
-                input,
-                dimensions,
-                encodingFormat,
-                inputType,
-                model,
-                additionalProperties);
-        }
-
-        BinaryData IPersistableModel<EmbeddingsOptions>.Write(ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsOptions>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    return ModelReaderWriter.Write(this, options);
-                default:
-                    throw new FormatException($"The model {nameof(EmbeddingsOptions)} does not support writing '{options.Format}' format.");
-            }
-        }
-
-        EmbeddingsOptions IPersistableModel<EmbeddingsOptions>.Create(BinaryData data, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsOptions>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    {
-                        using JsonDocument document = JsonDocument.Parse(data);
-                        return DeserializeEmbeddingsOptions(document.RootElement, options);
-                    }
-                default:
-                    throw new FormatException($"The model {nameof(EmbeddingsOptions)} does not support reading '{options.Format}' format.");
-            }
-        }
-
-        string IPersistableModel<EmbeddingsOptions>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
-
-        /// <summary> Deserializes the model from a raw response. </summary>
-        /// <param name="response"> The response to deserialize the model from. </param>
-        internal static EmbeddingsOptions FromResponse(Response response)
-        {
-            using var document = JsonDocument.Parse(response.Content);
-            return DeserializeEmbeddingsOptions(document.RootElement);
-        }
-
-        /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
-        internal virtual RequestContent ToRequestContent()
-        {
-            var content = new Utf8JsonRequestContent();
-            content.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
-            return content;
-        }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsOptions.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsOptions.cs
deleted file mode 100644
index 879babca6604..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsOptions.cs
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace Azure.AI.Inference
-{
-    /// <summary> The configuration information for an embeddings request. </summary>
-    public partial class EmbeddingsOptions
-    {
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsOptions"/>. </summary>
-        /// <param name="input">
-        /// Input text to embed, encoded as a string or array of tokens.
-        /// To embed multiple inputs in a single request, pass an array
-        /// of strings or array of token arrays.
-        /// </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="input"/> is null. </exception>
-        public EmbeddingsOptions(IEnumerable<string> input)
-        {
-            Argument.AssertNotNull(input, nameof(input));
-
-            Input = input.ToList();
-            AdditionalProperties = new ChangeTrackingDictionary<string, BinaryData>();
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsOptions"/>. </summary>
-        /// <param name="input">
-        /// Input text to embed, encoded as a string or array of tokens.
-        /// To embed multiple inputs in a single request, pass an array
-        /// of strings or array of token arrays.
-        /// </param>
-        /// <param name="dimensions">
-        /// Optional. The number of dimensions the resulting output embeddings should have.
-        /// Passing null causes the model to use its default value.
-        /// Returns a 422 error if the model doesn't support the value or parameter.
-        /// </param>
-        /// <param name="encodingFormat"> Optional. The desired format for the returned embeddings. </param>
-        /// <param name="inputType">
-        /// Optional. The type of the input.
-        /// Returns a 422 error if the model doesn't support the value or parameter.
-        /// </param>
-        /// <param name="model"> ID of the specific AI model to use, if more than one model is available on the endpoint. </param>
-        /// <param name="additionalProperties"> Additional Properties. </param>
-        internal EmbeddingsOptions(IList<string> input, int? dimensions, EmbeddingEncodingFormat? encodingFormat, EmbeddingInputType? inputType, string model, IDictionary<string, BinaryData> additionalProperties)
-        {
-            Input = input;
-            Dimensions = dimensions;
-            EncodingFormat = encodingFormat;
-            InputType = inputType;
-            Model = model;
-            AdditionalProperties = additionalProperties;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsOptions"/> for deserialization. </summary>
-        internal EmbeddingsOptions()
-        {
-        }
-
-        /// <summary>
-        /// Input text to embed, encoded as a string or array of tokens.
-        /// To embed multiple inputs in a single request, pass an array
-        /// of strings or array of token arrays.
-        /// </summary>
-        public IList<string> Input { get; }
-        /// <summary>
-        /// Optional. The number of dimensions the resulting output embeddings should have.
-        /// Passing null causes the model to use its default value.
-        /// Returns a 422 error if the model doesn't support the value or parameter.
-        /// </summary>
-        public int? Dimensions { get; set; }
-        /// <summary> Optional. The desired format for the returned embeddings. </summary>
-        public EmbeddingEncodingFormat? EncodingFormat { get; set; }
-        /// <summary>
-        /// Optional. The type of the input.
-        /// Returns a 422 error if the model doesn't support the value or parameter.
-        /// </summary>
-        public EmbeddingInputType? InputType { get; set; }
-        /// <summary> ID of the specific AI model to use, if more than one model is available on the endpoint. </summary>
-        public string Model { get; set; }
-        /// <summary>
-        /// Additional Properties
-        /// <para>
-        /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
-        /// </para>
-        /// <para>
-        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
-        /// </para>
-        /// <para>
-        /// Examples:
-        /// <list type="bullet">
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson("foo")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("\"foo\"")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// </list>
-        /// </para>
-        /// </summary>
-        public IDictionary<string, BinaryData> AdditionalProperties { get; }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsResult.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsResult.Serialization.cs
deleted file mode 100644
index c7ee31c5d7f1..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsResult.Serialization.cs
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.ClientModel.Primitives;
-using System.Collections.Generic;
-using System.Text.Json;
-using Azure.Core;
-
-namespace Azure.AI.Inference
-{
-    public partial class EmbeddingsResult : IUtf8JsonSerializable, IJsonModel<EmbeddingsResult>
-    {
-        void IUtf8JsonSerializable.Write(Utf8JsonWriter writer) => ((IJsonModel<EmbeddingsResult>)this).Write(writer, ModelSerializationExtensions.WireOptions);
-
-        void IJsonModel<EmbeddingsResult>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options)
-        {
-            writer.WriteStartObject();
-            JsonModelWriteCore(writer, options);
-            writer.WriteEndObject();
-        }
-
-        /// <param name="writer"> The JSON writer. </param>
-        /// <param name="options"> The client options for reading and writing models. </param>
-        protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsResult>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(EmbeddingsResult)} does not support writing '{format}' format.");
-            }
-
-            writer.WritePropertyName("id"u8);
-            writer.WriteStringValue(Id);
-            writer.WritePropertyName("data"u8);
-            writer.WriteStartArray();
-            foreach (var item in Data)
-            {
-                writer.WriteObjectValue(item, options);
-            }
-            writer.WriteEndArray();
-            writer.WritePropertyName("usage"u8);
-            writer.WriteObjectValue(Usage, options);
-            writer.WritePropertyName("model"u8);
-            writer.WriteStringValue(Model);
-            if (options.Format != "W" && _serializedAdditionalRawData != null)
-            {
-                foreach (var item in _serializedAdditionalRawData)
-                {
-                    writer.WritePropertyName(item.Key);
-#if NET6_0_OR_GREATER
-				writer.WriteRawValue(item.Value);
-#else
-                    using (JsonDocument document = JsonDocument.Parse(item.Value))
-                    {
-                        JsonSerializer.Serialize(writer, document.RootElement);
-                    }
-#endif
-                }
-            }
-        }
-
-        EmbeddingsResult IJsonModel<EmbeddingsResult>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsResult>)this).GetFormatFromOptions(options) : options.Format;
-            if (format != "J")
-            {
-                throw new FormatException($"The model {nameof(EmbeddingsResult)} does not support reading '{format}' format.");
-            }
-
-            using JsonDocument document = JsonDocument.ParseValue(ref reader);
-            return DeserializeEmbeddingsResult(document.RootElement, options);
-        }
-
-        internal static EmbeddingsResult DeserializeEmbeddingsResult(JsonElement element, ModelReaderWriterOptions options = null)
-        {
-            options ??= ModelSerializationExtensions.WireOptions;
-
-            if (element.ValueKind == JsonValueKind.Null)
-            {
-                return null;
-            }
-            string id = default;
-            IReadOnlyList<EmbeddingItem> data = default;
-            EmbeddingsUsage usage = default;
-            string model = default;
-            IDictionary<string, BinaryData> serializedAdditionalRawData = default;
-            Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
-            foreach (var property in element.EnumerateObject())
-            {
-                if (property.NameEquals("id"u8))
-                {
-                    id = property.Value.GetString();
-                    continue;
-                }
-                if (property.NameEquals("data"u8))
-                {
-                    List<EmbeddingItem> array = new List<EmbeddingItem>();
-                    foreach (var item in property.Value.EnumerateArray())
-                    {
-                        array.Add(EmbeddingItem.DeserializeEmbeddingItem(item, options));
-                    }
-                    data = array;
-                    continue;
-                }
-                if (property.NameEquals("usage"u8))
-                {
-                    usage = EmbeddingsUsage.DeserializeEmbeddingsUsage(property.Value, options);
-                    continue;
-                }
-                if (property.NameEquals("model"u8))
-                {
-                    model = property.Value.GetString();
-                    continue;
-                }
-                if (options.Format != "W")
-                {
-                    rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
-                }
-            }
-            serializedAdditionalRawData = rawDataDictionary;
-            return new EmbeddingsResult(id, data, usage, model, serializedAdditionalRawData);
-        }
-
-        BinaryData IPersistableModel<EmbeddingsResult>.Write(ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsResult>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    return ModelReaderWriter.Write(this, options);
-                default:
-                    throw new FormatException($"The model {nameof(EmbeddingsResult)} does not support writing '{options.Format}' format.");
-            }
-        }
-
-        EmbeddingsResult IPersistableModel<EmbeddingsResult>.Create(BinaryData data, ModelReaderWriterOptions options)
-        {
-            var format = options.Format == "W" ? ((IPersistableModel<EmbeddingsResult>)this).GetFormatFromOptions(options) : options.Format;
-
-            switch (format)
-            {
-                case "J":
-                    {
-                        using JsonDocument document = JsonDocument.Parse(data);
-                        return DeserializeEmbeddingsResult(document.RootElement, options);
-                    }
-                default:
-                    throw new FormatException($"The model {nameof(EmbeddingsResult)} does not support reading '{options.Format}' format.");
-            }
-        }
-
-        string IPersistableModel<EmbeddingsResult>.GetFormatFromOptions(ModelReaderWriterOptions options) => "J";
-
-        /// <summary> Deserializes the model from a raw response. </summary>
-        /// <param name="response"> The response to deserialize the model from. </param>
-        internal static EmbeddingsResult FromResponse(Response response)
-        {
-            using var document = JsonDocument.Parse(response.Content);
-            return DeserializeEmbeddingsResult(document.RootElement);
-        }
-
-        /// <summary> Convert into a <see cref="RequestContent"/>. </summary>
-        internal virtual RequestContent ToRequestContent()
-        {
-            var content = new Utf8JsonRequestContent();
-            content.JsonWriter.WriteObjectValue(this, ModelSerializationExtensions.WireOptions);
-            return content;
-        }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsResult.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsResult.cs
deleted file mode 100644
index c44b8e64397b..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsResult.cs
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace Azure.AI.Inference
-{
-    /// <summary>
-    /// Representation of the response data from an embeddings request.
-    /// Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
-    /// recommendations, and other similar scenarios.
-    /// </summary>
-    public partial class EmbeddingsResult
-    {
-        /// <summary>
-        /// Keeps track of any properties unknown to the library.
-        /// <para>
-        /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
-        /// </para>
-        /// <para>
-        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
-        /// </para>
-        /// <para>
-        /// Examples:
-        /// <list type="bullet">
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson("foo")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("\"foo\"")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// </list>
-        /// </para>
-        /// </summary>
-        private IDictionary<string, BinaryData> _serializedAdditionalRawData;
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsResult"/>. </summary>
-        /// <param name="id"> Unique identifier for the embeddings result. </param>
-        /// <param name="data"> Embedding values for the prompts submitted in the request. </param>
-        /// <param name="usage"> Usage counts for tokens input using the embeddings API. </param>
-        /// <param name="model"> The model ID used to generate this result. </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="id"/>, <paramref name="data"/>, <paramref name="usage"/> or <paramref name="model"/> is null. </exception>
-        internal EmbeddingsResult(string id, IEnumerable<EmbeddingItem> data, EmbeddingsUsage usage, string model)
-        {
-            Argument.AssertNotNull(id, nameof(id));
-            Argument.AssertNotNull(data, nameof(data));
-            Argument.AssertNotNull(usage, nameof(usage));
-            Argument.AssertNotNull(model, nameof(model));
-
-            Id = id;
-            Data = data.ToList();
-            Usage = usage;
-            Model = model;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsResult"/>. </summary>
-        /// <param name="id"> Unique identifier for the embeddings result. </param>
-        /// <param name="data"> Embedding values for the prompts submitted in the request. </param>
-        /// <param name="usage"> Usage counts for tokens input using the embeddings API. </param>
-        /// <param name="model"> The model ID used to generate this result. </param>
-        /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal EmbeddingsResult(string id, IReadOnlyList<EmbeddingItem> data, EmbeddingsUsage usage, string model, IDictionary<string, BinaryData> serializedAdditionalRawData)
-        {
-            Id = id;
-            Data = data;
-            Usage = usage;
-            Model = model;
-            _serializedAdditionalRawData = serializedAdditionalRawData;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsResult"/> for deserialization. </summary>
-        internal EmbeddingsResult()
-        {
-        }
-
-        /// <summary> Unique identifier for the embeddings result. </summary>
-        public string Id { get; }
-        /// <summary> Embedding values for the prompts submitted in the request. </summary>
-        public IReadOnlyList<EmbeddingItem> Data { get; }
-        /// <summary> Usage counts for tokens input using the embeddings API. </summary>
-        public EmbeddingsUsage Usage { get; }
-        /// <summary> The model ID used to generate this result. </summary>
-        public string Model { get; }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsUsage.cs b/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsUsage.cs
deleted file mode 100644
index f0808a074012..000000000000
--- a/sdk/ai/Azure.AI.Inference/src/Generated/EmbeddingsUsage.cs
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.Collections.Generic;
-
-namespace Azure.AI.Inference
-{
-    /// <summary> Measurement of the amount of tokens used in this request and response. </summary>
-    public partial class EmbeddingsUsage
-    {
-        /// <summary>
-        /// Keeps track of any properties unknown to the library.
-        /// <para>
-        /// To assign an object to the value of this property use <see cref="BinaryData.FromObjectAsJson{T}(T, System.Text.Json.JsonSerializerOptions?)"/>.
-        /// </para>
-        /// <para>
-        /// To assign an already formatted json string to this property use <see cref="BinaryData.FromString(string)"/>.
-        /// </para>
-        /// <para>
-        /// Examples:
-        /// <list type="bullet">
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson("foo")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("\"foo\"")</term>
-        /// <description>Creates a payload of "foo".</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromObjectAsJson(new { key = "value" })</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// <item>
-        /// <term>BinaryData.FromString("{\"key\": \"value\"}")</term>
-        /// <description>Creates a payload of { "key": "value" }.</description>
-        /// </item>
-        /// </list>
-        /// </para>
-        /// </summary>
-        private IDictionary<string, BinaryData> _serializedAdditionalRawData;
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsUsage"/>. </summary>
-        /// <param name="promptTokens"> Number of tokens in the request. </param>
-        /// <param name="totalTokens">
-        /// Total number of tokens transacted in this request/response. Should equal the
-        /// number of tokens in the request.
-        /// </param>
-        internal EmbeddingsUsage(int promptTokens, int totalTokens)
-        {
-            PromptTokens = promptTokens;
-            TotalTokens = totalTokens;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsUsage"/>. </summary>
-        /// <param name="promptTokens"> Number of tokens in the request. </param>
-        /// <param name="totalTokens">
-        /// Total number of tokens transacted in this request/response. Should equal the
-        /// number of tokens in the request.
-        /// </param>
-        /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal EmbeddingsUsage(int promptTokens, int totalTokens, IDictionary<string, BinaryData> serializedAdditionalRawData)
-        {
-            PromptTokens = promptTokens;
-            TotalTokens = totalTokens;
-            _serializedAdditionalRawData = serializedAdditionalRawData;
-        }
-
-        /// <summary> Initializes a new instance of <see cref="EmbeddingsUsage"/> for deserialization. </summary>
-        internal EmbeddingsUsage()
-        {
-        }
-
-        /// <summary> Number of tokens in the request. </summary>
-        public int PromptTokens { get; }
-        /// <summary>
-        /// Total number of tokens transacted in this request/response. Should equal the
-        /// number of tokens in the request.
-        /// </summary>
-        public int TotalTokens { get; }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ExtraParameters.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ExtraParameters.cs
index 9ffda5f51e8a..f049bc41d649 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ExtraParameters.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ExtraParameters.cs
@@ -11,7 +11,7 @@
 namespace Azure.AI.Inference
 {
     /// <summary> Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. </summary>
-    public readonly partial struct ExtraParameters : IEquatable<ExtraParameters>
+    internal readonly partial struct ExtraParameters : IEquatable<ExtraParameters>
     {
         private readonly string _value;
 
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/ModelType.cs b/sdk/ai/Azure.AI.Inference/src/Generated/ModelType.cs
index 960fae6ae43b..c9a683ad7562 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/ModelType.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/ModelType.cs
@@ -27,20 +27,20 @@ public ModelType(string value)
         private const string TextGenerationValue = "text_generation";
         private const string ImageEmbeddingsValue = "image_embeddings";
         private const string AudioGenerationValue = "audio_generation";
-        private const string ChatValue = "chat";
+        private const string ChatCompletionValue = "chat_completion";
 
-        /// <summary> Embeddings. </summary>
+        /// <summary> A model capable of generating embeddings from a text. </summary>
         public static ModelType Embeddings { get; } = new ModelType(EmbeddingsValue);
-        /// <summary> Image generation. </summary>
+        /// <summary> A model capable of generating images from an image and text description. </summary>
         public static ModelType ImageGeneration { get; } = new ModelType(ImageGenerationValue);
-        /// <summary> Text generation. </summary>
+        /// <summary> A text generation model. </summary>
         public static ModelType TextGeneration { get; } = new ModelType(TextGenerationValue);
-        /// <summary> Image embeddings. </summary>
+        /// <summary> A model capable of generating embeddings from an image. </summary>
         public static ModelType ImageEmbeddings { get; } = new ModelType(ImageEmbeddingsValue);
-        /// <summary> Audio generation. </summary>
+        /// <summary> A text-to-audio generative model. </summary>
         public static ModelType AudioGeneration { get; } = new ModelType(AudioGenerationValue);
-        /// <summary> Chat completions. </summary>
-        public static ModelType Chat { get; } = new ModelType(ChatValue);
+        /// <summary> A model capable of taking chat-formatted messages and generating responses. </summary>
+        public static ModelType ChatCompletion { get; } = new ModelType(ChatCompletionValue);
         /// <summary> Determines if two <see cref="ModelType"/> values are the same. </summary>
         public static bool operator ==(ModelType left, ModelType right) => left.Equals(right);
         /// <summary> Determines if two <see cref="ModelType"/> values are not the same. </summary>
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.Serialization.cs b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.Serialization.cs
index 6a0c2f39f312..fdd8543b88eb 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.Serialization.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.Serialization.cs
@@ -40,8 +40,6 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
             writer.WriteNumberValue(Created, "U");
             writer.WritePropertyName("model"u8);
             writer.WriteStringValue(Model);
-            writer.WritePropertyName("usage"u8);
-            writer.WriteObjectValue(Usage, options);
             writer.WritePropertyName("choices"u8);
             writer.WriteStartArray();
             foreach (var item in Choices)
@@ -49,6 +47,11 @@ protected virtual void JsonModelWriteCore(Utf8JsonWriter writer, ModelReaderWrit
                 writer.WriteObjectValue(item, options);
             }
             writer.WriteEndArray();
+            if (Optional.IsDefined(Usage))
+            {
+                writer.WritePropertyName("usage"u8);
+                writer.WriteObjectValue(Usage, options);
+            }
             if (options.Format != "W" && _serializedAdditionalRawData != null)
             {
                 foreach (var item in _serializedAdditionalRawData)
@@ -89,8 +92,8 @@ internal static StreamingChatCompletionsUpdate DeserializeStreamingChatCompletio
             string id = default;
             DateTimeOffset created = default;
             string model = default;
-            CompletionsUsage usage = default;
             IReadOnlyList<StreamingChatChoiceUpdate> choices = default;
+            CompletionsUsage usage = default;
             IDictionary<string, BinaryData> serializedAdditionalRawData = default;
             Dictionary<string, BinaryData> rawDataDictionary = new Dictionary<string, BinaryData>();
             foreach (var property in element.EnumerateObject())
@@ -110,11 +113,6 @@ internal static StreamingChatCompletionsUpdate DeserializeStreamingChatCompletio
                     model = property.Value.GetString();
                     continue;
                 }
-                if (property.NameEquals("usage"u8))
-                {
-                    usage = CompletionsUsage.DeserializeCompletionsUsage(property.Value, options);
-                    continue;
-                }
                 if (property.NameEquals("choices"u8))
                 {
                     List<StreamingChatChoiceUpdate> array = new List<StreamingChatChoiceUpdate>();
@@ -125,6 +123,15 @@ internal static StreamingChatCompletionsUpdate DeserializeStreamingChatCompletio
                     choices = array;
                     continue;
                 }
+                if (property.NameEquals("usage"u8))
+                {
+                    if (property.Value.ValueKind == JsonValueKind.Null)
+                    {
+                        continue;
+                    }
+                    usage = CompletionsUsage.DeserializeCompletionsUsage(property.Value, options);
+                    continue;
+                }
                 if (options.Format != "W")
                 {
                     rawDataDictionary.Add(property.Name, BinaryData.FromString(property.Value.GetRawText()));
@@ -135,8 +142,8 @@ internal static StreamingChatCompletionsUpdate DeserializeStreamingChatCompletio
                 id,
                 created,
                 model,
-                usage,
                 choices,
+                usage,
                 serializedAdditionalRawData);
         }
 
diff --git a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.cs b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.cs
index de1a266e5616..6cdc90bb0036 100644
--- a/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.cs
+++ b/sdk/ai/Azure.AI.Inference/src/Generated/StreamingChatCompletionsUpdate.cs
@@ -58,24 +58,21 @@ public partial class StreamingChatCompletionsUpdate
         /// represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
         /// </param>
         /// <param name="model"> The model used for the chat completion. </param>
-        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <param name="choices">
         /// An update to the collection of completion choices associated with this completions response.
         /// Generally, `n` choices are generated per provided prompt with a default value of 1.
         /// Token limits and other settings may limit the number of choices generated.
         /// </param>
-        /// <exception cref="ArgumentNullException"> <paramref name="id"/>, <paramref name="model"/>, <paramref name="usage"/> or <paramref name="choices"/> is null. </exception>
-        internal StreamingChatCompletionsUpdate(string id, DateTimeOffset created, string model, CompletionsUsage usage, IEnumerable<StreamingChatChoiceUpdate> choices)
+        /// <exception cref="ArgumentNullException"> <paramref name="id"/>, <paramref name="model"/> or <paramref name="choices"/> is null. </exception>
+        internal StreamingChatCompletionsUpdate(string id, DateTimeOffset created, string model, IEnumerable<StreamingChatChoiceUpdate> choices)
         {
             Argument.AssertNotNull(id, nameof(id));
             Argument.AssertNotNull(model, nameof(model));
-            Argument.AssertNotNull(usage, nameof(usage));
             Argument.AssertNotNull(choices, nameof(choices));
 
             Id = id;
             Created = created;
             Model = model;
-            Usage = usage;
             Choices = choices.ToList();
         }
 
@@ -86,20 +83,20 @@ internal StreamingChatCompletionsUpdate(string id, DateTimeOffset created, strin
         /// represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
         /// </param>
         /// <param name="model"> The model used for the chat completion. </param>
-        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <param name="choices">
         /// An update to the collection of completion choices associated with this completions response.
         /// Generally, `n` choices are generated per provided prompt with a default value of 1.
         /// Token limits and other settings may limit the number of choices generated.
         /// </param>
+        /// <param name="usage"> Usage information for tokens processed and generated as part of this completions operation. </param>
         /// <param name="serializedAdditionalRawData"> Keeps track of any properties unknown to the library. </param>
-        internal StreamingChatCompletionsUpdate(string id, DateTimeOffset created, string model, CompletionsUsage usage, IReadOnlyList<StreamingChatChoiceUpdate> choices, IDictionary<string, BinaryData> serializedAdditionalRawData)
+        internal StreamingChatCompletionsUpdate(string id, DateTimeOffset created, string model, IReadOnlyList<StreamingChatChoiceUpdate> choices, CompletionsUsage usage, IDictionary<string, BinaryData> serializedAdditionalRawData)
         {
             Id = id;
             Created = created;
             Model = model;
-            Usage = usage;
             Choices = choices;
+            Usage = usage;
             _serializedAdditionalRawData = serializedAdditionalRawData;
         }
 
@@ -107,13 +104,13 @@ internal StreamingChatCompletionsUpdate(string id, DateTimeOffset created, strin
         internal StreamingChatCompletionsUpdate()
         {
         }
-        /// <summary> Usage information for tokens processed and generated as part of this completions operation. </summary>
-        public CompletionsUsage Usage { get; }
         /// <summary>
         /// An update to the collection of completion choices associated with this completions response.
         /// Generally, `n` choices are generated per provided prompt with a default value of 1.
         /// Token limits and other settings may limit the number of choices generated.
         /// </summary>
         public IReadOnlyList<StreamingChatChoiceUpdate> Choices { get; }
+        /// <summary> Usage information for tokens processed and generated as part of this completions operation. </summary>
+        public CompletionsUsage Usage { get; }
     }
 }
diff --git a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_EmbeddingsClient.cs b/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_EmbeddingsClient.cs
deleted file mode 100644
index 1244fa042540..000000000000
--- a/sdk/ai/Azure.AI.Inference/tests/Generated/Samples/Samples_EmbeddingsClient.cs
+++ /dev/null
@@ -1,255 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-// <auto-generated/>
-
-#nullable disable
-
-using System;
-using System.Text.Json;
-using System.Threading.Tasks;
-using Azure.Core;
-using Azure.Identity;
-using NUnit.Framework;
-
-namespace Azure.AI.Inference.Samples
-{
-    public partial class Samples_EmbeddingsClient
-    {
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_Embed_MaximumSetEmbeddings()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            using RequestContent content = RequestContent.Create(new object());
-            Response response = client.Embed(content);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("id").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-            Console.WriteLine(result.GetProperty("model").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_Embed_MaximumSetEmbeddings_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            using RequestContent content = RequestContent.Create(new object());
-            Response response = await client.EmbedAsync(content);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("id").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-            Console.WriteLine(result.GetProperty("model").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_Embed_MaximumSetEmbeddings_Convenience()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            EmbeddingsOptions embeddingsOptions = null;
-            Response<EmbeddingsResult> response = client.Embed(embeddingsOptions);
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_Embed_MaximumSetEmbeddings_Convenience_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            EmbeddingsOptions embeddingsOptions = null;
-            Response<EmbeddingsResult> response = await client.EmbedAsync(embeddingsOptions);
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_Embed_MinimumSetEmbeddings()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            using RequestContent content = RequestContent.Create(new object());
-            Response response = client.Embed(content);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("id").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-            Console.WriteLine(result.GetProperty("model").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_Embed_MinimumSetEmbeddings_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            using RequestContent content = RequestContent.Create(new object());
-            Response response = await client.EmbedAsync(content);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("id").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("embedding").ToString());
-            Console.WriteLine(result.GetProperty("data")[0].GetProperty("index").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("prompt_tokens").ToString());
-            Console.WriteLine(result.GetProperty("usage").GetProperty("total_tokens").ToString());
-            Console.WriteLine(result.GetProperty("model").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_Embed_MinimumSetEmbeddings_Convenience()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            EmbeddingsOptions embeddingsOptions = null;
-            Response<EmbeddingsResult> response = client.Embed(embeddingsOptions);
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_Embed_MinimumSetEmbeddings_Convenience_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            EmbeddingsOptions embeddingsOptions = null;
-            Response<EmbeddingsResult> response = await client.EmbedAsync(embeddingsOptions);
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_GetModelInfo_MaximumSetModelInformation()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response response = client.GetModelInfo(null);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("model_name").ToString());
-            Console.WriteLine(result.GetProperty("model_type").ToString());
-            Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_GetModelInfo_MaximumSetModelInformation_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response response = await client.GetModelInfoAsync(null);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("model_name").ToString());
-            Console.WriteLine(result.GetProperty("model_type").ToString());
-            Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_GetModelInfo_MaximumSetModelInformation_Convenience()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response<ModelInfo> response = client.GetModelInfo();
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_GetModelInfo_MaximumSetModelInformation_Convenience_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response<ModelInfo> response = await client.GetModelInfoAsync();
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_GetModelInfo_MinimumSetModelInformation()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response response = client.GetModelInfo(null);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("model_name").ToString());
-            Console.WriteLine(result.GetProperty("model_type").ToString());
-            Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_GetModelInfo_MinimumSetModelInformation_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response response = await client.GetModelInfoAsync(null);
-
-            JsonElement result = JsonDocument.Parse(response.ContentStream).RootElement;
-            Console.WriteLine(result.GetProperty("model_name").ToString());
-            Console.WriteLine(result.GetProperty("model_type").ToString());
-            Console.WriteLine(result.GetProperty("model_provider_name").ToString());
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public void Example_Client2_GetModelInfo_MinimumSetModelInformation_Convenience()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response<ModelInfo> response = client.GetModelInfo();
-        }
-
-        [Test]
-        [Ignore("Only validating compilation of examples")]
-        public async Task Example_Client2_GetModelInfo_MinimumSetModelInformation_Convenience_Async()
-        {
-            Uri endpoint = new Uri("<endpoint>");
-            AzureKeyCredential credential = new AzureKeyCredential("<key>");
-            EmbeddingsClient client = new EmbeddingsClient(endpoint, credential);
-
-            Response<ModelInfo> response = await client.GetModelInfoAsync();
-        }
-    }
-}
diff --git a/sdk/ai/Azure.AI.Inference/tsp-location.yaml b/sdk/ai/Azure.AI.Inference/tsp-location.yaml
index a65691285852..2a6c4e479e5a 100644
--- a/sdk/ai/Azure.AI.Inference/tsp-location.yaml
+++ b/sdk/ai/Azure.AI.Inference/tsp-location.yaml
@@ -1,3 +1,4 @@
 directory: specification/ai/ModelClient
-repo: trangevi/azure-rest-api-specs
-commit: aa97e50e44d2c620c92e5a4c791bb387a741d64c
+commit: cca025fa6ce483a4878daf0dd3082c97fd29c83b
+repo: Azure/azure-rest-api-specs
+additionalDirectories: