From a41e04ae55cb4eb4f266585aede96b996810ab92 Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Fri, 12 Apr 2024 09:46:22 -0400 Subject: [PATCH 01/19] start doc --- docs/ai/how-to/work-with-local-models.md | 113 +++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 docs/ai/how-to/work-with-local-models.md diff --git a/docs/ai/how-to/work-with-local-models.md b/docs/ai/how-to/work-with-local-models.md new file mode 100644 index 0000000000000..07bad679ec40d --- /dev/null +++ b/docs/ai/how-to/work-with-local-models.md @@ -0,0 +1,113 @@ +--- +title: "Work with local models" +description: "Demonstrates how to use custom or local models with the Semantic Kernel SDK for .NET." +author: haywoodsloan +ms.topic: how-to +ms.date: 04/11/2024 + +#customer intent: As a .NET developer, I want use custom or local AI models with the Semantic Kernel SDK so that I can choose the model that's best for me. + +--- + + + +# Work with local models + + + +[Introduce and explain the purpose of the article.] + + + +## Prerequisites + + + +## "[verb] * [noun]" + +[Introduce the procedure.] + +1. Procedure step +1. Procedure step +1. Procedure step + + + +## Next step -or- Related content + +> [!div class="nextstepaction"] +> [Next sequential article title](link.md) + +-or- + +* [Related article title](link.md) +* [Related article title](link.md) +* [Related article title](link.md) + + + + From 80b8b7e3f70f4bffbf54334dbc030e292fa6780d Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Fri, 12 Apr 2024 13:00:15 -0400 Subject: [PATCH 02/19] add sample code --- .../semantic-kernel/LocalModelExamples.cs | 110 ++++++++++++++++++ .../snippets/semantic-kernel/Program.cs | 1 + .../semantic-kernel/semantic-kernel.csproj | 15 +++ .../services/MyChatCompletionService.cs | 45 +++++++ .../services/MyTextGenerationService.cs | 44 +++++++ docs/ai/how-to/work-with-local-models.md | 22 +++- 6 files changed, 231 insertions(+), 6 deletions(-) create mode 100644 docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs create mode 100644 docs/ai/how-to/snippets/semantic-kernel/Program.cs create mode 100644 docs/ai/how-to/snippets/semantic-kernel/semantic-kernel.csproj create mode 100644 docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs create mode 100644 docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs new file mode 100644 index 0000000000000..5396e976476ff --- /dev/null +++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs @@ -0,0 +1,110 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.TextGeneration; + +class LocalModelExamples +{ + public static async Task Examples() + { + AddTextGenerationServiceExample(); + AddChatCompletionServiceExample(); + await UseTextGenerationServiceExample(); + await UseChatCompletionServiceExample(); + } + + static Kernel AddTextGenerationServiceExample() + { + // + IKernelBuilder builder = Kernel.CreateBuilder(); + + // Add your text generation service as a singleton instance + var service = new MyTextGenerationService(); + builder.Services.AddKeyedSingleton("myTextService1", service); + + // Alternatively, add your text generation service as a factory method + builder.Services.AddKeyedSingleton("myTextService2", (_, _) => new MyTextGenerationService()); + + // 
Add any other Kernel services or configurations + // ... + Kernel kernel = builder.Build(); + // + + return builder.Build(); + } + + static Kernel AddChatCompletionServiceExample() + { + // + IKernelBuilder builder = Kernel.CreateBuilder(); + + // Add your chat completion service as a singleton instance + var service = new MyChatCompletionService(); + builder.Services.AddKeyedSingleton("myChatService1", service); + + // Alternatively, add your chat completion service as a factory method + builder.Services.AddKeyedSingleton("myChatService2", (_, _) => new MyChatCompletionService()); + + // Add any other Kernel services or configurations + // ... + Kernel kernel = builder.Build(); + // + + return kernel; + } + + static async Task UseTextGenerationServiceExample() + { + IKernelBuilder builder = Kernel.CreateBuilder(); + builder.Services.AddKeyedSingleton("myTextService", new MyTextGenerationService()); + Kernel kernel = builder.Build(); + + // + var executionSettings = new PromptExecutionSettings + { + // Add execution settings, such as the ModelID and ExtensionData + }; + + // Send a prompt to your model directly through the Kernel + string prompt = "Please list three services offered by Azure"; + string? response = await kernel.InvokePromptAsync(prompt); + Console.WriteLine($"Output: {response}"); + + // Alteratively, send a prompt to your model through the text generation service + ITextGenerationService textService = kernel.GetRequiredService(); + TextContent responseContents = await textService.GetTextContentAsync(prompt, executionSettings); + Console.WriteLine($"Output: {responseContents.Text}"); + // + } + + static async Task UseChatCompletionServiceExample() + { + IKernelBuilder builder = Kernel.CreateBuilder(); + builder.Services.AddKeyedSingleton("myChatService", new MyChatCompletionService()); + Kernel kernel = builder.Build(); + + // + var executionSettings = new PromptExecutionSettings + { + // Add execution settings, such as the ModelID and ExtensionData + }; + + // Initialize a chat history with your initial system message + string systemMessage = ""; + Console.WriteLine($"System Prompt: {systemMessage}"); + var chatHistory = new ChatHistory(systemMessage); + + // Add the user's input to your chat history + string userGreeting = ""; + Console.WriteLine($"User: {userGreeting}"); + chatHistory.AddUserMessage(userGreeting); + + // Send the chat history to your model through the chat completion service + // Add the model's response to the chat history + IChatCompletionService service = kernel.GetRequiredService(); + ChatMessageContent response = await service.GetChatMessageContentAsync(chatHistory, executionSettings); + Console.WriteLine($"Assistant: {response.Content}"); + chatHistory.Add(response); + // + } +} \ No newline at end of file diff --git a/docs/ai/how-to/snippets/semantic-kernel/Program.cs b/docs/ai/how-to/snippets/semantic-kernel/Program.cs new file mode 100644 index 0000000000000..92a1944b44964 --- /dev/null +++ b/docs/ai/how-to/snippets/semantic-kernel/Program.cs @@ -0,0 +1 @@ +await LocalModelExamples.Examples(); \ No newline at end of file diff --git a/docs/ai/how-to/snippets/semantic-kernel/semantic-kernel.csproj b/docs/ai/how-to/snippets/semantic-kernel/semantic-kernel.csproj new file mode 100644 index 0000000000000..98a64803401ba --- /dev/null +++ b/docs/ai/how-to/snippets/semantic-kernel/semantic-kernel.csproj @@ -0,0 +1,15 @@ + + + + Exe + net8.0 + semantic_kernel + enable + enable + + + + + + + diff --git 
a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs new file mode 100644 index 0000000000000..629bea7496ebe --- /dev/null +++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs @@ -0,0 +1,45 @@ +using System.Runtime.CompilerServices; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.ChatCompletion; + +// +class MyChatCompletionService : IChatCompletionService +{ + public IReadOnlyDictionary Attributes => new Dictionary(); + + public async Task> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) + { + string myModelOutput = await Task.Run(() => + { + // Access your model, send the chat history, and receive the output + // ... + return ""; + }, cancellationToken); + + // Return your models output as an assistant message + return + [ + new(AuthorRole.Assistant, myModelOutput) + // Include any additional outputs from your model in this list + // ... + ]; + } + + public async IAsyncEnumerable GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + string myModelOutput = await Task.Run(() => + { + // Access your model, send the chat history, and receive the output + // ... + return ""; + }, cancellationToken); + + + foreach (string word in myModelOutput.Split(' ', StringSplitOptions.RemoveEmptyEntries)) + { + cancellationToken.ThrowIfCancellationRequested(); + yield return new StreamingChatMessageContent(AuthorRole.Assistant, $"{word} "); + } + } +} +// \ No newline at end of file diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs new file mode 100644 index 0000000000000..036ba524beae3 --- /dev/null +++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs @@ -0,0 +1,44 @@ +using System.Runtime.CompilerServices; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Services; +using Microsoft.SemanticKernel.TextGeneration; + +// +class MyTextGenerationService : ITextGenerationService +{ + public IReadOnlyDictionary Attributes => new Dictionary(); + + public async IAsyncEnumerable GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + string myModelOutput = await Task.Run(() => + { + // Access your model, send the prompt, and receive the output + // ... + return ""; + }, cancellationToken); + + foreach (string word in myModelOutput.Split(' ', StringSplitOptions.RemoveEmptyEntries)) + { + cancellationToken.ThrowIfCancellationRequested(); + yield return new StreamingTextContent($"{word} "); + } + } + + public async Task> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) + { + string myModelOutput = await Task.Run(() => + { + // Access your model, send the prompt, and receive the output + // ... + return ""; + }, cancellationToken); + + return + [ + new(myModelOutput) + // Include any additional outputs from your model in this list + // ... 
+ ]; + } +} +// \ No newline at end of file diff --git a/docs/ai/how-to/work-with-local-models.md b/docs/ai/how-to/work-with-local-models.md index 07bad679ec40d..fda38e08b0d7b 100644 --- a/docs/ai/how-to/work-with-local-models.md +++ b/docs/ai/how-to/work-with-local-models.md @@ -34,7 +34,9 @@ article describes. --> -[Introduce and explain the purpose of the article.] +This article demonstrates how to integrate a local or custom model into the [Semantic Kernel SDK](/semantic-kernel/overview). + +Integrating a custom model enables the Semantic Kernel SDK to utilize any model you have available, regardless of where or how you access the model. -## "[verb] * [noun]" +## Implement prompt completion using a local model + +The following section shows how to integration your model with the Semantic Kernel SDK for prompt completions. + +1. Create a service class that implements the `ITextGenerationService` interface. For example: -[Introduce the procedure.] + :::code language="csharp" source="./snippets/semantic-kernel/services/MyTextGenerationService.cs" id="service"::: -1. Procedure step -1. Procedure step -1. Procedure step +2. Procedure step +3. Procedure step - # Work with local models - - This article demonstrates how to integrate a local or custom model into the [Semantic Kernel SDK](/semantic-kernel/overview). Integrating a custom model enables the Semantic Kernel SDK to utilize any model you have available, regardless of where or how you access the model. - - ## Prerequisites * An Azure account with an active subscription. [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F). @@ -53,71 +22,38 @@ and to describe the task the article covers. * [`Microsoft.SemanticKernel` NuGet package](https://www.nuget.org/packages/Microsoft.SemanticKernel) * Deploy your model, it should be accessible to your .NET application - +## Implement text generation using a local model -## Implement prompt completion using a local model - -The following section shows how to integration your model with the Semantic Kernel SDK for prompt completions. +The following section shows how to integrate your model with the Semantic Kernel SDK and use it for text generation. 1. Create a service class that implements the `ITextGenerationService` interface. For example: :::code language="csharp" source="./snippets/semantic-kernel/services/MyTextGenerationService.cs" id="service"::: -2. Procedure step -3. Procedure step - - +2. Include the new service class when building the `Kernel`. For example: -## Next step -or- Related content + :::code language="csharp" source="./snippets/semantic-kernel/LocalModelExamples.cs" id="addTextService"::: -> [!div class="nextstepaction"] -> [Next sequential article title](link.md) +3. Send a text generation prompt to your model directly through the `Kernel` or using the service class. For example: --or- + :::code language="csharp" source="./snippets/semantic-kernel/LocalModelExamples.cs" id="useTextService"::: -* [Related article title](link.md) -* [Related article title](link.md) -* [Related article title](link.md) +## Implement chat completion using a local model - +3. Send a chat completion prompt to your model directly through the `Kernel` or using the service class. 
For example: - +* [What is Semantic Kernel](/semantic-kernel/overview/) From 0630b129f980a3a92afdb054a4bc3b92a249cc4a Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Fri, 12 Apr 2024 15:53:24 -0400 Subject: [PATCH 05/19] some rewording --- docs/ai/how-to/work-with-local-models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ai/how-to/work-with-local-models.md b/docs/ai/how-to/work-with-local-models.md index 0c3488144c220..42f154d867d1d 100644 --- a/docs/ai/how-to/work-with-local-models.md +++ b/docs/ai/how-to/work-with-local-models.md @@ -5,7 +5,7 @@ author: haywoodsloan ms.topic: how-to ms.date: 04/11/2024 -#customer intent: As a .NET developer, I want use custom or local AI models with the Semantic Kernel SDK so that I can choose the model that's best for me. +#customer intent: As a .NET developer, I want use custom or local AI models with the Semantic Kernel SDK so that I can use any model available to me. --- @@ -13,7 +13,7 @@ ms.date: 04/11/2024 This article demonstrates how to integrate a local or custom model into the [Semantic Kernel SDK](/semantic-kernel/overview). -Integrating a custom model enables the Semantic Kernel SDK to utilize any model you have available, regardless of where or how you access the model. +Integrating a custom model enables the Semantic Kernel SDK to utilize any model you have available, regardless of where or how you access it. ## Prerequisites From 0244f653290d73cf9f932a8c38969d6f5214c17e Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Sat, 13 Apr 2024 00:30:36 -0400 Subject: [PATCH 06/19] Use a lazy-init pattern for the Attributes property --- .../semantic-kernel/services/MyChatCompletionService.cs | 3 ++- .../semantic-kernel/services/MyTextGenerationService.cs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs index 629bea7496ebe..c4ac1bfd60f15 100644 --- a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs +++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs @@ -5,7 +5,8 @@ // class MyChatCompletionService : IChatCompletionService { - public IReadOnlyDictionary Attributes => new Dictionary(); + private IReadOnlyDictionary? _attributes; + public IReadOnlyDictionary Attributes => _attributes ??= new Dictionary(); public async Task> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default) { diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs index 036ba524beae3..879aea9417f30 100644 --- a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs +++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs @@ -6,7 +6,8 @@ // class MyTextGenerationService : ITextGenerationService { - public IReadOnlyDictionary Attributes => new Dictionary(); + private IReadOnlyDictionary? _attributes; + public IReadOnlyDictionary Attributes => _attributes ??= new Dictionary(); public async IAsyncEnumerable GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? 
kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) { From 2925c10074bf08527db25bce7e69353f5c4189bb Mon Sep 17 00:00:00 2001 From: Steven Thomas Date: Sat, 13 Apr 2024 12:33:30 -0700 Subject: [PATCH 07/19] revised metadata and content to better match guidelines --- docs/ai/conceptual/embeddings.md | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 docs/ai/conceptual/embeddings.md diff --git a/docs/ai/conceptual/embeddings.md b/docs/ai/conceptual/embeddings.md new file mode 100644 index 0000000000000..3a487c7fdf1b0 --- /dev/null +++ b/docs/ai/conceptual/embeddings.md @@ -0,0 +1,64 @@ +--- +title: "Using Embeddings for Semantic Analysis - .NET" +description: "Learn how embeddings enable AI models to perform semantic analysis in .NET." +author: catbutler +ms.topic: concept-article #Don't change. +ms.date: 04/10/2024 + +#customer intent: As a .NET developer, I want to understand how embeddings enable semantic analysis and when to use them in .NET . + +--- + +# Embeddings in .NET + +This article explains how embeddings work in .NET. + +Embeddings are numeric representations of non-numeric data. You can use embeddings to help an AI model understand the semantic value of inputs so it can perform various comparisons and transformations, such as summarizing text or creating images from text descriptions. + +## Generate, store and process embeddings + +This section describes how embeddings work in Semantic Kernel and Azure OpenAI. + +Embeddings help you to address LLMs' limited input capacity. LLMs limit the number of tokens per input. If you try to include more than the limit in one input, the model will ignore some or all of that input. Some LLMs also feature quota systems, another reason to watch your token counts. + +An embedding is a semantic value that's encoded numerically, so an LLM can compare it to other embeddings that occupy the same dimensions. Two embeddings with very similar vectors also have very similar semantic values, regardless of the format or amount of raw data they represent. For example, a course syllabus and a textbook for that course have very similar meanings. + +### An embedding model generates embeddings + +You use an embedding model to generate embeddings for your raw data. An embedding model is an AI model that can encode a piece of non-numeric data into a long array of numbers: a vector embedding. The model can also decode an embedding into a piece of non-numeric data that has the same meaning as the original, raw data. + +In Semantic Kernel—an SDK—you create a [plugin](/semantic-kernel/agents/plugins/?tabs=Csharp) to work with an embedding model. In Azure OpenAI—a AI service— you already have access to [embedding models](/azure/ai-services/openai/concepts/models#embeddings). + +### Store and process embeddings in a vector database + +[Vector databases](vector-dbs.md) are designed to store vectors, so they're a natural home for embeddings. Different vector databases offer different processing capabilities, so you should choose one based on your raw data and your goals. + +## Use cases for embeddings + +This section lists the main use cases for embeddings. + +### Increase the amount of text a model will process + +Use embeddings to increase the amount of text a model will process, such as during [prompt engineering](prompt-engineering-in-dot-net.md). + +For example, suppose you want to include 500 pages of text in a prompt. 
The number of tokens for that much raw text will exceed the input token limit, making it impossible to include in a prompt. You can use embeddings to summarize and break down large amounts of that text into pieces that are small enough to fit in one input, and then assess the similarity of each piece to the entire raw text. Then you can choose a piece that best preserves the semantic meaning of the raw text and use it in your prompt without hitting the token limit. + +### More relevant and coherent text generation + +Use embeddings to help models generate more relevant and coherent text. For example, embeddings can help the model to generate better stories, poems, jokes, slogans, captions, newsletters, and the like. + +### Perform text classification, summarization, or translation + +Use embeddings to help a model understand the meaning and context of text, and then classify, summarize, or translate that text. For example, you can use embeddings to help models classify texts as positive or negative, spam or not spam, news or opinion, etc. + +### Turn text into images or images into text + +Use embeddings to help a model create images from text or vice versa, by converting different types of data into a common representation (that is, vectors). For example, you can use embeddings to help a model generate or describe images such as logos, faces, animals, and landscapes. + +### Generate or document code + +Use embeddings to help a model create code from text or vice versa, by converting different code or text expressions into a common representation. For example, you can use embeddings to help a model generate or document code in C# or Python. + +## Related content + +- [Retrieval augmented generation](retrieval-augmented-generation.md) From 56cac784326c62754aabd8c3fc8864bef7ba93f7 Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Mon, 15 Apr 2024 10:34:24 -0400 Subject: [PATCH 08/19] Revert "revised metadata and content to better match guidelines" This reverts commit 2925c10074bf08527db25bce7e69353f5c4189bb. --- docs/ai/conceptual/embeddings.md | 64 -------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 docs/ai/conceptual/embeddings.md diff --git a/docs/ai/conceptual/embeddings.md b/docs/ai/conceptual/embeddings.md deleted file mode 100644 index 3a487c7fdf1b0..0000000000000 --- a/docs/ai/conceptual/embeddings.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: "Using Embeddings for Semantic Analysis - .NET" -description: "Learn how embeddings enable AI models to perform semantic analysis in .NET." -author: catbutler -ms.topic: concept-article #Don't change. -ms.date: 04/10/2024 - -#customer intent: As a .NET developer, I want to understand how embeddings enable semantic analysis and when to use them in .NET . - ---- - -# Embeddings in .NET - -This article explains how embeddings work in .NET. - -Embeddings are numeric representations of non-numeric data. You can use embeddings to help an AI model understand the semantic value of inputs so it can perform various comparisons and transformations, such as summarizing text or creating images from text descriptions. - -## Generate, store and process embeddings - -This section describes how embeddings work in Semantic Kernel and Azure OpenAI. - -Embeddings help you to address LLMs' limited input capacity. LLMs limit the number of tokens per input. If you try to include more than the limit in one input, the model will ignore some or all of that input. 
Some LLMs also feature quota systems, another reason to watch your token counts. - -An embedding is a semantic value that's encoded numerically, so an LLM can compare it to other embeddings that occupy the same dimensions. Two embeddings with very similar vectors also have very similar semantic values, regardless of the format or amount of raw data they represent. For example, a course syllabus and a textbook for that course have very similar meanings. - -### An embedding model generates embeddings - -You use an embedding model to generate embeddings for your raw data. An embedding model is an AI model that can encode a piece of non-numeric data into a long array of numbers: a vector embedding. The model can also decode an embedding into a piece of non-numeric data that has the same meaning as the original, raw data. - -In Semantic Kernel—an SDK—you create a [plugin](/semantic-kernel/agents/plugins/?tabs=Csharp) to work with an embedding model. In Azure OpenAI—a AI service— you already have access to [embedding models](/azure/ai-services/openai/concepts/models#embeddings). - -### Store and process embeddings in a vector database - -[Vector databases](vector-dbs.md) are designed to store vectors, so they're a natural home for embeddings. Different vector databases offer different processing capabilities, so you should choose one based on your raw data and your goals. - -## Use cases for embeddings - -This section lists the main use cases for embeddings. - -### Increase the amount of text a model will process - -Use embeddings to increase the amount of text a model will process, such as during [prompt engineering](prompt-engineering-in-dot-net.md). - -For example, suppose you want to include 500 pages of text in a prompt. The number of tokens for that much raw text will exceed the input token limit, making it impossible to include in a prompt. You can use embeddings to summarize and break down large amounts of that text into pieces that are small enough to fit in one input, and then assess the similarity of each piece to the entire raw text. Then you can choose a piece that best preserves the semantic meaning of the raw text and use it in your prompt without hitting the token limit. - -### More relevant and coherent text generation - -Use embeddings to help models generate more relevant and coherent text. For example, embeddings can help the model to generate better stories, poems, jokes, slogans, captions, newsletters, and the like. - -### Perform text classification, summarization, or translation - -Use embeddings to help a model understand the meaning and context of text, and then classify, summarize, or translate that text. For example, you can use embeddings to help models classify texts as positive or negative, spam or not spam, news or opinion, etc. - -### Turn text into images or images into text - -Use embeddings to help a model create images from text or vice versa, by converting different types of data into a common representation (that is, vectors). For example, you can use embeddings to help a model generate or describe images such as logos, faces, animals, and landscapes. - -### Generate or document code - -Use embeddings to help a model create code from text or vice versa, by converting different code or text expressions into a common representation. For example, you can use embeddings to help a model generate or document code in C# or Python. 
- -## Related content - -- [Retrieval augmented generation](retrieval-augmented-generation.md) From 48ff0124f35c5e98798ae1a4ceeddd7e8e85a682 Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Mon, 15 Apr 2024 10:58:45 -0400 Subject: [PATCH 09/19] pr feedback --- docs/ai/how-to/work-with-local-models.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/ai/how-to/work-with-local-models.md b/docs/ai/how-to/work-with-local-models.md index 42f154d867d1d..f5c1a43620a0e 100644 --- a/docs/ai/how-to/work-with-local-models.md +++ b/docs/ai/how-to/work-with-local-models.md @@ -1,30 +1,30 @@ --- -title: "Work with local models" -description: "Demonstrates how to use custom or local models with the Semantic Kernel SDK for .NET." +title: "Use Custom and Local AI Models with the Semantic Kernel SDK for .NET" +description: "Learn how to use custom or local models for text generation and chat completions in Semantic Kernel SDK for .NET." author: haywoodsloan ms.topic: how-to ms.date: 04/11/2024 -#customer intent: As a .NET developer, I want use custom or local AI models with the Semantic Kernel SDK so that I can use any model available to me. +#customer intent: As a .NET developer, I want to use custom or local AI models with the Semantic Kernel SDK so that I can perform text generation and chat completions using any model available to me. --- -# Work with local models +# Use Custom and Local AI Models with the Semantic Kernel SDK -This article demonstrates how to integrate a local or custom model into the [Semantic Kernel SDK](/semantic-kernel/overview). +This article demonstrates how to integrate custom and local models into the [Semantic Kernel SDK](/semantic-kernel/overview) and use them for text generation and chat completions. -Integrating a custom model enables the Semantic Kernel SDK to utilize any model you have available, regardless of where or how you access it. +You can adapt the steps to use them with any model that you can access, regardless of where or how you access it. For example, you can integrate the [codellama](https://ollama.com/library/codellama) model with the Semantic Kernel SDK to enable code generation and discussion. ## Prerequisites * An Azure account with an active subscription. [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F). * [.NET SDK](https://dotnet.microsoft.com/download/visual-studio-sdks) * [`Microsoft.SemanticKernel` NuGet package](https://www.nuget.org/packages/Microsoft.SemanticKernel) -* Deploy your model, it should be accessible to your .NET application +* A custom or local model, deployed and accessible to your .NET application ## Implement text generation using a local model -The following section shows how to integrate your model with the Semantic Kernel SDK and use it for text generation. +The following section shows how you can integrate your model with the Semantic Kernel SDK and then use it to generate text completions. 1. Create a service class that implements the `ITextGenerationService` interface. For example: @@ -40,7 +40,7 @@ The following section shows how to integrate your model with the Semantic Kernel ## Implement chat completion using a local model -The following section shows how to integrate your model with the Semantic Kernel SDK and use it for chat completion. +The following section shows how you can integrate your model with the Semantic Kernel SDK and then use it for chat completions. 1. Create a service class that implements the `IChatCompletionService` interface. 
For example: @@ -57,3 +57,4 @@ The following section shows how to integrate your model with the Semantic Kernel ## Related content * [What is Semantic Kernel](/semantic-kernel/overview/) +* [Understanding AI plugins in Semantic Kernel](/semantic-kernel/agents/plugins/?tabs=Csharp) From 26c3037d047002d06679518ffa109ce40497f3c1 Mon Sep 17 00:00:00 2001 From: Steven Thomas Date: Sat, 13 Apr 2024 12:33:30 -0700 Subject: [PATCH 10/19] revised metadata and content to better match guidelines --- docs/ai/conceptual/embeddings.md | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 docs/ai/conceptual/embeddings.md diff --git a/docs/ai/conceptual/embeddings.md b/docs/ai/conceptual/embeddings.md new file mode 100644 index 0000000000000..3a487c7fdf1b0 --- /dev/null +++ b/docs/ai/conceptual/embeddings.md @@ -0,0 +1,64 @@ +--- +title: "Using Embeddings for Semantic Analysis - .NET" +description: "Learn how embeddings enable AI models to perform semantic analysis in .NET." +author: catbutler +ms.topic: concept-article #Don't change. +ms.date: 04/10/2024 + +#customer intent: As a .NET developer, I want to understand how embeddings enable semantic analysis and when to use them in .NET . + +--- + +# Embeddings in .NET + +This article explains how embeddings work in .NET. + +Embeddings are numeric representations of non-numeric data. You can use embeddings to help an AI model understand the semantic value of inputs so it can perform various comparisons and transformations, such as summarizing text or creating images from text descriptions. + +## Generate, store and process embeddings + +This section describes how embeddings work in Semantic Kernel and Azure OpenAI. + +Embeddings help you to address LLMs' limited input capacity. LLMs limit the number of tokens per input. If you try to include more than the limit in one input, the model will ignore some or all of that input. Some LLMs also feature quota systems, another reason to watch your token counts. + +An embedding is a semantic value that's encoded numerically, so an LLM can compare it to other embeddings that occupy the same dimensions. Two embeddings with very similar vectors also have very similar semantic values, regardless of the format or amount of raw data they represent. For example, a course syllabus and a textbook for that course have very similar meanings. + +### An embedding model generates embeddings + +You use an embedding model to generate embeddings for your raw data. An embedding model is an AI model that can encode a piece of non-numeric data into a long array of numbers: a vector embedding. The model can also decode an embedding into a piece of non-numeric data that has the same meaning as the original, raw data. + +In Semantic Kernel—an SDK—you create a [plugin](/semantic-kernel/agents/plugins/?tabs=Csharp) to work with an embedding model. In Azure OpenAI—a AI service— you already have access to [embedding models](/azure/ai-services/openai/concepts/models#embeddings). + +### Store and process embeddings in a vector database + +[Vector databases](vector-dbs.md) are designed to store vectors, so they're a natural home for embeddings. Different vector databases offer different processing capabilities, so you should choose one based on your raw data and your goals. + +## Use cases for embeddings + +This section lists the main use cases for embeddings. 
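+In code, producing the vectors that the following cases rely on can look roughly like the sketch below. It assumes an embedding-generation service (for example, an Azure OpenAI embedding deployment) was already registered with the kernel, and that your SDK version exposes the embedding abstraction (marked experimental in some releases); the `kernel` variable and the input strings are illustrative.
+
+```csharp
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Embeddings;
+
+// Resolve whichever embedding service was registered with the Kernel.
+ITextEmbeddingGenerationService embeddingService =
+    kernel.GetRequiredService<ITextEmbeddingGenerationService>();
+
+// Semantically similar inputs produce vectors that point in similar directions.
+IList<ReadOnlyMemory<float>> vectors = await embeddingService.GenerateEmbeddingsAsync(
+    ["A course syllabus for linear algebra", "A linear algebra textbook"]);
+```
+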
+ +### Increase the amount of text a model will process + +Use embeddings to increase the amount of text a model will process, such as during [prompt engineering](prompt-engineering-in-dot-net.md). + +For example, suppose you want to include 500 pages of text in a prompt. The number of tokens for that much raw text will exceed the input token limit, making it impossible to include in a prompt. You can use embeddings to summarize and break down large amounts of that text into pieces that are small enough to fit in one input, and then assess the similarity of each piece to the entire raw text. Then you can choose a piece that best preserves the semantic meaning of the raw text and use it in your prompt without hitting the token limit. + +### More relevant and coherent text generation + +Use embeddings to help models generate more relevant and coherent text. For example, embeddings can help the model to generate better stories, poems, jokes, slogans, captions, newsletters, and the like. + +### Perform text classification, summarization, or translation + +Use embeddings to help a model understand the meaning and context of text, and then classify, summarize, or translate that text. For example, you can use embeddings to help models classify texts as positive or negative, spam or not spam, news or opinion, etc. + +### Turn text into images or images into text + +Use embeddings to help a model create images from text or vice versa, by converting different types of data into a common representation (that is, vectors). For example, you can use embeddings to help a model generate or describe images such as logos, faces, animals, and landscapes. + +### Generate or document code + +Use embeddings to help a model create code from text or vice versa, by converting different code or text expressions into a common representation. For example, you can use embeddings to help a model generate or document code in C# or Python. 
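+
+All of these scenarios ultimately rest on comparing vectors. The following is a minimal, self-contained sketch of that comparison (standard cosine similarity over two equal-length embedding vectors); the helper is illustrative and isn't part of this article's snippet files.
+
+```csharp
+// Cosine similarity between two embedding vectors:
+// values near 1.0 indicate very similar meaning, values near 0 indicate unrelated content.
+static double CosineSimilarity(ReadOnlyMemory<float> a, ReadOnlyMemory<float> b)
+{
+    ReadOnlySpan<float> x = a.Span, y = b.Span;
+    double dot = 0, aa = 0, bb = 0;
+    for (int i = 0; i < x.Length; i++)
+    {
+        dot += x[i] * y[i];
+        aa += x[i] * x[i];
+        bb += y[i] * y[i];
+    }
+    return dot / (Math.Sqrt(aa) * Math.Sqrt(bb));
+}
+```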
+
+## Related content
+
+- [Retrieval augmented generation](retrieval-augmented-generation.md)

From 06d1d9382bd34bc5e0ef29d02f3b9f95522ece0c Mon Sep 17 00:00:00 2001
From: Sloan Haywood
Date: Mon, 15 Apr 2024 12:09:15 -0400
Subject: [PATCH 11/19] update code samples to show more mock code around the model interactions
---
 .../semantic-kernel/LocalModelExamples.cs     |  4 ++
 .../snippets/semantic-kernel/model/MyModel.cs | 21 ++++++++++
 .../semantic-kernel/model/MyModelRequest.cs   | 27 +++++++++++++
 .../semantic-kernel/model/MyModelResponse.cs  |  5 +++
 .../services/MyChatCompletionService.cs       | 38 ++++++++-----------
 .../services/MyTextGenerationService.cs       | 37 ++++++++----------
 6 files changed, 89 insertions(+), 43 deletions(-)
 create mode 100644 docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs
 create mode 100644 docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
 create mode 100644 docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs

diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
index b93720209d320..a3a9d6469a150 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
@@ -63,6 +63,8 @@ static async Task UseTextGenerationServiceExample()
         var executionSettings = new PromptExecutionSettings
         {
             // Add execution settings, such as the ModelID and ExtensionData
+            ModelId = "MyModelId",
+            ExtensionData = new Dictionary<string, object> { { "MaxTokens", 500 } }
         };
 
         // Send a prompt to your model directly through the Kernel
@@ -87,6 +89,8 @@ static async Task UseChatCompletionServiceExample()
         var executionSettings = new PromptExecutionSettings
         {
             // Add execution settings, such as the ModelID and ExtensionData
+            ModelId = "MyModelId",
+            ExtensionData = new Dictionary<string, object> { { "MaxTokens", 500 } }
         };
 
         // Send a string representation of the chat history to your model directly through the Kernel
diff --git a/docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs b/docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs
new file mode 100644
index 0000000000000..f17a74f6ec401
--- /dev/null
+++ b/docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs
@@ -0,0 +1,21 @@
+using System.Runtime.CompilerServices;
+
+// This is a simple mock model to provide the needed function signatures for the snippet code
+class MyModel
+{
+    public MyModelRequest? LastRequest { get; private set; }
+
+    public Task<MyModelResponse> GetCompletionsAsync(MyModelRequest request, CancellationToken cancellationToken = default)
+    {
+        LastRequest = request;
+        cancellationToken.ThrowIfCancellationRequested();
+        return Task.FromResult(new MyModelResponse { Completions = ["Hello World!"] });
+    }
+
+    public async IAsyncEnumerable<string> GetStreamingCompletionAsync(MyModelRequest request, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    {
+        LastRequest = request;
+        cancellationToken.ThrowIfCancellationRequested();
+        yield return await Task.FromResult("Hello World!");
+    }
+}
\ No newline at end of file
diff --git a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
new file mode 100644
index 0000000000000..85f261982b292
--- /dev/null
+++ b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
@@ -0,0 +1,27 @@
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.ChatCompletion;
+
+// This is a simple mock request to provide the needed function signatures for the snippet code
+class MyModelRequest
+{
+    public required string Request { get; init; }
+    public PromptExecutionSettings? Settings { get; init; }
+
+    public static MyModelRequest FromChatHistory(ChatHistory history, PromptExecutionSettings? settings)
+    {
+        return new MyModelRequest()
+        {
+            Request = history.Last().Content!,
+            Settings = settings
+        };
+    }
+
+    public static MyModelRequest FromPrompt(string prompt, PromptExecutionSettings? settings)
+    {
+        return new MyModelRequest()
+        {
+            Request = prompt,
+            Settings = settings
+        };
+    }
+}
diff --git a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs
new file mode 100644
index 0000000000000..337c3e7ff6f43
--- /dev/null
+++ b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs
@@ -0,0 +1,5 @@
+// This is a simple mock response to provide the needed function signatures for the snippet code
+class MyModelResponse
+{
+    public IReadOnlyList<string> Completions { get; init; } = [];
+}
\ No newline at end of file
diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
index c4ac1bfd60f15..aac6694f60171 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
@@ -1,3 +1,4 @@
+using System.Collections.Immutable;
 using System.Runtime.CompilerServices;
 using Microsoft.SemanticKernel;
 using Microsoft.SemanticKernel.ChatCompletion;
@@ -5,41 +6,34 @@
 // <service>
 class MyChatCompletionService : IChatCompletionService
 {
+    private MyModel myModel = new();
+
     private IReadOnlyDictionary<string, object?>? _attributes;
     public IReadOnlyDictionary<string, object?> Attributes => _attributes ??= new Dictionary<string, object?>();
 
     public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
     {
-        string myModelOutput = await Task.Run(() =>
-        {
-            // Access your model, send the chat history, and receive the output
-            // ...
-            return "";
-        }, cancellationToken);
+        // Access your model, send the chat history, and receive the output
+        MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);
+        MyModelResponse response = await myModel.GetCompletionsAsync(request, cancellationToken);
 
-        // Return your model's output as an assistant message
-        return
-        [
-            new(AuthorRole.Assistant, myModelOutput)
-            // Include any additional outputs from your model in this list
-            // ...
-        ];
+        // Convert your model's response into a list of ChatMessageContent
+        return response.Completions
+            .Select(completion => new(AuthorRole.Assistant, completion))
+            .ToImmutableList();
     }
 
     public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        string myModelOutput = await Task.Run(() =>
-        {
-            // Access your model, send the chat history, and receive the output
-            // ...
-            return "";
-        }, cancellationToken);
+        // Access your model, send the chat history, and stream the output
+        MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);
+        IAsyncEnumerable<string> responseStream = myModel.GetStreamingCompletionAsync(request, cancellationToken);
 
-        foreach (string word in myModelOutput.Split(' ', StringSplitOptions.RemoveEmptyEntries))
+        // Enumerate over the model's response stream, yield a StreamingChatMessageContent for each iteration
+        await foreach (string chunk in responseStream)
         {
             cancellationToken.ThrowIfCancellationRequested();
-            yield return new StreamingChatMessageContent(AuthorRole.Assistant, $"{word} ");
+            yield return new StreamingChatMessageContent(AuthorRole.Assistant, chunk);
         }
     }
 }
diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
index 879aea9417f30..e1cd718888add 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
@@ -1,45 +1,40 @@
+using System.Collections.Immutable;
 using System.Runtime.CompilerServices;
 using Microsoft.SemanticKernel;
-using Microsoft.SemanticKernel.Services;
 using Microsoft.SemanticKernel.TextGeneration;
 
 // <service>
 class MyTextGenerationService : ITextGenerationService
 {
+    private MyModel myModel = new();
+
     private IReadOnlyDictionary<string, object?>? _attributes;
     public IReadOnlyDictionary<string, object?> Attributes => _attributes ??= new Dictionary<string, object?>();
 
     public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        string myModelOutput = await Task.Run(() =>
-        {
-            // Access your model, send the prompt, and receive the output
-            // ...
-            return "";
-        }, cancellationToken);
+        // Access your model, send the prompt, and stream the output
+        MyModelRequest request = MyModelRequest.FromPrompt(prompt, executionSettings);
+        IAsyncEnumerable<string> responseStream = myModel.GetStreamingCompletionAsync(request, cancellationToken);
 
-        foreach (string word in myModelOutput.Split(' ', StringSplitOptions.RemoveEmptyEntries))
+        // Enumerate over the model's response stream, yield a StreamingTextContent for each iteration
+        await foreach (string chunk in responseStream)
         {
             cancellationToken.ThrowIfCancellationRequested();
-            yield return new StreamingTextContent($"{word} ");
+            yield return new StreamingTextContent(chunk);
         }
     }
 
     public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
     {
-        string myModelOutput = await Task.Run(() =>
-        {
-            // Access your model, send the prompt, and receive the output
-            // ...
-            return "";
-        }, cancellationToken);
+        // Access your model, send the prompt, and receive the output
+        MyModelRequest request = MyModelRequest.FromPrompt(prompt, executionSettings);
+        MyModelResponse response = await myModel.GetCompletionsAsync(request, cancellationToken);
 
-        return
-        [
-            new(myModelOutput)
-            // Include any additional outputs from your model in this list
-            // ...
-        ];
+        // Convert your model's response into a list of TextContent
+        return response.Completions
+            .Select(completion => new(completion))
+            .ToImmutableList();
     }
 }
// </service>
\ No newline at end of file

From 1ba702d38166a05b4caa26e937e2562fb5e81749 Mon Sep 17 00:00:00 2001
From: Sloan Haywood
Date: Mon, 15 Apr 2024 15:05:42 -0400
Subject: [PATCH 12/19] Apply suggestions from code review

Co-authored-by: Bill Wagner
Co-authored-by: Genevieve Warren <24882762+gewarren@users.noreply.github.com>
---
 .../snippets/semantic-kernel/LocalModelExamples.cs | 9 +++++----
 docs/ai/how-to/work-with-local-models.md           | 3 ++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
index a3a9d6469a150..d432e08047a63 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
@@ -30,7 +30,7 @@ static Kernel AddTextGenerationServiceExample()
     Kernel kernel = builder.Build();
     // </addTextService>
 
-    return builder.Build();
+    return kernel;
 }
 
 static Kernel AddChatCompletionServiceExample()
@@ -96,9 +96,10 @@ static async Task UseChatCompletionServiceExample()
     // Send a string representation of the chat history to your model directly through the Kernel
     // This uses a special syntax to denote the role for each message
     // For more information on this syntax see https://learn.microsoft.com/en-us/semantic-kernel/prompts/your-first-prompt?tabs=Csharp#6-using-message-roles-in-chat-completion-prompts
-    string prompt = @"
-        <message role=""system"">the initial system message for your chat history</message>
-        <message role=""user"">the user's initial message</message>";
+    string prompt = """
+        <message role="system">the initial system message for your chat history</message>
+        <message role="user">the user's initial message</message>
+        """;
     string?
response = await kernel.InvokePromptAsync(prompt); Console.WriteLine($"Output: {response}"); diff --git a/docs/ai/how-to/work-with-local-models.md b/docs/ai/how-to/work-with-local-models.md index f5c1a43620a0e..5c2152bd5cf5b 100644 --- a/docs/ai/how-to/work-with-local-models.md +++ b/docs/ai/how-to/work-with-local-models.md @@ -1,5 +1,6 @@ --- title: "Use Custom and Local AI Models with the Semantic Kernel SDK for .NET" +titleSuffix: "" description: "Learn how to use custom or local models for text generation and chat completions in Semantic Kernel SDK for .NET." author: haywoodsloan ms.topic: how-to @@ -9,7 +10,7 @@ ms.date: 04/11/2024 --- -# Use Custom and Local AI Models with the Semantic Kernel SDK +# Use custom and local AI models with the Semantic Kernel SDK This article demonstrates how to integrate custom and local models into the [Semantic Kernel SDK](/semantic-kernel/overview) and use them for text generation and chat completions. From e0343246821d4746b0db4bd5bf926fceede63258 Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Mon, 15 Apr 2024 15:16:09 -0400 Subject: [PATCH 13/19] Revert "revised metadata and content to better match guidelines" This reverts commit 26c3037d047002d06679518ffa109ce40497f3c1. --- docs/ai/conceptual/embeddings.md | 64 -------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 docs/ai/conceptual/embeddings.md diff --git a/docs/ai/conceptual/embeddings.md b/docs/ai/conceptual/embeddings.md deleted file mode 100644 index 3a487c7fdf1b0..0000000000000 --- a/docs/ai/conceptual/embeddings.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: "Using Embeddings for Semantic Analysis - .NET" -description: "Learn how embeddings enable AI models to perform semantic analysis in .NET." -author: catbutler -ms.topic: concept-article #Don't change. -ms.date: 04/10/2024 - -#customer intent: As a .NET developer, I want to understand how embeddings enable semantic analysis and when to use them in .NET . - ---- - -# Embeddings in .NET - -This article explains how embeddings work in .NET. - -Embeddings are numeric representations of non-numeric data. You can use embeddings to help an AI model understand the semantic value of inputs so it can perform various comparisons and transformations, such as summarizing text or creating images from text descriptions. - -## Generate, store and process embeddings - -This section describes how embeddings work in Semantic Kernel and Azure OpenAI. - -Embeddings help you to address LLMs' limited input capacity. LLMs limit the number of tokens per input. If you try to include more than the limit in one input, the model will ignore some or all of that input. Some LLMs also feature quota systems, another reason to watch your token counts. - -An embedding is a semantic value that's encoded numerically, so an LLM can compare it to other embeddings that occupy the same dimensions. Two embeddings with very similar vectors also have very similar semantic values, regardless of the format or amount of raw data they represent. For example, a course syllabus and a textbook for that course have very similar meanings. - -### An embedding model generates embeddings - -You use an embedding model to generate embeddings for your raw data. An embedding model is an AI model that can encode a piece of non-numeric data into a long array of numbers: a vector embedding. The model can also decode an embedding into a piece of non-numeric data that has the same meaning as the original, raw data. 
- -In Semantic Kernel—an SDK—you create a [plugin](/semantic-kernel/agents/plugins/?tabs=Csharp) to work with an embedding model. In Azure OpenAI—a AI service— you already have access to [embedding models](/azure/ai-services/openai/concepts/models#embeddings). - -### Store and process embeddings in a vector database - -[Vector databases](vector-dbs.md) are designed to store vectors, so they're a natural home for embeddings. Different vector databases offer different processing capabilities, so you should choose one based on your raw data and your goals. - -## Use cases for embeddings - -This section lists the main use cases for embeddings. - -### Increase the amount of text a model will process - -Use embeddings to increase the amount of text a model will process, such as during [prompt engineering](prompt-engineering-in-dot-net.md). - -For example, suppose you want to include 500 pages of text in a prompt. The number of tokens for that much raw text will exceed the input token limit, making it impossible to include in a prompt. You can use embeddings to summarize and break down large amounts of that text into pieces that are small enough to fit in one input, and then assess the similarity of each piece to the entire raw text. Then you can choose a piece that best preserves the semantic meaning of the raw text and use it in your prompt without hitting the token limit. - -### More relevant and coherent text generation - -Use embeddings to help models generate more relevant and coherent text. For example, embeddings can help the model to generate better stories, poems, jokes, slogans, captions, newsletters, and the like. - -### Perform text classification, summarization, or translation - -Use embeddings to help a model understand the meaning and context of text, and then classify, summarize, or translate that text. For example, you can use embeddings to help models classify texts as positive or negative, spam or not spam, news or opinion, etc. - -### Turn text into images or images into text - -Use embeddings to help a model create images from text or vice versa, by converting different types of data into a common representation (that is, vectors). For example, you can use embeddings to help a model generate or describe images such as logos, faces, animals, and landscapes. - -### Generate or document code - -Use embeddings to help a model create code from text or vice versa, by converting different code or text expressions into a common representation. For example, you can use embeddings to help a model generate or document code in C# or Python. 
- -## Related content - -- [Retrieval augmented generation](retrieval-augmented-generation.md) From 468d871f5d4914876cadfc78635d1896273b5219 Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Mon, 15 Apr 2024 16:21:44 -0400 Subject: [PATCH 14/19] Add TOC entry --- docs/ai/toc.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ai/toc.yml b/docs/ai/toc.yml index facb304214288..ef182f2c65884 100644 --- a/docs/ai/toc.yml +++ b/docs/ai/toc.yml @@ -4,6 +4,8 @@ items: - name: Concepts items: [] - name: How-to articles - items: [] + items: + - name: Use custom and local AI models with the Semantic Kernel SDK + href: how-to/work-with-local-models.md - name: Tutorials items: [] From 4e09eb522a69aec92add5a5f6e244b6152cc96a5 Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Wed, 17 Apr 2024 09:32:57 -0400 Subject: [PATCH 15/19] add comment to clarify when responses are null --- docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs index d432e08047a63..668be629104ad 100644 --- a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs +++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs @@ -68,6 +68,7 @@ static async Task UseTextGenerationServiceExample() }; // Send a prompt to your model directly through the Kernel + // The Kernel response will be null if the model can't reached string prompt = "Please list three services offered by Azure"; string? response = await kernel.InvokePromptAsync(prompt); Console.WriteLine($"Output: {response}"); From 891cbc9ea6c09b0d923ba7fa1fbfd1c21884600a Mon Sep 17 00:00:00 2001 From: Sloan Haywood Date: Wed, 17 Apr 2024 09:33:19 -0400 Subject: [PATCH 16/19] fix typo --- docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs index 668be629104ad..370b9626c70a0 100644 --- a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs +++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs @@ -68,7 +68,7 @@ static async Task UseTextGenerationServiceExample() }; // Send a prompt to your model directly through the Kernel - // The Kernel response will be null if the model can't reached + // The Kernel response will be null if the model can't be reached string prompt = "Please list three services offered by Azure"; string? 
From 7e1213fc581fbb2a0220b9c5ce5abb5f28a177e6 Mon Sep 17 00:00:00 2001
From: Sloan Haywood
Date: Wed, 17 Apr 2024 11:29:18 -0400
Subject: [PATCH 17/19] updates based on PR feedback

---
 .../semantic-kernel/LocalModelExamples.cs     | 35 +++++++---
 .../semantic-kernel/model/MyModelRequest.cs   |  6 +-
 .../services/MyChatCompletionService.cs       | 55 +++++++++++++---
 .../services/MyTextGenerationService.cs       | 57 ++++++++++++++---
 docs/ai/how-to/work-with-local-models.md      |  2 +
 5 files changed, 128 insertions(+), 27 deletions(-)

diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
index 370b9626c70a0..cfbb70791ba00 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
@@ -19,11 +19,21 @@ static Kernel AddTextGenerationServiceExample()
         IKernelBuilder builder = Kernel.CreateBuilder();
 
         // Add your text generation service as a singleton instance
-        var service = new MyTextGenerationService();
-        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService1", service);
+        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService1", new MyTextGenerationService
+        {
+            // Specify any properties specific to your service, such as the url or API key
+            ModelUrl = "https://localhost:38748",
+            ModelApiKey = "myApiKey"
+        });
 
         // Alternatively, add your text generation service as a factory method
-        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService2", (_, _) => new MyTextGenerationService());
+        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService2", (_, _) => new MyTextGenerationService
+        {
+
+            // Specify any properties specific to your service, such as the url or API key
+            ModelUrl = "https://localhost:38748",
+            ModelApiKey = "myApiKey"
+        });
 
         // Add any other Kernel services or configurations
         // ...
@@ -39,11 +49,20 @@ static Kernel AddChatCompletionServiceExample()
         IKernelBuilder builder = Kernel.CreateBuilder();
 
         // Add your chat completion service as a singleton instance
-        var service = new MyChatCompletionService();
-        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService1", service);
+        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService1", new MyChatCompletionService
+        {
+            // Specify any properties specific to your service, such as the url or API key
+            ModelUrl = "https://localhost:38748",
+            ModelApiKey = "myApiKey"
+        });
 
         // Alternatively, add your chat completion service as a factory method
-        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService2", (_, _) => new MyChatCompletionService());
+        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService2", (_, _) => new MyChatCompletionService
+        {
+            // Specify any properties specific to your service, such as the url or API key
+            ModelUrl = "https://localhost:38748",
+            ModelApiKey = "myApiKey"
+        });
 
         // Add any other Kernel services or configurations
         // ...
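The `ModelUrl` and `ModelApiKey` values in the hunks above are hard-coded placeholders. In a real application you would likely read them from configuration instead; here is a small sketch under that assumption (the `LOCAL_MODEL_URL` and `LOCAL_MODEL_API_KEY` environment variable names are invented for this example, and `builder` is the kernel builder from the snippets):

```csharp
// Sketch: pull service settings from environment variables instead of hard-coding them.
string modelUrl = Environment.GetEnvironmentVariable("LOCAL_MODEL_URL")
    ?? "https://localhost:38748";
string modelApiKey = Environment.GetEnvironmentVariable("LOCAL_MODEL_API_KEY")
    ?? throw new InvalidOperationException("Set LOCAL_MODEL_API_KEY before running.");

builder.Services.AddKeyedSingleton<ITextGenerationService>(
    "myTextService1",
    new MyTextGenerationService { ModelUrl = modelUrl, ModelApiKey = modelApiKey });
```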
@@ -56,7 +75,7 @@ static Kernel AddChatCompletionServiceExample()
     static async Task UseTextGenerationServiceExample()
     {
         IKernelBuilder builder = Kernel.CreateBuilder();
-        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService", new MyTextGenerationService());
+        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService", new MyTextGenerationService { ModelApiKey = "myApiKey" });
         Kernel kernel = builder.Build();
 
         //
@@ -83,7 +102,7 @@ static async Task UseTextGenerationServiceExample()
     static async Task UseChatCompletionServiceExample()
     {
         IKernelBuilder builder = Kernel.CreateBuilder();
-        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService", new MyChatCompletionService());
+        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService", new MyChatCompletionService { ModelApiKey = "myApiKey" });
         Kernel kernel = builder.Build();
 
         //

diff --git a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
index 85f261982b292..69cdbe24ad5e6 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
@@ -4,8 +4,10 @@
 // This is a simple mock request to provide the needed function signatures for the snippet code
 class MyModelRequest
 {
-    public required string Request { get; init; }
-    public PromptExecutionSettings? Settings { get; init; }
+    public required string Request { get; set; }
+    public PromptExecutionSettings? Settings { get; set; }
+    public bool Stream {get; set; } = true;
+
 
     public static MyModelRequest FromChatHistory(ChatHistory history, PromptExecutionSettings? settings)
     {
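With the `Stream` flag added above, `MyModelRequest` carries everything a completion call needs. As a rough illustration of the wire format (assuming System.Text.Json defaults; the actual JSON shape is whatever your model's API expects, and this block is not part of the patch):

```csharp
using System.Text.Json;

// Sketch: what a serialized MyModelRequest might look like.
MyModelRequest request = MyModelRequest.FromPrompt(
    "Please list three services offered by Azure",
    settings: null);
request.Stream = false;

string json = JsonSerializer.Serialize(request);
Console.WriteLine(json);
// Example output (shape only):
// {"Request":"Please list three services offered by Azure","Settings":null,"Stream":false}
```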
diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
index aac6694f60171..83253c7686478 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
@@ -1,21 +1,37 @@
 using System.Collections.Immutable;
+using System.Net.Http.Json;
 using System.Runtime.CompilerServices;
+using System.Text.Json;
 using Microsoft.SemanticKernel;
 using Microsoft.SemanticKernel.ChatCompletion;
 
 //
 class MyChatCompletionService : IChatCompletionService
 {
-    private MyModel myModel = new();
-
     private IReadOnlyDictionary<string, object?>? _attributes;
     public IReadOnlyDictionary<string, object?> Attributes => _attributes ??= new Dictionary<string, object?>();
+
+    public string ModelUrl { get; init; } = "";
+    public required string ModelApiKey { get; init; }
 
     public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
     {
-        // Access your model, send the chat history, and receive the output
+        // Build your model's request object
         MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);
-        MyModelResponse response = await myModel.GetCompletionsAsync(request, cancellationToken);
+
+        // Send the completion request via HTTP
+        using var httpClient = new HttpClient();
+
+        // Send a POST to your model with the serialized request in the body
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+
+        // Verify the request was completed successfully
+        httpResponse.EnsureSuccessStatusCode();
+
+        // Deserialize the response body to your model's response object
+        // Handle when the deserialization fails and returns null
+        MyModelResponse response = await httpResponse.Content.ReadFromJsonAsync<MyModelResponse>(cancellationToken)
+            ?? throw new Exception("Failed to deserialize response from model");
 
         // Convert your model's response into a list of ChatMessageContent
         return response.Completions
             .Select<string, ChatMessageContent>(completion => new(AuthorRole.Assistant, completion))
             .ToImmutableList();
     }
@@ -25,14 +41,37 @@ public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync
     public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        // Access your model, send the chat history, and stream the output
+        // Build your model's request object, specify that streaming is requested
         MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);
-        IAsyncEnumerable<string> responseStream = myModel.GetStreamingCompletionAsync(request, cancellationToken);
+        request.Stream = true;
+
+        // Send the completion request via HTTP
+        using var httpClient = new HttpClient();
 
-        // Enumerate over the model's response stream, yield a StreamingChatMessageContent for each iteration
-        await foreach (string chunk in responseStream)
+        // Send a POST to your model with the serialized request in the body
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+
+        // Verify the request was completed successfully
+        httpResponse.EnsureSuccessStatusCode();
+
+        // Read your model's response as a stream
+        using StreamReader reader = new(await httpResponse.Content.ReadAsStreamAsync(cancellationToken));
+
+        // Iteratively read a chunk of the response until the end of the stream
+        // It is more efficient to use a buffer that is the same size as the internal buffer of the stream
+        // If the size of the internal buffer was unspecified when the stream was constructed, its default size is 4 kilobytes (2048 UTF-16 characters)
+        char[] buffer = new char[2048];
+        while(!reader.EndOfStream)
         {
+            // Check the cancellation token with each iteration
             cancellationToken.ThrowIfCancellationRequested();
+
+            // Fill the buffer with the next set of characters, track how many characters were read
+            int readCount = reader.Read(buffer, 0, buffer.Length);
+
+            // Convert the character buffer to a string, only include as many characters as were just read
+            string chunk = new(buffer, 0, readCount);
+
             yield return new StreamingChatMessageContent(AuthorRole.Assistant, chunk);
         }
     }
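One gap worth noting in the service above: it declares a required `ModelApiKey` but never attaches it to an outgoing request. A hedged sketch of one possible wiring, assuming the host expects a Bearer token (auth schemes vary by host, so this is illustrative only):

```csharp
// Sketch: a shared HttpClient factory the service methods could call,
// attaching the API key as a Bearer token.
// Requires `using System.Net.Http.Headers;` at the top of the file.
private HttpClient CreateClient()
{
    var httpClient = new HttpClient();
    httpClient.DefaultRequestHeaders.Authorization =
        new AuthenticationHeaderValue("Bearer", ModelApiKey);
    return httpClient;
}
```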
diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
index e1cd718888add..b1381c76d343f 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
@@ -1,4 +1,5 @@
 using System.Collections.Immutable;
+using System.Net.Http.Json;
 using System.Runtime.CompilerServices;
 using Microsoft.SemanticKernel;
 using Microsoft.SemanticKernel.TextGeneration;
@@ -6,32 +7,70 @@
 //
 class MyTextGenerationService : ITextGenerationService
 {
-    private MyModel myModel = new();
-
     private IReadOnlyDictionary<string, object?>? _attributes;
     public IReadOnlyDictionary<string, object?> Attributes => _attributes ??= new Dictionary<string, object?>();
 
+    public string ModelUrl { get; init; } = "";
+    public required string ModelApiKey { get; init; }
+
     public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
     {
-        // Access your model, send the prompt, and stream the output
+        // Build your model's request object, specify that streaming is requested
         MyModelRequest request = MyModelRequest.FromPrompt(prompt, executionSettings);
-        IAsyncEnumerable<string> responseStream = myModel.GetStreamingCompletionAsync(request, cancellationToken);
+        request.Stream = true;
+
+        // Send the completion request via HTTP
+        using var httpClient = new HttpClient();
+
+        // Send a POST to your model with the serialized request in the body
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+
+        // Verify the request was completed successfully
+        httpResponse.EnsureSuccessStatusCode();
 
-        // Enumerate over the model's response stream, yield a StreamingTextContent for each iteration
-        await foreach (string chunk in responseStream)
+        // Read your model's response as a stream
+        using StreamReader reader = new(await httpResponse.Content.ReadAsStreamAsync(cancellationToken));
+
+        // Iteratively read a chunk of the response until the end of the stream
+        // It is more efficient to use a buffer that is the same size as the internal buffer of the stream
+        // If the size of the internal buffer was unspecified when the stream was constructed, its default size is 4 kilobytes (2048 UTF-16 characters)
+        char[] buffer = new char[2048];
+        while (!reader.EndOfStream)
         {
+            // Check the cancellation token with each iteration
             cancellationToken.ThrowIfCancellationRequested();
+
+            // Fill the buffer with the next set of characters, track how many characters were read
+            int readCount = reader.Read(buffer, 0, buffer.Length);
+
+            // Convert the character buffer to a string, only include as many characters as were just read
+            string chunk = new(buffer, 0, readCount);
+
             yield return new StreamingTextContent(chunk);
         }
     }
 
     public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
     {
-        // Access your model, send the prompt, and receive the output
+        // Build your model's request object
         MyModelRequest request = MyModelRequest.FromPrompt(prompt, executionSettings);
-        MyModelResponse response = await myModel.GetCompletionsAsync(request, cancellationToken);
 
-        // Convert your model's response into a list of TextContent
+        // Send the completion request via HTTP
+        using var httpClient = new HttpClient();
+
+
+        // Send a POST to your model with the serialized request in the body
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+
+        // Verify the request was completed successfully
+        httpResponse.EnsureSuccessStatusCode();
+
+        // Deserialize the response body to your model's response object
+        // Handle when the deserialization fails and returns null
+        MyModelResponse response = await httpResponse.Content.ReadFromJsonAsync<MyModelResponse>(cancellationToken)
+            ?? throw new Exception("Failed to deserialize response from model");
+
+        // Convert your model's response into a list of TextContent
         return response.Completions
             .Select<string, TextContent>(completion => new(completion))
             .ToImmutableList();
     }
 }
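A minimal sketch of consuming the streaming method above from calling code (assuming a kernel with the service registered, as in the earlier examples; not part of the patch):

```csharp
// Sketch: print streamed chunks as they arrive.
ITextGenerationService textService = kernel.GetRequiredService<ITextGenerationService>();

await foreach (StreamingTextContent chunk in textService.GetStreamingTextContentsAsync(
    "Please list three services offered by Azure"))
{
    Console.Write(chunk.Text);
}
Console.WriteLine();
```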
diff --git a/docs/ai/how-to/work-with-local-models.md b/docs/ai/how-to/work-with-local-models.md
index 5c2152bd5cf5b..cd7d5ec7bc4ae 100644
--- a/docs/ai/how-to/work-with-local-models.md
+++ b/docs/ai/how-to/work-with-local-models.md
@@ -16,6 +16,8 @@ This article demonstrates how to integrate custom and local models into the [Sem
 
 You can adapt the steps to use them with any model that you can access, regardless of where or how you access it. For example, you can integrate the [codellama](https://ollama.com/library/codellama) model with the Semantic Kernel SDK to enable code generation and discussion.
 
+Custom and local models often provide access through REST APIs; for example, see [Ollama OpenAI compatibility](https://ollama.com/blog/openai-compatibility). Before you integrate a model, it must be hosted and accessible to your .NET application over HTTPS.
+
 ## Prerequisites
 
 * An Azure account with an active subscription. [Create an account for free](https://azure.microsoft.com/free/?WT.mc_id=A261C142F).
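The paragraph added in this hunk points to Ollama's OpenAI-compatible endpoint as one way a local model exposes a REST API. As a rough sketch of what calling such an endpoint directly looks like, the port, path, and payload shape below assume Ollama's documented defaults and are not taken from this patch series:

```csharp
using System.Net.Http.Json;

// Sketch: call a locally hosted, OpenAI-compatible chat endpoint directly.
using var client = new HttpClient();

var payload = new
{
    model = "codellama",
    messages = new[] { new { role = "user", content = "Write a C# hello world." } },
    stream = false
};

HttpResponseMessage response = await client.PostAsJsonAsync(
    "http://localhost:11434/v1/chat/completions", payload);
response.EnsureSuccessStatusCode();

Console.WriteLine(await response.Content.ReadAsStringAsync());
```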
From 34cc62c1cb311045277036a8e6a1b186b812490a Mon Sep 17 00:00:00 2001
From: Sloan Haywood
Date: Thu, 18 Apr 2024 11:02:19 -0400
Subject: [PATCH 18/19] format code and remove old file

---
 .../semantic-kernel/LocalModelExamples.cs     | 91 ++++++++++++-------
 .../snippets/semantic-kernel/Program.cs       |  2 +-
 .../snippets/semantic-kernel/model/MyModel.cs | 21 -----
 .../semantic-kernel/model/MyModelRequest.cs   | 20 ++--
 .../semantic-kernel/model/MyModelResponse.cs  |  2 +-
 .../services/MyChatCompletionService.cs       | 47 +++++++---
 .../services/MyTextGenerationService.cs       | 42 ++++++---
 7 files changed, 133 insertions(+), 92 deletions(-)
 delete mode 100644 docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs

diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
index cfbb70791ba00..5e136b0f22180 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
@@ -19,21 +19,27 @@ static Kernel AddTextGenerationServiceExample()
         IKernelBuilder builder = Kernel.CreateBuilder();
 
         // Add your text generation service as a singleton instance
-        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService1", new MyTextGenerationService
-        {
-            // Specify any properties specific to your service, such as the url or API key
-            ModelUrl = "https://localhost:38748",
-            ModelApiKey = "myApiKey"
-        });
+        builder.Services.AddKeyedSingleton<ITextGenerationService>(
+            "myTextService1",
+            new MyTextGenerationService
+            {
+                // Specify any properties specific to your service, such as the url or API key
+                ModelUrl = "https://localhost:38748",
+                ModelApiKey = "myApiKey"
+            }
+        );
 
         // Alternatively, add your text generation service as a factory method
-        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService2", (_, _) => new MyTextGenerationService
-        {
-
-            // Specify any properties specific to your service, such as the url or API key
-            ModelUrl = "https://localhost:38748",
-            ModelApiKey = "myApiKey"
-        });
+        builder.Services.AddKeyedSingleton<ITextGenerationService>(
+            "myTextService2",
+            (_, _) =>
+                new MyTextGenerationService
+                {
+                    // Specify any properties specific to your service, such as the url or API key
+                    ModelUrl = "https://localhost:38748",
+                    ModelApiKey = "myApiKey"
+                }
+        );
 
         // Add any other Kernel services or configurations
         // ...
@@ -49,20 +55,27 @@ static Kernel AddChatCompletionServiceExample()
         IKernelBuilder builder = Kernel.CreateBuilder();
 
         // Add your chat completion service as a singleton instance
-        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService1", new MyChatCompletionService
-        {
-            // Specify any properties specific to your service, such as the url or API key
-            ModelUrl = "https://localhost:38748",
-            ModelApiKey = "myApiKey"
-        });
+        builder.Services.AddKeyedSingleton<IChatCompletionService>(
+            "myChatService1",
+            new MyChatCompletionService
+            {
+                // Specify any properties specific to your service, such as the url or API key
+                ModelUrl = "https://localhost:38748",
+                ModelApiKey = "myApiKey"
+            }
+        );
 
         // Alternatively, add your chat completion service as a factory method
-        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService2", (_, _) => new MyChatCompletionService
-        {
-            // Specify any properties specific to your service, such as the url or API key
-            ModelUrl = "https://localhost:38748",
-            ModelApiKey = "myApiKey"
-        });
+        builder.Services.AddKeyedSingleton<IChatCompletionService>(
+            "myChatService2",
+            (_, _) =>
+                new MyChatCompletionService
+                {
+                    // Specify any properties specific to your service, such as the url or API key
+                    ModelUrl = "https://localhost:38748",
+                    ModelApiKey = "myApiKey"
+                }
+        );
 
         // Add any other Kernel services or configurations
         // ...
@@ -75,7 +88,10 @@ static Kernel AddChatCompletionServiceExample()
     static async Task UseTextGenerationServiceExample()
     {
         IKernelBuilder builder = Kernel.CreateBuilder();
-        builder.Services.AddKeyedSingleton<ITextGenerationService>("myTextService", new MyTextGenerationService { ModelApiKey = "myApiKey" });
+        builder.Services.AddKeyedSingleton<ITextGenerationService>(
+            "myTextService",
+            new MyTextGenerationService { ModelApiKey = "myApiKey" }
+        );
         Kernel kernel = builder.Build();
 
         //
@@ -94,7 +110,10 @@ static async Task UseTextGenerationServiceExample()
 
         // Alternatively, send a prompt to your model through the text generation service
         ITextGenerationService textService = kernel.GetRequiredService<ITextGenerationService>();
-        TextContent responseContents = await textService.GetTextContentAsync(prompt, executionSettings);
+        TextContent responseContents = await textService.GetTextContentAsync(
+            prompt,
+            executionSettings
+        );
         Console.WriteLine($"Output: {responseContents.Text}");
         //
     }
@@ -102,7 +121,10 @@ static async Task UseTextGenerationServiceExample()
     static async Task UseChatCompletionServiceExample()
     {
         IKernelBuilder builder = Kernel.CreateBuilder();
-        builder.Services.AddKeyedSingleton<IChatCompletionService>("myChatService", new MyChatCompletionService { ModelApiKey = "myApiKey" });
+        builder.Services.AddKeyedSingleton<IChatCompletionService>(
+            "myChatService",
+            new MyChatCompletionService { ModelApiKey = "myApiKey" }
+        );
         Kernel kernel = builder.Build();
 
         //
@@ -117,9 +139,9 @@ static async Task UseChatCompletionServiceExample()
         // Send a string representation of the chat history to your model directly through the Kernel
         // This uses a special syntax to denote the role for each message
         // For more information on this syntax see https://learn.microsoft.com/en-us/semantic-kernel/prompts/your-first-prompt?tabs=Csharp#6-using-message-roles-in-chat-completion-prompts
         string prompt = """
-            <message role="system">the initial system message for your chat history</message>
-            <message role="user">the user's initial message</message>
-            """;
+                <message role="system">the initial system message for your chat history</message>
+                <message role="user">the user's initial message</message>
+                """;
         string? response = await kernel.InvokePromptAsync<string>(prompt);
         Console.WriteLine($"Output: {response}");
@@ -137,7 +159,10 @@ static async Task UseChatCompletionServiceExample()
 
         // Get the model's response and add it to the chat history
         IChatCompletionService service = kernel.GetRequiredService<IChatCompletionService>();
-        ChatMessageContent responseMessage = await service.GetChatMessageContentAsync(chatHistory, executionSettings);
+        ChatMessageContent responseMessage = await service.GetChatMessageContentAsync(
+            chatHistory,
+            executionSettings
+        );
         Console.WriteLine($"Assistant: {responseMessage.Content}");
         chatHistory.Add(responseMessage);
 
@@ -145,4 +170,4 @@ static async Task UseChatCompletionServiceExample()
         // ...
         //
     }
-}
\ No newline at end of file
+}

diff --git a/docs/ai/how-to/snippets/semantic-kernel/Program.cs b/docs/ai/how-to/snippets/semantic-kernel/Program.cs
index 92a1944b44964..84d17775c364c 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/Program.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/Program.cs
@@ -1 +1 @@
-await LocalModelExamples.Examples();
\ No newline at end of file
+await LocalModelExamples.Examples();

diff --git a/docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs b/docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs
deleted file mode 100644
index f17a74f6ec401..0000000000000
--- a/docs/ai/how-to/snippets/semantic-kernel/model/MyModel.cs
+++ /dev/null
@@ -1,21 +0,0 @@
-using System.Runtime.CompilerServices;
-
-// This is a simple mock model to provide the needed function signatures for the snippet code
-class MyModel
-{
-    public MyModelRequest? LastRequest { get; private set; }
-
-    public Task<MyModelResponse> GetCompletionsAsync(MyModelRequest request, CancellationToken cancellationToken = default)
-    {
-        LastRequest = request;
-        cancellationToken.ThrowIfCancellationRequested();
-        return Task.FromResult(new MyModelResponse { Completions = ["Hello World!"] });
-    }
-
-    public async IAsyncEnumerable<string> GetStreamingCompletionAsync(MyModelRequest request, [EnumeratorCancellation] CancellationToken cancellationToken = default)
-    {
-        LastRequest = request;
-        cancellationToken.ThrowIfCancellationRequested();
-        yield return await Task.FromResult("Hello World!");
-    }
-}
\ No newline at end of file
diff --git a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
index 69cdbe24ad5e6..13b3fe2937125 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
@@ -6,24 +6,18 @@ class MyModelRequest
 {
     public required string Request { get; set; }
     public PromptExecutionSettings? Settings { get; set; }
-    public bool Stream {get; set; } = true;
+    public bool Stream { get; set; } = true;
 
-
-    public static MyModelRequest FromChatHistory(ChatHistory history, PromptExecutionSettings? settings)
+    public static MyModelRequest FromChatHistory(
+        ChatHistory history,
+        PromptExecutionSettings? settings
+    )
     {
-        return new MyModelRequest()
-        {
-            Request = history.Last().Content!,
-            Settings = settings
-        };
+        return new MyModelRequest() { Request = history.Last().Content!, Settings = settings };
     }
 
     public static MyModelRequest FromPrompt(string prompt, PromptExecutionSettings? settings)
     {
-        return new MyModelRequest()
-        {
-            Request = prompt,
-            Settings = settings
-        };
+        return new MyModelRequest() { Request = prompt, Settings = settings };
     }
 }

diff --git a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs
index 337c3e7ff6f43..fb0b722a63c16 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs
@@ -2,4 +2,4 @@
 class MyModelResponse
 {
     public IReadOnlyList<string> Completions { get; init; } = [];
-}
\ No newline at end of file
+}

diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
index 83253c7686478..5d547e26fffb0 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
@@ -9,12 +9,18 @@ class MyChatCompletionService : IChatCompletionService
 {
     private IReadOnlyDictionary<string, object?>? _attributes;
-    public IReadOnlyDictionary<string, object?> Attributes => _attributes ??= new Dictionary<string, object?>();
-
+    public IReadOnlyDictionary<string, object?> Attributes =>
+        _attributes ??= new Dictionary<string, object?>();
+
     public string ModelUrl { get; init; } = "";
     public required string ModelApiKey { get; init; }
 
-    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
+    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(
+        ChatHistory chatHistory,
+        PromptExecutionSettings? executionSettings = null,
+        Kernel? kernel = null,
+        CancellationToken cancellationToken = default
+    )
     {
         // Build your model's request object
         MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);
@@ -23,23 +29,35 @@ public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync
         using var httpClient = new HttpClient();
 
         // Send a POST to your model with the serialized request in the body
-        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(
+            ModelUrl,
+            request,
+            cancellationToken
+        );
 
         // Verify the request was completed successfully
         httpResponse.EnsureSuccessStatusCode();
 
         // Deserialize the response body to your model's response object
         // Handle when the deserialization fails and returns null
-        MyModelResponse response = await httpResponse.Content.ReadFromJsonAsync<MyModelResponse>(cancellationToken)
+        MyModelResponse response =
+            await httpResponse.Content.ReadFromJsonAsync<MyModelResponse>(cancellationToken)
             ?? throw new Exception("Failed to deserialize response from model");
 
         // Convert your model's response into a list of ChatMessageContent
-        return response.Completions
-            .Select<string, ChatMessageContent>(completion => new(AuthorRole.Assistant, completion))
+        return response
+            .Completions.Select<string, ChatMessageContent>(completion =>
+                new(AuthorRole.Assistant, completion)
+            )
             .ToImmutableList();
     }
-    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(
+        ChatHistory chatHistory,
+        PromptExecutionSettings? executionSettings = null,
+        Kernel? kernel = null,
+        [EnumeratorCancellation] CancellationToken cancellationToken = default
+    )
     {
         // Build your model's request object, specify that streaming is requested
         MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);
@@ -49,19 +67,24 @@ public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessa
         using var httpClient = new HttpClient();
 
         // Send a POST to your model with the serialized request in the body
-        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(
+            ModelUrl,
+            request,
+            cancellationToken
+        );
 
         // Verify the request was completed successfully
         httpResponse.EnsureSuccessStatusCode();
 
         // Read your model's response as a stream
-        using StreamReader reader = new(await httpResponse.Content.ReadAsStreamAsync(cancellationToken));
+        using StreamReader reader =
+            new(await httpResponse.Content.ReadAsStreamAsync(cancellationToken));
 
         // Iteratively read a chunk of the response until the end of the stream
         // It is more efficient to use a buffer that is the same size as the internal buffer of the stream
         // If the size of the internal buffer was unspecified when the stream was constructed, its default size is 4 kilobytes (2048 UTF-16 characters)
         char[] buffer = new char[2048];
-        while(!reader.EndOfStream)
+        while (!reader.EndOfStream)
         {
             // Check the cancellation token with each iteration
             cancellationToken.ThrowIfCancellationRequested();
@@ -76,4 +99,4 @@ public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessa
         }
     }
 }
-//
\ No newline at end of file
+//
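A side note on the read loop reformatted above: it calls the synchronous `reader.Read` inside an async method. A possible refinement using the asynchronous overload, available on modern .NET, is sketched below; it is illustrative only and not part of the patch. Because the token is passed to `ReadAsync`, the manual cancellation check becomes unnecessary.

```csharp
// Sketch: asynchronous variant of the chunked read loop.
// ReadAsync returns 0 once the end of the stream is reached.
char[] buffer = new char[2048];
int readCount;
while ((readCount = await reader.ReadAsync(buffer.AsMemory(), cancellationToken)) > 0)
{
    string chunk = new(buffer, 0, readCount);
    yield return new StreamingChatMessageContent(AuthorRole.Assistant, chunk);
}
```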
diff --git a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
index b1381c76d343f..c594f9f2aee2f 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/services/MyTextGenerationService.cs
@@ -8,12 +8,18 @@ class MyTextGenerationService : ITextGenerationService
 {
     private IReadOnlyDictionary<string, object?>? _attributes;
-    public IReadOnlyDictionary<string, object?> Attributes => _attributes ??= new Dictionary<string, object?>();
+    public IReadOnlyDictionary<string, object?> Attributes =>
+        _attributes ??= new Dictionary<string, object?>();
 
     public string ModelUrl { get; init; } = "";
     public required string ModelApiKey { get; init; }
 
-    public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
+    public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsync(
+        string prompt,
+        PromptExecutionSettings? executionSettings = null,
+        Kernel? kernel = null,
+        [EnumeratorCancellation] CancellationToken cancellationToken = default
+    )
     {
         // Build your model's request object, specify that streaming is requested
         MyModelRequest request = MyModelRequest.FromPrompt(prompt, executionSettings);
@@ -23,13 +29,18 @@ public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsyn
         using var httpClient = new HttpClient();
 
         // Send a POST to your model with the serialized request in the body
-        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(
+            ModelUrl,
+            request,
+            cancellationToken
+        );
 
         // Verify the request was completed successfully
         httpResponse.EnsureSuccessStatusCode();
 
         // Read your model's response as a stream
-        using StreamReader reader = new(await httpResponse.Content.ReadAsStreamAsync(cancellationToken));
+        using StreamReader reader =
+            new(await httpResponse.Content.ReadAsStreamAsync(cancellationToken));
 
         // Iteratively read a chunk of the response until the end of the stream
         // It is more efficient to use a buffer that is the same size as the internal buffer of the stream
@@ -50,7 +61,12 @@ public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsyn
         }
     }
 
-    public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
+    public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(
+        string prompt,
+        PromptExecutionSettings? executionSettings = null,
+        Kernel? kernel = null,
+        CancellationToken cancellationToken = default
+    )
     {
         // Build your model's request object
         MyModelRequest request = MyModelRequest.FromPrompt(prompt, executionSettings);
@@ -58,22 +74,26 @@ public async Task<IReadOnlyList<TextContent>> GetTextContentsAsync(string prompt
         // Send the completion request via HTTP
         using var httpClient = new HttpClient();
 
-
         // Send a POST to your model with the serialized request in the body
-        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(ModelUrl, request, cancellationToken);
+        using HttpResponseMessage httpResponse = await httpClient.PostAsJsonAsync(
+            ModelUrl,
+            request,
+            cancellationToken
+        );
 
         // Verify the request was completed successfully
         httpResponse.EnsureSuccessStatusCode();
 
         // Deserialize the response body to your model's response object
         // Handle when the deserialization fails and returns null
-        MyModelResponse response = await httpResponse.Content.ReadFromJsonAsync<MyModelResponse>(cancellationToken)
+        MyModelResponse response =
+            await httpResponse.Content.ReadFromJsonAsync<MyModelResponse>(cancellationToken)
            ?? throw new Exception("Failed to deserialize response from model");
 
         // Convert your model's response into a list of TextContent
-        return response.Completions
-            .Select<string, TextContent>(completion => new(completion))
+        return response
+            .Completions.Select<string, TextContent>(completion => new(completion))
             .ToImmutableList();
     }
 }
-//
\ No newline at end of file
+//

From 64e360c035f85a1f9bc2f38f42d24676e57f181b Mon Sep 17 00:00:00 2001
From: Sloan Haywood
Date: Thu, 18 Apr 2024 11:17:24 -0400
Subject: [PATCH 19/19] move link to new line to prevent scroll

---
 docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
index 5e136b0f22180..2c70e3343aa07 100644
--- a/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
+++ b/docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
@@ -137,7 +137,8 @@ static async Task UseChatCompletionServiceExample()
         // Send a string representation of the chat history to your model directly through the Kernel
         // This uses a special syntax to denote the role for each message
-        // For more information on this syntax see https://learn.microsoft.com/en-us/semantic-kernel/prompts/your-first-prompt?tabs=Csharp#6-using-message-roles-in-chat-completion-prompts
+        // For more information on this syntax see:
+        // https://learn.microsoft.com/en-us/semantic-kernel/prompts/your-first-prompt?tabs=Csharp
         string prompt = """
             <message role="system">the initial system message for your chat history</message>
             <message role="user">the user's initial message</message>
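Taken together, the patches leave `UseChatCompletionServiceExample` performing a single request/response turn. A hedged sketch of extending it into a simple multi-turn console loop (illustrative only; the system message and exit condition are invented, and `kernel` is assumed to be built as in the snippets):

```csharp
// Sketch: a simple multi-turn console chat over the registered service.
IChatCompletionService service = kernel.GetRequiredService<IChatCompletionService>();
var chatHistory = new ChatHistory("You are a helpful assistant.");

while (true)
{
    Console.Write("User: ");
    string? userMessage = Console.ReadLine();
    if (string.IsNullOrWhiteSpace(userMessage) || userMessage == "exit")
    {
        break;
    }

    chatHistory.AddUserMessage(userMessage);

    // Send the full history each turn so the model keeps context.
    ChatMessageContent reply = await service.GetChatMessageContentAsync(chatHistory);
    Console.WriteLine($"Assistant: {reply.Content}");
    chatHistory.Add(reply);
}
```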