forked from dotnet/docs
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request dotnet#40470 from haywoodsloan/shaywood/howto-loca…
…l-models How To - Work with local models
- Loading branch information
Showing
9 changed files
with
485 additions
and
1 deletion.
There are no files selected for viewing
174 changes: 174 additions & 0 deletions
174
docs/ai/how-to/snippets/semantic-kernel/LocalModelExamples.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
using Microsoft.Extensions.DependencyInjection; | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.ChatCompletion; | ||
using Microsoft.SemanticKernel.TextGeneration; | ||
|
||
class LocalModelExamples
{
    // Runs every example scenario in sequence.
    public static async Task Examples()
    {
        AddTextGenerationServiceExample();
        AddChatCompletionServiceExample();
        await UseTextGenerationServiceExample();
        await UseChatCompletionServiceExample();
    }

    // Shows the two ways to register a custom text generation service with a Kernel:
    // as a prebuilt singleton instance or via a factory method.
    static Kernel AddTextGenerationServiceExample()
    {
        // <addTextService>
        IKernelBuilder builder = Kernel.CreateBuilder();

        // Add your text generation service as a singleton instance
        builder.Services.AddKeyedSingleton<ITextGenerationService>(
            "myTextService1",
            new MyTextGenerationService
            {
                // Specify any properties specific to your service, such as the url or API key
                ModelUrl = "https://localhost:38748",
                ModelApiKey = "myApiKey"
            }
        );

        // Alternatively, add your text generation service as a factory method
        builder.Services.AddKeyedSingleton<ITextGenerationService>(
            "myTextService2",
            (_, _) =>
                new MyTextGenerationService
                {
                    // Specify any properties specific to your service, such as the url or API key
                    ModelUrl = "https://localhost:38748",
                    ModelApiKey = "myApiKey"
                }
        );

        // Add any other Kernel services or configurations
        // ...
        Kernel kernel = builder.Build();
        // </addTextService>

        return kernel;
    }

    // Shows the two ways to register a custom chat completion service with a Kernel:
    // as a prebuilt singleton instance or via a factory method.
    static Kernel AddChatCompletionServiceExample()
    {
        // <addChatService>
        IKernelBuilder builder = Kernel.CreateBuilder();

        // Add your chat completion service as a singleton instance
        builder.Services.AddKeyedSingleton<IChatCompletionService>(
            "myChatService1",
            new MyChatCompletionService
            {
                // Specify any properties specific to your service, such as the url or API key
                ModelUrl = "https://localhost:38748",
                ModelApiKey = "myApiKey"
            }
        );

        // Alternatively, add your chat completion service as a factory method
        builder.Services.AddKeyedSingleton<IChatCompletionService>(
            "myChatService2",
            (_, _) =>
                new MyChatCompletionService
                {
                    // Specify any properties specific to your service, such as the url or API key
                    ModelUrl = "https://localhost:38748",
                    ModelApiKey = "myApiKey"
                }
        );

        // Add any other Kernel services or configurations
        // ...
        Kernel kernel = builder.Build();
        // </addChatService>

        return kernel;
    }

    // Shows how to invoke a registered text generation service, both directly
    // through the Kernel and through the service interface itself.
    static async Task UseTextGenerationServiceExample()
    {
        IKernelBuilder builder = Kernel.CreateBuilder();
        builder.Services.AddKeyedSingleton<ITextGenerationService>(
            "myTextService",
            new MyTextGenerationService { ModelApiKey = "myApiKey" }
        );
        Kernel kernel = builder.Build();

        // <useTextService>
        var executionSettings = new PromptExecutionSettings
        {
            // Add execution settings, such as the ModelID and ExtensionData
            ModelId = "MyModelId",
            ExtensionData = new Dictionary<string, object> { { "MaxTokens", 500 } }
        };

        // Send a prompt to your model directly through the Kernel
        // The Kernel response will be null if the model can't be reached
        string prompt = "Please list three services offered by Azure";
        string? response = await kernel.InvokePromptAsync<string>(prompt);
        Console.WriteLine($"Output: {response}");

        // Alternatively, send a prompt to your model through the text generation service
        ITextGenerationService textService = kernel.GetRequiredService<ITextGenerationService>();
        TextContent responseContents = await textService.GetTextContentAsync(
            prompt,
            executionSettings
        );
        Console.WriteLine($"Output: {responseContents.Text}");
        // </useTextService>
    }

    // Shows how to run a chat exchange with a registered chat completion service,
    // both via a role-tagged prompt through the Kernel and via a ChatHistory.
    static async Task UseChatCompletionServiceExample()
    {
        IKernelBuilder builder = Kernel.CreateBuilder();
        builder.Services.AddKeyedSingleton<IChatCompletionService>(
            "myChatService",
            new MyChatCompletionService { ModelApiKey = "myApiKey" }
        );
        Kernel kernel = builder.Build();

        // <useChatService>
        var executionSettings = new PromptExecutionSettings
        {
            // Add execution settings, such as the ModelID and ExtensionData
            ModelId = "MyModelId",
            ExtensionData = new Dictionary<string, object> { { "MaxTokens", 500 } }
        };

        // Send a string representation of the chat history to your model directly through the Kernel
        // This uses a special syntax to denote the role for each message
        // For more information on this syntax see:
        // https://learn.microsoft.com/en-us/semantic-kernel/prompts/your-first-prompt?tabs=Csharp
        string prompt = """
            <message role="system">the initial system message for your chat history</message>
            <message role="user">the user's initial message</message>
            """;

        string? response = await kernel.InvokePromptAsync<string>(prompt);
        Console.WriteLine($"Output: {response}");

        // Alternatively, send a prompt to your model through the chat completion service
        // First, initialize a chat history with your initial system message
        string systemMessage = "<the initial system message for your chat history>";
        Console.WriteLine($"System Prompt: {systemMessage}");
        var chatHistory = new ChatHistory(systemMessage);

        // Add the user's input to your chat history
        string userRequest = "<the user's initial message>";
        Console.WriteLine($"User: {userRequest}");
        chatHistory.AddUserMessage(userRequest);

        // Get the model's response and add it to the chat history
        IChatCompletionService service = kernel.GetRequiredService<IChatCompletionService>();
        ChatMessageContent responseMessage = await service.GetChatMessageContentAsync(
            chatHistory,
            executionSettings
        );
        Console.WriteLine($"Assistant: {responseMessage.Content}");
        chatHistory.Add(responseMessage);

        // Continue sending and receiving messages between the user and model
        // ...
        // </useChatService>
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
// Top-level program entry point: run all local-model example scenarios.
await LocalModelExamples.Examples();
23 changes: 23 additions & 0 deletions
23
docs/ai/how-to/snippets/semantic-kernel/model/MyModelRequest.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.ChatCompletion; | ||
|
||
// A simple mock request type that provides the function signatures needed by the snippet code.
class MyModelRequest
{
    // The prompt text or latest chat message to send to the model.
    public required string Request { get; set; }

    // Optional execution settings (model ID, extension data, etc.) to forward with the request.
    public PromptExecutionSettings? Settings { get; set; }

    // Whether the model should stream its response; streaming is the default.
    public bool Stream { get; set; } = true;

    // Builds a request from the most recent message in a chat history.
    public static MyModelRequest FromChatHistory(
        ChatHistory history,
        PromptExecutionSettings? settings
    ) => new() { Request = history.Last().Content!, Settings = settings };

    // Builds a request directly from a prompt string.
    public static MyModelRequest FromPrompt(string prompt, PromptExecutionSettings? settings) =>
        new() { Request = prompt, Settings = settings };
}
5 changes: 5 additions & 0 deletions
5
docs/ai/how-to/snippets/semantic-kernel/model/MyModelResponse.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// A simple mock response type that provides the function signatures needed by the snippet code.
class MyModelResponse
{
    // The completion strings returned by the model; empty when nothing was produced.
    public IReadOnlyList<string> Completions { get; init; } = Array.Empty<string>();
}
15 changes: 15 additions & 0 deletions
15
docs/ai/how-to/snippets/semantic-kernel/semantic-kernel.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net8.0</TargetFramework>
    <RootNamespace>semantic_kernel</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="Microsoft.SemanticKernel" Version="1.7.1" />
  </ItemGroup>

</Project>
102 changes: 102 additions & 0 deletions
102
docs/ai/how-to/snippets/semantic-kernel/services/MyChatCompletionService.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
using System.Collections.Immutable; | ||
using System.Net.Http.Json; | ||
using System.Runtime.CompilerServices; | ||
using System.Text.Json; | ||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.ChatCompletion; | ||
|
||
// <service>
class MyChatCompletionService : IChatCompletionService
{
    // Lazily created, empty attribute bag; this mock service exposes no metadata.
    private IReadOnlyDictionary<string, object?>? _attributes;
    public IReadOnlyDictionary<string, object?> Attributes =>
        _attributes ??= new Dictionary<string, object?>();

    // Reuse a single HttpClient for all requests; creating one per call
    // risks socket exhaustion under load.
    private static readonly HttpClient s_httpClient = new();

    public string ModelUrl { get; init; } = "<default url to your model's Chat API>";
    public required string ModelApiKey { get; init; }

    // Sends the chat history to the local model over HTTP and returns its
    // completions as assistant messages. Throws on an unsuccessful status
    // code or when the response body can't be deserialized.
    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(
        ChatHistory chatHistory,
        PromptExecutionSettings? executionSettings = null,
        Kernel? kernel = null,
        CancellationToken cancellationToken = default
    )
    {
        // Build your model's request object
        MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);

        // Send a POST to your model with the serialized request in the body
        using HttpResponseMessage httpResponse = await s_httpClient.PostAsJsonAsync(
            ModelUrl,
            request,
            cancellationToken
        );

        // Verify the request was completed successfully
        httpResponse.EnsureSuccessStatusCode();

        // Deserialize the response body to your model's response object
        // Handle when the deserialization fails and returns null
        MyModelResponse response =
            await httpResponse.Content.ReadFromJsonAsync<MyModelResponse>(cancellationToken)
            ?? throw new InvalidOperationException("Failed to deserialize response from model");

        // Convert your model's response into a list of ChatMessageContent
        return response
            .Completions.Select<string, ChatMessageContent>(completion =>
                new(AuthorRole.Assistant, completion)
            )
            .ToImmutableList();
    }

    // Streams the model's response back chunk-by-chunk as it is produced.
    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(
        ChatHistory chatHistory,
        PromptExecutionSettings? executionSettings = null,
        Kernel? kernel = null,
        [EnumeratorCancellation] CancellationToken cancellationToken = default
    )
    {
        // Build your model's request object, specify that streaming is requested
        MyModelRequest request = MyModelRequest.FromChatHistory(chatHistory, executionSettings);
        request.Stream = true;

        // Send a POST to your model with the serialized request in the body
        using HttpResponseMessage httpResponse = await s_httpClient.PostAsJsonAsync(
            ModelUrl,
            request,
            cancellationToken
        );

        // Verify the request was completed successfully
        httpResponse.EnsureSuccessStatusCode();

        // Read your model's response as a stream
        using StreamReader reader =
            new(await httpResponse.Content.ReadAsStreamAsync(cancellationToken));

        // Iteratively read a chunk of the response until the end of the stream
        // It is more efficient to use a buffer that is the same size as the internal buffer of the stream
        // If the size of the internal buffer was unspecified when the stream was constructed, its default size is 4 kilobytes (2048 UTF-16 characters)
        char[] buffer = new char[2048];
        while (!reader.EndOfStream)
        {
            // Fill the buffer with the next set of characters asynchronously,
            // honoring cancellation, and track how many characters were read
            int readCount = await reader.ReadAsync(buffer.AsMemory(), cancellationToken);

            // Convert the character buffer to a string, only include as many characters as were just read
            string chunk = new(buffer, 0, readCount);

            yield return new StreamingChatMessageContent(AuthorRole.Assistant, chunk);
        }
    }
}
// </service>
Oops, something went wrong.