Skip to content

Commit

Permalink
added some unit tests for ITextTokenizer.GetTokens implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
SpaceAntelope committed Jul 22, 2024
1 parent 578bfa7 commit 4a9b822
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 0 deletions.
81 changes: 81 additions & 0 deletions LLama.Unittest/KernelMemory/ITextTokenizerTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Xunit.Abstractions;

namespace LLama.Unittest.KernelMemory
{
    /// <summary>
    /// Base class holding xUnit tests for the <c>GetTokens</c> and <c>CountTokens</c> members of an
    /// <c>ITextTokenizer</c>. A derived test class assigns the implementation under test to
    /// <see cref="_generator"/> in its constructor.
    /// </summary>
    public abstract class ITextTokenizerTests
    {
        private readonly ITestOutputHelper _testOutputHelper;

#pragma warning disable KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
        /// <summary>Tokenizer under test; expected to be assigned by the derived test class.</summary>
        protected ITextTokenizer? _generator;

        /// <summary>
        /// Non-null view of <see cref="_generator"/>. Throws a descriptive exception instead of a
        /// <see cref="NullReferenceException"/> when a derived class forgot to assign the field.
        /// </summary>
        private ITextTokenizer Tokenizer =>
            _generator ?? throw new InvalidOperationException(
                $"{nameof(_generator)} must be assigned by the derived test class before a test runs.");
#pragma warning restore KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.

        /// <summary>Inference parameters used as the defaults of <see cref="_lsConfig"/>.</summary>
        protected InferenceParams _infParams;

        /// <summary>Configuration built from <c>Constants.GenerativeModelPath</c>, available to derived classes.</summary>
        protected LLamaSharpConfig _lsConfig;

        /// <summary>
        /// Builds the shared inference parameters and configuration, and writes the file name of the
        /// configured model to the test output.
        /// </summary>
        /// <param name="testOutputHelper">xUnit output sink used for diagnostic messages.</param>
        public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
        {
            _testOutputHelper = testOutputHelper;

            _infParams = new() { AntiPrompts = ["\n\n"] };
            _lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams };

            testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
        }

        /// <summary>
        /// Checks that concatenating the returned tokens reproduces the input prefixed with a single
        /// space, and that the number of tokens matches <c>CountTokens</c>.
        /// </summary>
        /// <param name="text">Input to tokenize; never null for the supplied data rows.</param>
        [Theory]
        [InlineData("The quick brown fox jumps over the lazy dog")]
        [InlineData("Well, here're some special characters!!!")]
        [InlineData("And a little bit of unicode για να κρατήσουμε τα πράγματα ενδιαφέροντα")]
        [InlineData(" \n \r\n \t ")]
        public void GetTokens_ShouldReturnListOfTokensForInputString(string text)
        {
            var tokens = Tokenizer.GetTokens(text);
            var tokensCount = Tokenizer.CountTokens(text);

            var expected = " " + text; // the placement of the space corresponding to BOS will vary by model
            var actual = string.Join("", tokens);

            WriteTokens(text, tokens);

            Assert.Equal(expected, actual);
            Assert.Equal(tokensCount, tokens.Count);
        }

        /// <summary>Checks that passing null to <c>GetTokens</c> raises <see cref="ArgumentNullException"/>.</summary>
        [Fact]
        public void GetToken_ShouldThrowForNull()
        {
            string? text = null;

            // The null-forgiving operator is deliberate: the test exercises the runtime null check.
            Assert.Throws<ArgumentNullException>(() => { Tokenizer.GetTokens(text!); });
        }

        /// <summary>
        /// Checks that an empty input produces exactly one token, that this token is the empty string,
        /// and that <c>CountTokens</c> agrees with the number of tokens returned.
        /// </summary>
        [Fact]
        public void GetToken_EmptyStringYieldsOneEmptyToken()
        {
            var text = "";
            var expected = "";

            var tokens = Tokenizer.GetTokens(text);
            var tokensCount = Tokenizer.CountTokens(text);
            var actual = tokens.Single(); // Single() itself fails the test unless there is exactly one token

            WriteTokens(text, tokens);

            Assert.Equal(expected, actual);
            Assert.Equal(tokensCount, tokens.Count);
        }

        /// <summary>Writes the input and each token wrapped in parentheses to the test output.</summary>
        /// <param name="text">The text that was tokenized.</param>
        /// <param name="tokens">The tokens produced for <paramref name="text"/>.</param>
        private void WriteTokens(string text, IEnumerable<string> tokens)
        {
            _testOutputHelper.WriteLine($"Tokens for '{text}':");
            _testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Xunit.Abstractions;

namespace LLama.Unittest.KernelMemory
{
    /// <summary>
    /// Runs the tests inherited from <see cref="ITextTokenizerTests"/> against a
    /// <see cref="LLamaSharpTextEmbeddingGenerator"/> created from the base class configuration.
    /// </summary>
    public class LLamaSharpTextEmbeddingGeneratorTests : ITextTokenizerTests, IDisposable
    {
        private readonly LLamaSharpTextEmbeddingGenerator _embeddingGenerator;

        /// <summary>
        /// Creates the embedding generator and registers it as the tokenizer under test.
        /// </summary>
        /// <param name="testOutputHelper">xUnit output sink, forwarded to the base class.</param>
        public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper)
            : base(testOutputHelper)
        {
            // The same instance is kept twice: typed for disposal, and via the base field for the tests.
            _generator = _embeddingGenerator = new LLamaSharpTextEmbeddingGenerator(_lsConfig);
        }

        /// <summary>Disposes the embedding generator created in the constructor.</summary>
        public void Dispose() => _embeddingGenerator.Dispose();
    }
}
34 changes: 34 additions & 0 deletions LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection.Emit;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Xunit.Abstractions;
using Xunit.Sdk;
using static System.Net.Mime.MediaTypeNames;

namespace LLama.Unittest.KernelMemory
{
    /// <summary>
    /// Runs the tests inherited from <see cref="ITextTokenizerTests"/> against a
    /// <see cref="LlamaSharpTextGenerator"/> created from the base class configuration.
    /// </summary>
    public class LlamaSharpTextGeneratorTests : ITextTokenizerTests, IDisposable
    {
        private readonly LlamaSharpTextGenerator _textGenerator;

        /// <summary>
        /// Creates the text generator and registers it as the tokenizer under test.
        /// </summary>
        /// <param name="testOutputHelper">xUnit output sink, forwarded to the base class.</param>
        public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper)
            : base(testOutputHelper)
        {
            // The same instance is kept twice: typed for disposal, and via the base field for the tests.
            _generator = _textGenerator = new LlamaSharpTextGenerator(_lsConfig);
        }

        /// <summary>Disposes the text generator created in the constructor.</summary>
        public void Dispose() => _textGenerator.Dispose();
    }
}

0 comments on commit 4a9b822

Please sign in to comment.