-
Notifications
You must be signed in to change notification settings - Fork 341
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added some unit tests for ITextTokenizer.GetTokens implementation
- Loading branch information
1 parent
578bfa7
commit 4a9b822
Showing
3 changed files
with
145 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
using LLama.Common; | ||
using LLamaSharp.KernelMemory; | ||
using Microsoft.KernelMemory.AI; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Runtime.CompilerServices; | ||
using System.Text; | ||
using System.Text.RegularExpressions; | ||
using System.Threading.Tasks; | ||
using Xunit.Abstractions; | ||
|
||
namespace LLama.Unittest.KernelMemory | ||
{ | ||
/// <summary>
/// Shared xUnit test cases for <c>ITextTokenizer</c> implementations.
/// A derived test class builds a concrete tokenizer from <see cref="_lsConfig"/> and
/// assigns it to <see cref="_generator"/>; the tests declared here then run against it.
/// </summary>
public abstract class ITextTokenizerTests
{
    private readonly ITestOutputHelper _testOutputHelper;

#pragma warning disable KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
    /// <summary>
    /// Tokenizer under test. It is not set by this base class; derived classes assign it
    /// in their constructors, which is why the tests dereference it with <c>!</c>.
    /// </summary>
    protected ITextTokenizer? _generator;
#pragma warning restore KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.

    /// <summary>Inference parameters installed as the defaults of <see cref="_lsConfig"/>.</summary>
    protected InferenceParams _infParams;

    /// <summary>Configuration that derived classes pass to the tokenizer they construct.</summary>
    protected LLamaSharpConfig _lsConfig;

    /// <summary>
    /// Builds the shared configuration and logs the file name of the model being used.
    /// Protected because an abstract class can only be constructed through a derived type.
    /// </summary>
    /// <param name="testOutputHelper">xUnit sink used for diagnostic output.</param>
    protected ITextTokenizerTests(ITestOutputHelper testOutputHelper)
    {
        _testOutputHelper = testOutputHelper;

        _infParams = new() { AntiPrompts = ["\n\n"] };
        _lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams };

        _testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
    }

    /// <summary>
    /// Concatenating the tokens returned by <c>GetTokens</c> must reproduce the input preceded by a
    /// single space, and the number of tokens must equal the value reported by <c>CountTokens</c>.
    /// </summary>
    /// <param name="text">Input text supplied by the <c>InlineData</c> rows; never null.</param>
    [Theory]
    [InlineData("The quick brown fox jumps over the lazy dog")]
    [InlineData("Well, here're some special characters!!!")]
    [InlineData("And a little bit of unicode για να κρατήσουμε τα πράγματα ενδιαφέροντα")]
    [InlineData(" \n \r\n \t ")]
    public void GetTokens_ShouldReturnListOfTokensForInputString(string text)
    {
        var tokens = _generator!.GetTokens(text);
        var tokensCount = _generator.CountTokens(text);

        var expected = " " + text; // the placement of the space corresponding to BOS will vary by model
        var actual = string.Join("", tokens);

        WriteTokens(text, tokens);

        Assert.Equal(expected, actual);
        Assert.Equal(tokensCount, tokens.Count);
    }

    /// <summary>
    /// Passing <c>null</c> to <c>GetTokens</c> must raise <see cref="ArgumentNullException"/>.
    /// </summary>
    [Fact]
    public void GetToken_ShouldThrowForNull()
    {
        string? text = null;

        // The null-forgiving operator only silences the compiler; null is what is passed at runtime.
        Assert.Throws<ArgumentNullException>(() => { _generator!.GetTokens(text!); });
    }

    /// <summary>
    /// An empty input must produce exactly one token, that token must be the empty string,
    /// and <c>CountTokens</c> must agree with the length of the returned list.
    /// </summary>
    [Fact]
    public void GetToken_EmptyStringYieldsOneEmptyToken()
    {
        var text = "";
        var expected = "";

        var tokens = _generator!.GetTokens(text);
        var tokensCount = _generator.CountTokens(text);

        // Single() throws unless there is exactly one token, so it doubles as a count check.
        var actual = tokens.Single();

        WriteTokens(text, tokens);

        Assert.Equal(expected, actual);
        Assert.Equal(tokensCount, tokens.Count);
    }

    /// <summary>
    /// Writes the input text and each token wrapped in parentheses to the test output,
    /// so token boundaries are visible when inspecting a test run.
    /// </summary>
    /// <param name="text">The text that was tokenized.</param>
    /// <param name="tokens">The tokens produced for <paramref name="text"/>.</param>
    private void WriteTokens(string text, IEnumerable<string> tokens)
    {
        _testOutputHelper.WriteLine($"Tokens for '{text}':");
        _testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));
    }
}
} |
30 changes: 30 additions & 0 deletions
30
LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
using LLama.Common; | ||
using LLamaSharp.KernelMemory; | ||
using Microsoft.KernelMemory.AI; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Text.RegularExpressions; | ||
using System.Threading.Tasks; | ||
using Xunit.Abstractions; | ||
|
||
namespace LLama.Unittest.KernelMemory | ||
{ | ||
/// <summary>
/// Runs the test cases inherited from <c>ITextTokenizerTests</c> against a
/// <c>LLamaSharpTextEmbeddingGenerator</c> instance.
/// </summary>
public class LLamaSharpTextEmbeddingGeneratorTests : ITextTokenizerTests, IDisposable
{
    private readonly LLamaSharpTextEmbeddingGenerator _embeddingGenerator;

    /// <summary>
    /// Creates the embedding generator from the configuration prepared by the base
    /// constructor and registers it as the tokenizer under test.
    /// </summary>
    /// <param name="testOutputHelper">xUnit output sink, forwarded to the base class.</param>
    public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper)
        : base(testOutputHelper)
    {
        // One instance, two references: the typed field is kept for Dispose,
        // the base-class field is what the inherited tests call.
        _generator = _embeddingGenerator = new LLamaSharpTextEmbeddingGenerator(_lsConfig);
    }

    /// <summary>Disposes the embedding generator created in the constructor.</summary>
    public void Dispose() => _embeddingGenerator.Dispose();
}
} |
34 changes: 34 additions & 0 deletions
34
LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
using LLama.Common; | ||
using LLamaSharp.KernelMemory; | ||
using Microsoft.KernelMemory.AI; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Diagnostics; | ||
using System.Linq; | ||
using System.Reflection.Emit; | ||
using System.Text; | ||
using System.Text.RegularExpressions; | ||
using System.Threading.Tasks; | ||
using Xunit.Abstractions; | ||
using Xunit.Sdk; | ||
using static System.Net.Mime.MediaTypeNames; | ||
|
||
namespace LLama.Unittest.KernelMemory | ||
{ | ||
/// <summary>
/// Runs the test cases inherited from <c>ITextTokenizerTests</c> against a
/// <c>LlamaSharpTextGenerator</c> instance.
/// </summary>
public class LlamaSharpTextGeneratorTests : ITextTokenizerTests, IDisposable
{
    private readonly LlamaSharpTextGenerator _textGenerator;

    /// <summary>
    /// Creates the text generator from the configuration prepared by the base
    /// constructor and registers it as the tokenizer under test.
    /// </summary>
    /// <param name="testOutputHelper">xUnit output sink, forwarded to the base class.</param>
    public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper)
        : base(testOutputHelper)
    {
        var textGenerator = new LlamaSharpTextGenerator(_lsConfig);

        // Keep the typed reference for Dispose and expose the same instance to the inherited tests.
        _textGenerator = textGenerator;
        _generator = textGenerator;
    }

    /// <summary>Disposes the text generator created in the constructor.</summary>
    public void Dispose() => _textGenerator.Dispose();
}
} |