Skip to content

Commit

Permalink
Bug in SplitAny
Browse files Browse the repository at this point in the history
  • Loading branch information
clipperhouse committed Jul 31, 2024
1 parent 027cde3 commit 67ffaf2
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 22 deletions.
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
# Split.net

A more efficient splitter for bytes and strings, with a focus on zero allocation, in C#.

### Usage

```
⚠️ _Not on NuGet yet; you'll need to clone the repository for now._
dotnet add package Split.net
```
```
```
4 changes: 2 additions & 2 deletions Split/Enumerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ internal Enumerator(ReadOnlySpan<T> source, ReadOnlySpan<T> separator, bool trea
en = MemoryExtensions.Split(source, separator);
}

internal Enumerator(ReadOnlySpan<T> source, ReadOnlySpan<T> separator)
internal Enumerator(ReadOnlySpan<T> source, ReadOnlySpan<T> separators)
{
en = MemoryExtensions.SplitAny(source, separator);
en = MemoryExtensions.SplitAny(source, separators);
}

internal Enumerator(ReadOnlySpan<T> source, SearchValues<T> searchValues)
Expand Down
7 changes: 4 additions & 3 deletions Split/SpanSplitEnumerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,20 @@ internal SpanSplitEnumerator(ReadOnlySpan<T> input, SearchValues<T> searchValues
/// it will instead use <see cref="SpanSplitEnumeratorMode.SearchValues"/> with a cached <see cref="SearchValues{Char}"/>
/// for all whitespace characters.
/// </remarks>
internal SpanSplitEnumerator(ReadOnlySpan<T> span, ReadOnlySpan<T> separators)
internal SpanSplitEnumerator(ReadOnlySpan<T> input, ReadOnlySpan<T> separators)
{
this.input = input;
this.separators = separators;
mode = SpanSplitEnumeratorMode.Any;
}

/// <summary>Initializes the enumerator for <see cref="SpanSplitEnumeratorMode.Sequence"/> (or <see cref="SpanSplitEnumeratorMode.EmptySequence"/> if the separator is empty).</summary>
/// <remarks><paramref name="treatAsSingleSeparator"/> must be true.</remarks>
internal SpanSplitEnumerator(ReadOnlySpan<T> span, ReadOnlySpan<T> separator, bool treatAsSingleSeparator)
internal SpanSplitEnumerator(ReadOnlySpan<T> input, ReadOnlySpan<T> separator, bool treatAsSingleSeparator)
{
Debug.Assert(treatAsSingleSeparator, "Should only ever be called as true; exists to differentiate from separators overload");

input = span;
this.input = input;
separators = separator;
mode = separator.Length == 0 ?
SpanSplitEnumeratorMode.EmptySequence :
Expand Down
21 changes: 6 additions & 15 deletions Tests/APIs.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,15 @@ public void Readme()
}

/*
Hello
,
🌏
world
.
.
Hello,🌏
world.
你好,
世界.
*/

var bytes = Encoding.UTF8.GetBytes(example);
var splits2 = bytes.SplitOn((byte)' ');
var separators = " ,."u8.ToArray();
var splits2 = bytes.SplitOnAny(separators);

foreach (var split2 in splits2)
{
Expand Down
24 changes: 24 additions & 0 deletions Tests/Split.Test.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
namespace Tests;

using System.ComponentModel;
using System.Text;
using Split.Extensions;
using Xunit;
Expand All @@ -21,4 +22,27 @@ public void MatchStlibStringSplit()
}
}
}

/// <summary>
/// Splits a mixed ASCII / emoji / CJK string on any of the separators
/// space, comma and period, and checks every resulting segment (including
/// the empty segments produced between adjacent separators) against the
/// expected list, in order.
/// </summary>
[Fact]
public void Any()
{
    Console.WriteLine("Any:");
    var example = "Hello, 🌏 world. 你好, 世界. ";
    var seps = " ,.".ToCharArray();

    var splits = example.SplitOnAny(seps).ToArray();

    // Note: when two separators are adjacent in the string, there is an empty split between them.
    // The trailing separator likewise produces a final empty split.
    string[] expecteds = ["Hello", "", "🌏", "world", "", "你好", "", "世界", "", "",];

    var i = 0;
    foreach (var s in splits)
    {
        // Fail with a readable message (rather than an IndexOutOfRangeException)
        // if the splitter yields more segments than expected.
        Assert.True(i < expecteds.Length, $"Got more than the expected {expecteds.Length} splits");

        var actual = new string(s);
        var expected = expecteds[i];
        Assert.Equal(expected, actual);
        i++;
    }

    // Without this check the test would pass vacuously if the splitter
    // yielded fewer segments than expected (or none at all).
    Assert.Equal(expecteds.Length, i);
}
}

0 comments on commit 67ffaf2

Please sign in to comment.