From 67ffaf25d5ae42918d7290deb2337b154586f84b Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Wed, 31 Jul 2024 17:23:49 -0400 Subject: [PATCH] Bug in SplitAny --- README.md | 13 +++++++++++-- Split/Enumerator.cs | 4 ++-- Split/SpanSplitEnumerator.cs | 7 ++++--- Tests/APIs.cs | 21 ++++++--------------- Tests/Split.Test.cs | 24 ++++++++++++++++++++++++ 5 files changed, 47 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index a40d7fc..bb5b885 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,11 @@ -# Split.net - +A more efficient splitter for bytes and strings, with a focus on zero allocation, in C#. + +### Usage + +``` +⚠️ _Not on Nuget yet, you'll need to clone for now_ +dotnet add package Split.net +``` +``` + +``` diff --git a/Split/Enumerator.cs b/Split/Enumerator.cs index ca1c2c2..b8eb1ee 100644 --- a/Split/Enumerator.cs +++ b/Split/Enumerator.cs @@ -17,9 +17,9 @@ internal Enumerator(ReadOnlySpan source, ReadOnlySpan separator, bool trea en = MemoryExtensions.Split(source, separator); } - internal Enumerator(ReadOnlySpan source, ReadOnlySpan separator) + internal Enumerator(ReadOnlySpan source, ReadOnlySpan separators) { - en = MemoryExtensions.SplitAny(source, separator); + en = MemoryExtensions.SplitAny(source, separators); } internal Enumerator(ReadOnlySpan source, SearchValues searchValues) diff --git a/Split/SpanSplitEnumerator.cs b/Split/SpanSplitEnumerator.cs index dc492c4..1962547 100644 --- a/Split/SpanSplitEnumerator.cs +++ b/Split/SpanSplitEnumerator.cs @@ -62,19 +62,20 @@ internal SpanSplitEnumerator(ReadOnlySpan input, SearchValues searchValues /// it will instead use with a cached /// for all whitespace characters. /// - internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separators) + internal SpanSplitEnumerator(ReadOnlySpan input, ReadOnlySpan separators) { + this.input = input; this.separators = separators; mode = SpanSplitEnumeratorMode.Any; } /// Initializes the enumerator for (or if the separator is empty). 
/// must be true. - internal SpanSplitEnumerator(ReadOnlySpan span, ReadOnlySpan separator, bool treatAsSingleSeparator) + internal SpanSplitEnumerator(ReadOnlySpan input, ReadOnlySpan separator, bool treatAsSingleSeparator) { Debug.Assert(treatAsSingleSeparator, "Should only ever be called as true; exists to differentiate from separators overload"); - input = span; + this.input = input; separators = separator; mode = separator.Length == 0 ? SpanSplitEnumeratorMode.EmptySequence : diff --git a/Tests/APIs.cs b/Tests/APIs.cs index 5e79aef..972cbb1 100644 --- a/Tests/APIs.cs +++ b/Tests/APIs.cs @@ -20,24 +20,15 @@ public void Readme() } /* - Hello - , - - 🌏 - - world - . - - 你 - 好 - , - 世 - 界 - . + Hello,🌏 + world. + 你好, + 世界. */ var bytes = Encoding.UTF8.GetBytes(example); - var splits2 = bytes.SplitOn((byte)' '); + var separators = " ,."u8.ToArray(); + var splits2 = bytes.SplitOnAny(separators); foreach (var split2 in splits2) { diff --git a/Tests/Split.Test.cs b/Tests/Split.Test.cs index c773a74..67c4057 100644 --- a/Tests/Split.Test.cs +++ b/Tests/Split.Test.cs @@ -1,5 +1,6 @@ namespace Tests; +using System.ComponentModel; using System.Text; using Split.Extensions; using Xunit; @@ -21,4 +22,27 @@ public void MatchStlibStringSplit() } } } + + [Fact] + public void Any() + { + Console.WriteLine("Any:"); + var example = "Hello, 🌏 world. 你好, 世界. "; + var seps = " ,.".ToCharArray(); + + var splits = example.SplitOnAny(seps).ToArray(); + + // Note, when two separators are adjacent in the string, there is an empty split between them + string[] expecteds = ["Hello", "", "🌏", "world", "", "你好", "", "世界", "", "",]; + + var i = 0; + foreach (var s in splits) + { + var actual = new string(s); + var expected = expecteds[i]; + Assert.Equal(expected, actual); + i++; + } + + } }