Skip to content

Commit 719b85f

Browse files
authored
Merge pull request #4 from simdutf/scalar_getFirstNonASCII
scalar GetIndexOfFirstNonAsciiByte
2 parents 4ac63e5 + c59fd98 commit 719b85f

File tree

7 files changed

+2738
-14
lines changed

7 files changed

+2738
-14
lines changed

benchmark/Benchmark.cs

Lines changed: 100 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,20 @@ namespace SimdUnicodeBenchmarks
1515
public class Checker
1616
{
1717
List<char[]> names;
18-
List<bool> results;
19-
public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
20-
{
21-
// The runtime as of NET 8.0 has a dedicated method for this, but
22-
// it is not available prior to that, so let us branch.
18+
List<byte[]> AsciiBytes;
19+
List<char[]> nonAsciichars;
20+
public List<byte[]> nonAsciiBytes; // Declare at the class level
21+
22+
List<bool> results;
23+
24+
public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
25+
{
26+
27+
// The runtime as of NET 8.0 has a dedicated method for this, but
28+
// it is not available prior to that, so let us branch.
2329
#if NET8_0_OR_GREATER
24-
return Ascii.IsValid(s);
30+
return System.Text.Ascii.IsValid(s);
31+
2532
#else
2633
foreach (char c in s)
2734
{
@@ -34,6 +41,8 @@ public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
3441
return true;
3542
#endif
3643
}
44+
45+
3746
public static char[] GetRandomASCIIString(uint n)
3847
{
3948
var allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89";
@@ -49,23 +58,49 @@ public static char[] GetRandomASCIIString(uint n)
4958
return chars;
5059
}
5160

61+
/// <summary>
/// Builds a pseudo-random array of <paramref name="n"/> chars drawn from a
/// fixed alphabet that mixes ASCII letters/digits with accented Latin letters
/// (code points above 0x7F).
/// </summary>
/// <param name="n">Number of chars to generate.</param>
/// <returns>A newly allocated array of length <paramref name="n"/>.</returns>
public static char[] GetRandomNonASCIIString(uint n)
{
    // Mostly ASCII, plus some accented letters so that a non-ASCII
    // character is likely (but not guaranteed) to appear in the output.
    const string alphabet = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89šžŸũŭůűųŷŹźŻżŽ";

    // NOTE(review): the generator is re-created with the same seed on every
    // call, so two calls with the same n return identical content.
    var generator = new Random(12345);
    var buffer = new char[n];

    uint position = 0;
    while (position < n)
    {
        int pick = generator.Next(0, alphabet.Length);
        buffer[position] = alphabet[pick];
        position++;
    }

    return buffer;
}
76+
77+
78+
79+
[Params(100, 200, 500, 1000, 2000)]
5480
public uint N;
5581

82+
5683
/// <summary>
/// Prepares the benchmark inputs: 100 char arrays of length <c>N</c>, the
/// UTF-8 encoding of 100 mixed ASCII/non-ASCII strings, the ASCII encoding
/// of the char arrays, and a result slot per input.
/// </summary>
[GlobalSetup]
public void Setup()
{
    const int inputCount = 100;

    names = new List<char[]>();
    nonAsciiBytes = new List<byte[]>();
    results = new List<bool>();

    for (int round = 0; round < inputCount; round++)
    {
        names.Add(GetRandomASCIIString(N));

        // Encode the mixed-content string as UTF-8 so the byte-level
        // benchmarks receive multi-byte sequences.
        byte[] utf8 = Encoding.UTF8.GetBytes(GetRandomNonASCIIString(N));
        nonAsciiBytes.Add(utf8);

        results.Add(false);
    }

    // Encoding.ASCII substitutes '?' for any char it cannot represent, so
    // these buffers are pure ASCII even though the alphabet used by
    // GetRandomASCIIString contains 'é'.
    var asciiEncoded = new List<byte[]>(names.Count);
    foreach (char[] name in names)
    {
        asciiEncoded.Add(Encoding.ASCII.GetBytes(name));
    }
    AsciiBytes = asciiEncoded;
}
68102

103+
69104
[Benchmark]
70105
public void FastUnicodeIsAscii()
71106
{
@@ -98,7 +133,65 @@ public void RuntimeIsAscii()
98133
count += 1;
99134
}
100135
}
136+
/// <summary>
/// Runs SimdUnicode's GetIndexOfFirstNonAsciiByte over every UTF-8 buffer
/// in <c>nonAsciiBytes</c>.
/// </summary>
[Benchmark]
public void Error_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        for (int slot = 0; slot < nonAsciiBytes.Count; slot++)
        {
            byte[] buffer = nonAsciiBytes[slot];

            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                // NOTE(review): the returned index is discarded; consider
                // returning or consuming it so the call cannot be optimized away.
                _ = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(start, (nuint)buffer.Length);
            }
        }
    }
}
150+
151+
/// <summary>
/// Runs Competition.Ascii.GetIndexOfFirstNonAsciiByte over every UTF-8
/// buffer in <c>nonAsciiBytes</c>. Judging by the method name this is the
/// runtime-derived baseline for comparison — TODO confirm.
/// </summary>
[Benchmark]
public void Error_Runtime_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        for (int slot = 0; slot < nonAsciiBytes.Count; slot++)
        {
            byte[] buffer = nonAsciiBytes[slot];

            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                // NOTE(review): the returned index is discarded; consider
                // returning or consuming it so the call cannot be optimized away.
                _ = Competition.Ascii.GetIndexOfFirstNonAsciiByte(start, (nuint)buffer.Length);
            }
        }
    }
}
101165

166+
/// <summary>
/// Runs SimdUnicode's GetIndexOfFirstNonAsciiByte over every buffer in
/// <c>AsciiBytes</c> (the ASCII-encoded inputs).
/// </summary>
[Benchmark]
public void allAscii_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        for (int slot = 0; slot < AsciiBytes.Count; slot++)
        {
            byte[] buffer = AsciiBytes[slot];

            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                // NOTE(review): the returned index is discarded; consider
                // returning or consuming it so the call cannot be optimized away.
                _ = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(start, (nuint)buffer.Length);
            }
        }
    }
}
180+
181+
/// <summary>
/// Runs Competition.Ascii.GetIndexOfFirstNonAsciiByte over every buffer in
/// <c>AsciiBytes</c> (the ASCII-encoded inputs). Judging by the method name
/// this is the runtime-derived baseline for comparison — TODO confirm.
/// </summary>
[Benchmark]
public void allAscii_Runtime_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        for (int slot = 0; slot < AsciiBytes.Count; slot++)
        {
            byte[] buffer = AsciiBytes[slot];

            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                // NOTE(review): the returned index is discarded; consider
                // returning or consuming it so the call cannot be optimized away.
                _ = Competition.Ascii.GetIndexOfFirstNonAsciiByte(start, (nuint)buffer.Length);
            }
        }
    }
}
102195
}
103196

104197
public class Program

0 commit comments

Comments
 (0)