@@ -15,13 +15,20 @@ namespace SimdUnicodeBenchmarks
15
15
public class Checker
16
16
{
17
17
List < char [ ] > names ;
18
- List < bool > results ;
19
- public static bool RuntimeIsAsciiApproach ( ReadOnlySpan < char > s )
20
- {
21
- // The runtime as of NET 8.0 has a dedicated method for this, but
22
- // it is not available prior to that, so let us branch.
18
+ List < byte [ ] > AsciiBytes ;
19
+ List < char [ ] > nonAsciichars ;
20
+ public List < byte [ ] > nonAsciiBytes ; // Declare at the class level
21
+
22
+ List < bool > results ;
23
+
24
+ public static bool RuntimeIsAsciiApproach ( ReadOnlySpan < char > s )
25
+ {
26
+
27
+ // The runtime as of NET 8.0 has a dedicated method for this, but
28
+ // it is not available prior to that, so let us branch.
23
29
#if NET8_0_OR_GREATER
24
- return Ascii . IsValid ( s ) ;
30
+ return System . Text . Ascii . IsValid ( s ) ;
31
+
25
32
#else
26
33
foreach ( char c in s )
27
34
{
@@ -34,6 +41,8 @@ public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
34
41
return true ;
35
42
#endif
36
43
}
44
+
45
+
37
46
public static char [ ] GetRandomASCIIString ( uint n )
38
47
{
39
48
var allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89" ;
@@ -49,23 +58,49 @@ public static char[] GetRandomASCIIString(uint n)
49
58
return chars ;
50
59
}
51
60
61
/// <summary>
/// Produces a pseudo-random character array of length <paramref name="n"/>
/// drawn from a pool of ASCII letters/digits mixed with accented Latin characters.
/// </summary>
/// <remarks>
/// A new <see cref="Random"/> seeded with 12345 is created on every call, so two
/// calls with the same <paramref name="n"/> return identical contents.
/// Because the pool also contains ASCII characters, a short result is not
/// guaranteed to include a non-ASCII character.
/// </remarks>
/// <param name="n">Number of characters to generate.</param>
/// <returns>A freshly allocated array holding the generated characters.</returns>
public static char[] GetRandomNonASCIIString(uint n)
{
    // Character pool: the ASCII alphabet used elsewhere in this class plus accented letters.
    const string alphabet = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89šžŸũŭůűųŷŹźŻżŽ";

    // Fixed seed keeps the generated data reproducible from run to run.
    var rng = new Random(12345);
    var buffer = new char[n];

    for (uint position = 0; position < n; position++)
    {
        int pick = rng.Next(0, alphabet.Length);
        buffer[position] = alphabet[pick];
    }

    return buffer;
}
76
+
77
+
78
+
79
+ [ Params ( 100 , 200 , 500 , 1000 , 2000 ) ]
54
80
public uint N ;
55
81
82
+
56
83
// Prepares the benchmark inputs for the current value of N.
// For each of the 100 samples it stores:
//   - names:         a char[] from GetRandomASCIIString(N)
//   - nonAsciiBytes: the UTF-8 encoding of a char[] from GetRandomNonASCIIString(N)
//   - results:       a placeholder 'false' entry
// Afterwards AsciiBytes is filled with the ASCII encoding of every entry in names.
[GlobalSetup]
public void Setup()
{
    const int sampleCount = 100;

    names = new List<char[]>();
    nonAsciiBytes = new List<byte[]>();
    results = new List<bool>();

    for (int sample = 0; sample < sampleCount; sample++)
    {
        char[] asciiName = GetRandomASCIIString(N);
        names.Add(asciiName);

        // The mixed-character sample is kept only in its UTF-8 encoded form.
        char[] mixedChars = GetRandomNonASCIIString(N);
        byte[] mixedUtf8 = Encoding.UTF8.GetBytes(mixedChars);
        nonAsciiBytes.Add(mixedUtf8);

        results.Add(false);
    }

    // Second pass: encode every name with the ASCII encoder.
    // NOTE(review): Encoding.ASCII substitutes '?' for chars above U+007F, so these
    // buffers should be pure ASCII even if a name contains such a char - confirm.
    var asciiEncoded = new List<byte[]>();
    foreach (char[] name in names)
    {
        asciiEncoded.Add(System.Text.Encoding.ASCII.GetBytes(name));
    }

    AsciiBytes = asciiEncoded;
}
68
102
103
+
69
104
[ Benchmark ]
70
105
public void FastUnicodeIsAscii ( )
71
106
{
@@ -98,7 +133,65 @@ public void RuntimeIsAscii()
98
133
count += 1 ;
99
134
}
100
135
}
136
// Benchmark: calls SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte once for every
// UTF-8 buffer stored in nonAsciiBytes. The returned index is not used.
[Benchmark]
public void Error_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] utf8Buffer in nonAsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* bufferStart = utf8Buffer)
            {
                _ = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(bufferStart, (nuint)utf8Buffer.Length);
            }
        }
    }
}
150
+
151
// Benchmark: calls Competition.Ascii.GetIndexOfFirstNonAsciiByte once for every
// UTF-8 buffer stored in nonAsciiBytes. The returned index is not used.
[Benchmark]
public void Error_Runtime_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] utf8Buffer in nonAsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* bufferStart = utf8Buffer)
            {
                _ = Competition.Ascii.GetIndexOfFirstNonAsciiByte(bufferStart, (nuint)utf8Buffer.Length);
            }
        }
    }
}
101
165
166
// Benchmark: calls SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte once for every
// ASCII-encoded buffer stored in AsciiBytes. The returned index is not used.
[Benchmark]
public void allAscii_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] asciiBuffer in AsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* bufferStart = asciiBuffer)
            {
                _ = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(bufferStart, (nuint)asciiBuffer.Length);
            }
        }
    }
}
180
+
181
// Benchmark: calls Competition.Ascii.GetIndexOfFirstNonAsciiByte once for every
// ASCII-encoded buffer stored in AsciiBytes. The returned index is not used.
[Benchmark]
public void allAscii_Runtime_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] asciiBuffer in AsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* bufferStart = asciiBuffer)
            {
                _ = Competition.Ascii.GetIndexOfFirstNonAsciiByte(bufferStart, (nuint)asciiBuffer.Length);
            }
        }
    }
}
102
195
}
103
196
104
197
public class Program
0 commit comments