Skip to content

scalar GetIndexOfFirstNonAsciiByte #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 100 additions & 7 deletions benchmark/Benchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,20 @@ namespace SimdUnicodeBenchmarks
public class Checker
{
List<char[]> names;
List<bool> results;
public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
{
// The runtime as of NET 8.0 has a dedicated method for this, but
// it is not available prior to that, so let us branch.
List<byte[]> AsciiBytes;
List<char[]> nonAsciichars;
public List<byte[]> nonAsciiBytes; // Declare at the class level

List<bool> results;

public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
{

// The runtime as of NET 8.0 has a dedicated method for this, but
// it is not available prior to that, so let us branch.
#if NET8_0_OR_GREATER
return Ascii.IsValid(s);
return System.Text.Ascii.IsValid(s);

#else
foreach (char c in s)
{
Expand All @@ -34,6 +41,8 @@ public static bool RuntimeIsAsciiApproach(ReadOnlySpan<char> s)
return true;
#endif
}


public static char[] GetRandomASCIIString(uint n)
{
var allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89";
Expand All @@ -49,23 +58,49 @@ public static char[] GetRandomASCIIString(uint n)
return chars;
}

/// <summary>
/// Builds a pseudo-random character array of length <paramref name="n"/> whose characters
/// are drawn from an alphabet mixing ASCII letters and digits with accented, non-ASCII
/// Latin characters.
/// </summary>
/// <remarks>
/// The generator uses a fixed seed, so every call with the same <paramref name="n"/>
/// produces the same sequence. Each position is drawn independently from the alphabet,
/// so a short result is not guaranteed to contain a non-ASCII character.
/// </remarks>
/// <param name="n">Number of characters to generate.</param>
/// <returns>A newly allocated array holding <paramref name="n"/> characters.</returns>
public static char[] GetRandomNonASCIIString(uint n)
{
    // ASCII letters and digits plus a handful of accented Latin characters.
    const string allowedChars = "abcdefghijkmnopqrstuvwxyzABCDEFGHJKLMNOPQRSTUVWXYZ01234567é89šžŸũŭůűųŷŹźŻżŽ";

    var chars = new char[n];
    var rd = new Random(12345); // fixed seed keeps the generated input reproducible

    for (var i = 0; i < n; i++)
    {
        chars[i] = allowedChars[rd.Next(0, allowedChars.Length)];
    }

    return chars;
}



// Length, in characters, requested for every generated input string.
// BenchmarkDotNet runs each benchmark once per value listed in [Params].
[Params(100, 200, 500, 1000, 2000)]
public uint N;


/// <summary>
/// Prepares the benchmark inputs: 100 character arrays of length <see cref="N"/>,
/// their ASCII-encoded byte arrays, 100 UTF-8 encoded byte arrays built from the
/// non-ASCII generator, and a result slot (initially <c>false</c>) per sample.
/// </summary>
/// <remarks>
/// Encoding happens here so that the benchmarks themselves only measure the scan
/// over ready-made byte arrays.
/// </remarks>
[GlobalSetup]
public void Setup()
{
    const int sampleCount = 100;

    names = new List<char[]>();
    nonAsciiBytes = new List<byte[]>();
    results = new List<bool>();
    var asciiEncoded = new List<byte[]>();

    for (int sample = 0; sample < sampleCount; sample++)
    {
        // Keep the raw chars for the char-based benchmarks and the ASCII bytes
        // for the byte-based ones. The ASCII encoder substitutes '?' for any
        // character it cannot represent.
        char[] asciiChars = GetRandomASCIIString(N);
        names.Add(asciiChars);
        asciiEncoded.Add(System.Text.Encoding.ASCII.GetBytes(asciiChars));

        // UTF-8 bytes of the mixed ASCII / non-ASCII input.
        byte[] utf8Bytes = System.Text.Encoding.UTF8.GetBytes(GetRandomNonASCIIString(N));
        nonAsciiBytes.Add(utf8Bytes);

        results.Add(false);
    }

    AsciiBytes = asciiEncoded;
}


[Benchmark]
public void FastUnicodeIsAscii()
{
Expand Down Expand Up @@ -98,7 +133,65 @@ public void RuntimeIsAscii()
count += 1;
}
}
/// <summary>
/// Benchmarks <c>SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte</c> over every buffer in
/// <c>nonAsciiBytes</c> (the UTF-8 encoded inputs prepared by <c>Setup</c>).
/// </summary>
/// <remarks>The value returned by each call is discarded.</remarks>
[Benchmark]
public void Error_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] buffer in nonAsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                nuint length = (nuint)buffer.Length;
                _ = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(start, length);
            }
        }
    }
}

/// <summary>
/// Benchmarks <c>Competition.Ascii.GetIndexOfFirstNonAsciiByte</c> over every buffer in
/// <c>nonAsciiBytes</c> (the UTF-8 encoded inputs prepared by <c>Setup</c>), as the
/// counterpart to the SimdUnicode benchmark on the same data.
/// </summary>
/// <remarks>The value returned by each call is discarded.</remarks>
[Benchmark]
public void Error_Runtime_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] buffer in nonAsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = buffer)
            {
                nuint length = (nuint)buffer.Length;
                _ = Competition.Ascii.GetIndexOfFirstNonAsciiByte(start, length);
            }
        }
    }
}

/// <summary>
/// Benchmarks <c>SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte</c> over every buffer in
/// <c>AsciiBytes</c> (the ASCII-encoded inputs prepared by <c>Setup</c>).
/// </summary>
/// <remarks>The value returned by each call is discarded.</remarks>
[Benchmark]
public void allAscii_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] asciiBuffer in AsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = asciiBuffer)
            {
                nuint length = (nuint)asciiBuffer.Length;
                _ = SimdUnicode.Ascii.GetIndexOfFirstNonAsciiByte(start, length);
            }
        }
    }
}

/// <summary>
/// Benchmarks <c>Competition.Ascii.GetIndexOfFirstNonAsciiByte</c> over every buffer in
/// <c>AsciiBytes</c> (the ASCII-encoded inputs prepared by <c>Setup</c>), as the
/// counterpart to the SimdUnicode benchmark on the same data.
/// </summary>
/// <remarks>The value returned by each call is discarded.</remarks>
[Benchmark]
public void allAscii_Runtime_GetIndexOfFirstNonAsciiByte()
{
    unsafe
    {
        foreach (byte[] asciiBuffer in AsciiBytes)
        {
            // Pin the array so a raw pointer can be handed to the scanner.
            fixed (byte* start = asciiBuffer)
            {
                nuint length = (nuint)asciiBuffer.Length;
                _ = Competition.Ascii.GetIndexOfFirstNonAsciiByte(start, length);
            }
        }
    }
}
}

public class Program
Expand Down
Loading