Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avx2 decode fix #31

Merged
merged 22 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ really fast base64 decoding function. The initial work that led to the fast fun
was carried out by [gfoidl](https://github.com/gfoidl/Base64).

- There are accelerated base64 functions for UTF-8 inputs in the .NET runtime, but they are not optimal:
we can make them 50% to 2x or 3x faster.
we can make them 50% faster.
- There are no accelerated base64 functions for UTF-16 inputs (e.g., `string` types). We can be 2x faster
or more.

Expand All @@ -18,6 +18,18 @@ of the presence of allowable white space characters and the need to validate the
inputs are valid for encoding, but only some inputs are valid for decoding. Having to skip white space
characters makes accelerated decoding somewhat difficult.

## Results (SimdBase64 vs. fast .NET functions)

We use the Enron base64 data for benchmarking (see `benchmark/data/email`).
We process the data as UTF-8 (ASCII) using the .NET accelerated functions
as a reference (`System.Buffers.Text.Base64.DecodeFromUtf8`).


| processor | SimdBase64 (GB/s) | .NET speed (GB/s) | speed up |
|:----------------|:------------------------|:-------------------|:-------------------|
| Apple M2 processor (ARM) | 6.2 | 3.8 | 1.6 x |
| Intel Ice Lake (AVX2) | 5.3 | 3.4 | 1.6 x |


## Requirements

Expand Down Expand Up @@ -71,7 +83,7 @@ To run just one benchmark, use a filter:

```
cd benchmark
dotnet run --configuration Release --filter "*somefilter*"
dotnet run -c Release --filter "SimdUnicodeBenchmarks.RealDataBenchmark.AVX2DecodingRealDataUTF8(FileName: \"data/email/\")"
```

If you are under macOS or Linux, you may want to run the benchmarks in privileged mode:
Expand Down
165 changes: 133 additions & 32 deletions benchmark/Benchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public string GetValue(Summary summary, BenchmarkCase benchmarkCase)
public string Legend { get; } = "The speed in gigabytes per second";
}

[SimpleJob(launchCount: 1, warmupCount: 5, iterationCount: 5)]
[SimpleJob(launchCount: 1, warmupCount: 10, iterationCount: 10)]
[Config(typeof(Config))]
#pragma warning disable CA1515
public class RealDataBenchmark
Expand Down Expand Up @@ -153,8 +153,8 @@ public Config()
}
// Parameters and variables for real data
[Params(
@"data/email/",
@"data/dns/swedenzonebase.txt"
@"data/email/" //,
//@"data/dns/swedenzonebase.txt"
)]
#pragma warning disable CA1051
public string? FileName;
Expand Down Expand Up @@ -305,17 +305,53 @@ public unsafe void RunSSEDecodingBenchmarkUTF16(string[] data, int[] lengths)
}
}

/// <summary>
/// Decodes each UTF-8 buffer in <c>input</c> with the SSE decoder, allocating a fresh
/// output array per entry, and checks the number of bytes written against
/// <paramref name="lengths"/>.
/// </summary>
/// <param name="data">Not read by this method; iteration is bounded by <c>FileContent.Length</c>.</param>
/// <param name="lengths">Expected number of decoded bytes for each entry.</param>
/// <exception cref="Exception">Thrown when a decoded size differs from the expected one.</exception>
public unsafe void RunSSEDecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
{
    for (int idx = 0; idx < FileContent.Length; ++idx)
    {
        Span<byte> encoded = input[idx].AsSpan();

        // The output array is sized from the scalar helper and allocated on every iteration.
        int capacity = SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(encoded);
        byte[] decoded = new byte[capacity];

        SimdBase64.SSE.Base64.DecodeFromBase64SSE(encoded, decoded, out int consumed, out int written, false);

        if (written == lengths[idx])
        {
            continue;
        }

        Console.WriteLine($"Error: {written} != {lengths[idx]}");
#pragma warning disable CA2201
        throw new Exception("Error");
    }
}

/// <summary>
/// Decodes each UTF-16 buffer in <c>input16</c> with the SSE decoder, allocating a new
/// output array per entry, and verifies the number of bytes written.
/// </summary>
/// <param name="data">Not read by this method; iteration is bounded by <c>FileContent.Length</c>.</param>
/// <param name="lengths">Expected number of decoded bytes for each entry.</param>
/// <exception cref="Exception">Thrown when a decoded size differs from the expected one.</exception>
public unsafe void RunSSEDecodingBenchmarkWithAllocUTF16(string[] data, int[] lengths)
{
    for (int i = 0; i < FileContent.Length; i++)
    {
        // Decode straight from input16[i]; the source string itself is not needed here.
        Span<char> base64 = input16[i].AsSpan();
        byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64)];

        // Both values are assigned by the callee, so they are declared inline as out variables.
        SimdBase64.SSE.Base64.DecodeFromBase64SSE(base64, dataoutput, out int bytesConsumed, out int bytesWritten, false);

        if (bytesWritten != lengths[i])
        {
            Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
#pragma warning disable CA2201
            throw new Exception("Error");
        }
    }
}

public unsafe void RunSSEDecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
public unsafe void RunAVX2DecodingBenchmarkUTF8(string[] data, int[] lengths)
{
for (int i = 0; i < FileContent.Length; i++)
{
//string s = FileContent[i];
byte[] base64 = input[i];
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64.AsSpan())];
byte[] dataoutput = output[i];
int bytesConsumed = 0;
int bytesWritten = 0;
SimdBase64.SSE.Base64.DecodeFromBase64SSE(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
if (bytesWritten != lengths[i])
{
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
Expand All @@ -325,16 +361,53 @@ public unsafe void RunSSEDecodingBenchmarkWithAllocUTF8(string[] data, int[] len
}
}

public unsafe void RunSSEDecodingBenchmarkWithAllocUTF16(string[] data, int[] lengths)
public unsafe void RunAVX2DecodingBenchmarkUTF16(string[] data, int[] lengths)
{
for (int i = 0; i < FileContent.Length; i++)
{
string s = FileContent[i];
char[] base64 = input16[i];
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64.AsSpan())];
ReadOnlySpan<char> base64 = s.AsSpan();
byte[] dataoutput = output[i];
int bytesConsumed = 0;
int bytesWritten = 0;
SimdBase64.SSE.Base64.DecodeFromBase64SSE(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
if (bytesWritten != lengths[i])
{
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
#pragma warning disable CA2201
throw new Exception("Error");
}
}
}

/// <summary>
/// Decodes each UTF-8 buffer in <c>input</c> with the AVX2 decoder, allocating a fresh
/// output array per entry, and checks the number of bytes written against
/// <paramref name="lengths"/>.
/// </summary>
/// <param name="data">Not read by this method; iteration is bounded by <c>FileContent.Length</c>.</param>
/// <param name="lengths">Expected number of decoded bytes for each entry.</param>
/// <exception cref="Exception">Thrown when a decoded size differs from the expected one.</exception>
public unsafe void RunAVX2DecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
{
    for (int idx = 0; idx < FileContent.Length; ++idx)
    {
        Span<byte> encoded = input[idx].AsSpan();

        // The output array is sized from the scalar helper and allocated on every iteration.
        int capacity = SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(encoded);
        byte[] decoded = new byte[capacity];

        SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(encoded, decoded, out int consumed, out int written, false);

        if (written == lengths[idx])
        {
            continue;
        }

        Console.WriteLine($"Error: {written} != {lengths[idx]}");
#pragma warning disable CA2201
        throw new Exception("Error");
    }
}

public unsafe void RunAVX2DecodingBenchmarkWithAllocUTF16(string[] data, int[] lengths)
{
for (int i = 0; i < FileContent.Length; i++)
{
string s = FileContent[i];
Span<char> base64 = input16[i].AsSpan();
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64)];
int bytesConsumed = 0;
int bytesWritten = 0;
SimdBase64.AVX2.Base64.DecodeFromBase64AVX2(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
if (bytesWritten != lengths[i])
{
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
Expand Down Expand Up @@ -383,17 +456,15 @@ public unsafe void RunARMDecodingBenchmarkUTF16(string[] data, int[] lengths)
}
}



public unsafe void RunARMDecodingBenchmarkWithAllocUTF8(string[] data, int[] lengths)
{
for (int i = 0; i < FileContent.Length; i++)
{
byte[] base64 = input[i];
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64.AsSpan())];
Span<byte> base64 = input[i].AsSpan();
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<byte>(base64)];
int bytesConsumed = 0;
int bytesWritten = 0;
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
if (bytesWritten != lengths[i])
{
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
Expand All @@ -408,11 +479,11 @@ public unsafe void RunARMDecodingBenchmarkWithAllocUTF16(string[] data, int[] le
for (int i = 0; i < FileContent.Length; i++)
{
string s = FileContent[i];
char[] base64 = input16[i];
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64.AsSpan())];
Span<char> base64 = input16[i].AsSpan();
byte[] dataoutput = new byte[SimdBase64.Scalar.Base64.MaximalBinaryLengthFromBase64Scalar<char>(base64)];
int bytesConsumed = 0;
int bytesWritten = 0;
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64.AsSpan(), dataoutput, out bytesConsumed, out bytesWritten, false);
SimdBase64.Arm.Base64.DecodeFromBase64ARM(base64, dataoutput, out bytesConsumed, out bytesWritten, false);
if (bytesWritten != lengths[i])
{
Console.WriteLine($"Error: {bytesWritten} != {lengths[i]}");
Expand Down Expand Up @@ -474,15 +545,15 @@ public unsafe void DotnetRuntimeSIMDBase64RealDataUTF8()
RunRuntimeSIMDDecodingBenchmarkUTF8(FileContent, DecodedLengths);
}

[Benchmark]
[BenchmarkCategory("default", "runtime")]
//[Benchmark]
//[BenchmarkCategory("default", "runtime")]
public unsafe void DotnetRuntimeSIMDBase64RealDataWithAllocUTF8()
{
RunRuntimeSIMDDecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
}

[Benchmark]
[BenchmarkCategory("default", "runtime")]
//[Benchmark]
//[BenchmarkCategory("default", "runtime")]
public unsafe void DotnetRuntimeBase64RealDataUTF16()
{
RunRuntimeDecodingBenchmarkUTF16(FileContent, DecodedLengths);
Expand All @@ -495,47 +566,77 @@ public unsafe void SSEDecodingRealDataUTF8()
RunSSEDecodingBenchmarkUTF8(FileContent, DecodedLengths);
}

[Benchmark]
[BenchmarkCategory("SSE")]
//[Benchmark]
//[BenchmarkCategory("SSE")]
public unsafe void SSEDecodingRealDataWithAllocUTF8()
{
RunSSEDecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
}

/// <summary>
/// Benchmark entry point that forwards <c>FileContent</c> and <c>DecodedLengths</c>
/// to <c>RunAVX2DecodingBenchmarkUTF8</c>.
/// </summary>
[Benchmark, BenchmarkCategory("AVX")]
public unsafe void AVX2DecodingRealDataUTF8()
    => RunAVX2DecodingBenchmarkUTF8(FileContent, DecodedLengths);

// The benchmark attributes below are commented out, so this method is not
// registered as a benchmark; it only forwards to the allocating AVX2 UTF-8 loop.
//[Benchmark]
//[BenchmarkCategory("AVX")]
public unsafe void AVX2DecodingRealDataWithAllocUTF8()
    => RunAVX2DecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);


/// <summary>
/// Benchmark entry point that forwards <c>FileContent</c> and <c>DecodedLengths</c>
/// to <c>RunARMDecodingBenchmarkUTF8</c>.
/// </summary>
[Benchmark, BenchmarkCategory("arm64")]
public unsafe void ARMDecodingRealDataUTF8()
    => RunARMDecodingBenchmarkUTF8(FileContent, DecodedLengths);

[Benchmark]
[BenchmarkCategory("arm64")]
//[Benchmark]
//[BenchmarkCategory("arm64")]
public unsafe void ARMDecodingRealDataWithAllocUTF8()
{
RunARMDecodingBenchmarkWithAllocUTF8(FileContent, DecodedLengths);
}

[Benchmark]
[BenchmarkCategory("arm64")]
//[Benchmark]
//[BenchmarkCategory("arm64")]
public unsafe void ARMDecodingRealDataUTF16()
{
RunARMDecodingBenchmarkUTF16(FileContent, DecodedLengths);
}

[Benchmark]
[BenchmarkCategory("SSE")]
//[Benchmark]
//[BenchmarkCategory("SSE")]
public unsafe void SSEDecodingRealDataUTF16()
{
RunSSEDecodingBenchmarkUTF16(FileContent, DecodedLengths);
}

[Benchmark]
[BenchmarkCategory("SSE")]
//[Benchmark]
//[BenchmarkCategory("SSE")]
public unsafe void SSEDecodingRealDataWithAllocUTF16()
{
RunSSEDecodingBenchmarkWithAllocUTF16(FileContent, DecodedLengths);
}

// The benchmark attributes below are commented out, so this method is not
// registered as a benchmark; it only forwards to the AVX2 UTF-16 loop.
//[Benchmark]
//[BenchmarkCategory("AVX")]
public unsafe void AVX2DecodingRealDataUTF16()
    => RunAVX2DecodingBenchmarkUTF16(FileContent, DecodedLengths);

// The benchmark attributes below are commented out, so this method is not
// registered as a benchmark; it only forwards to the allocating AVX2 UTF-16 loop.
//[Benchmark]
//[BenchmarkCategory("AVX")]
public unsafe void AVX2DecodingRealDataWithAllocUTF16()
    => RunAVX2DecodingBenchmarkWithAllocUTF16(FileContent, DecodedLengths);

}
#pragma warning disable CA1515
public class Program
Expand Down
2 changes: 0 additions & 2 deletions benchmark/benchmark.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,4 @@
</None>
</ItemGroup>



</Project>
12 changes: 7 additions & 5 deletions src/Base64.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
using System;
using System.Buffers;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;


namespace SimdBase64
{
public static class Base64 {
public static class Base64
{
/// <summary>
/// Forwards to <c>Scalar.Base64.MaximalBinaryLengthFromBase64Scalar</c> and returns its result.
/// </summary>
/// <typeparam name="T">Element type of the input span.</typeparam>
/// <param name="input">The base64 input passed through to the scalar helper.</param>
/// <returns>The value returned by the scalar helper.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int MaximalBinaryLengthFromBase64<T>(ReadOnlySpan<T> input)
    => Scalar.Base64.MaximalBinaryLengthFromBase64Scalar(input);
public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<byte> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false) {
public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<byte> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
{

if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
{
Expand All @@ -38,12 +39,13 @@ public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<byte> source,

}

public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<char> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false) {
public unsafe static OperationStatus DecodeFromBase64(ReadOnlySpan<char> source, Span<byte> dest, out int bytesConsumed, out int bytesWritten, bool isUrl = false)
{

if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
{
return Arm.Base64.DecodeFromBase64ARM(source, dest, out bytesConsumed, out bytesWritten, isUrl);
}
}
// To be completed
//if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported)
//{
Expand Down
Loading