Skip to content

Commit

Permalink
Shuffle scalar fallbacks added
Browse files Browse the repository at this point in the history
  • Loading branch information
macaba committed Sep 21, 2024
1 parent d366d5b commit 9b12ce2
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 103 deletions.
6 changes: 3 additions & 3 deletions source/TS.NET.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@

DefaultConfig.Instance.WithOptions(ConfigOptions.JoinSummary);
//_ = BenchmarkRunner.Run(typeof(Program).Assembly);
//_ = BenchmarkRunner.Run<ShuffleI8Benchmark>();
_ = BenchmarkRunner.Run<ShuffleI8Benchmark>();
//_ = BenchmarkRunner.Run<RisingEdgeTriggerBenchmark>();
//_ = BenchmarkRunner.Run<FallingEdgeTriggerBenchmark>();
//_ = BenchmarkRunner.Run<AnyEdgeTriggerBenchmark>();
//_ = BenchmarkRunner.Run<PipelineBenchmark>();
_ = BenchmarkRunner.Run<BoxcarAverageI8Benchmark>();
//_ = BenchmarkRunner.Run<BoxcarAverageI8Benchmark>();
//_ = BenchmarkRunner.Run<SumU8toI32Benchmark>();
_ = BenchmarkRunner.Run<DecimationI8Benchmark>();
//_ = BenchmarkRunner.Run<DecimationI8Benchmark>();
Console.ReadKey();
16 changes: 16 additions & 0 deletions source/TS.NET.Benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
## ShuffleI8

Scalar processing

| Method | Mean | Error | StdDev | Allocated |
|----------------------------------- |---------:|--------:|--------:|----------:|
| 'Four channel shuffle (125 x 8MS)' | 226.7 ms | 0.37 ms | 0.31 ms | 133 B |
| 'Two channel shuffle (125 x 8MS)' | 238.2 ms | 0.12 ms | 0.10 ms | 21 B |

AVX2 processing

| Method | Mean | Error | StdDev | Allocated |
|------------------------------------------------ |---------:|---------:|---------:|----------:|
| 'Four channel shuffle (125 x 8MS)' | 34.85 ms | 0.052 ms | 0.047 ms | 7 B |
| 'Two channel shuffle (125 x 8MS)' | 37.77 ms | 0.099 ms | 0.092 ms | 29 B |

## RisingEdgeTriggerI8

Scalar processing
Expand Down
9 changes: 5 additions & 4 deletions source/TS.NET.Benchmarks/ShuffleI8Benchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ public class ShuffleI8Benchmark
private const int byteBufferSize = 8000000;
private readonly Memory<sbyte> input = new sbyte[byteBufferSize];
private readonly Memory<sbyte> output = new sbyte[byteBufferSize];
private ShuffleI8 shuffle = new ShuffleI8(false);

/// <summary>
/// Benchmark setup hook: hands the span over the <c>input</c> buffer to
/// <c>Waveforms.FourChannelCountSignedByte</c> so the buffer is populated
/// before the shuffle benchmarks read from it.
/// </summary>
/// <remarks>
/// NOTE(review): the exact pattern written is defined by the Waveforms helper
/// (presumably an interleaved four-channel count) - confirm against its source.
/// </remarks>
[GlobalSetup]
public void Setup() => Waveforms.FourChannelCountSignedByte(input.Span);

[Benchmark(Description = "Four channel shuffle [production] (125 x 8MS)")]
[Benchmark(Description = "Four channel shuffle (125 x 8MS)")]
public void FourChannels()
{
for (int i = 0; i < 125; i++)
ShuffleI8.FourChannels(input.Span, output.Span);
shuffle.FourChannels(input.Span, output.Span);
}

//[Benchmark(Description = "Four channel shuffle [run length 1, baseline] (125 x 8MS)")]
Expand Down Expand Up @@ -89,11 +90,11 @@ public void FourChannels()
// Shuffle.FourChannelsRunLength32NoSimd(input.Span, output.Span);
//}

[Benchmark(Description = "Two channel shuffle [production] (125 x 8MS)")]
[Benchmark(Description = "Two channel shuffle (125 x 8MS)")]
public void TwoChannels()
{
for (int i = 0; i < 125; i++)
ShuffleI8.TwoChannels(input.Span, output.Span);
shuffle.TwoChannels(input.Span, output.Span);
}

//[Benchmark(Description = "Two channel shuffle [run length 1, variant A] (125 x 8MS)")]
Expand Down
19 changes: 12 additions & 7 deletions source/TS.NET.Engine/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using Microsoft.Extensions.Logging;
using NReco.Logging.File;
using System.CommandLine;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
using TS.NET;
Expand Down Expand Up @@ -71,18 +72,22 @@ static void Start(int deviceIndex, string configurationFilePath)
});
var logger = loggerFactory.CreateLogger("TS.NET.Engine");

// Validation of CPU architecture
if (!Avx2.IsSupported)
if (RuntimeInformation.ProcessArchitecture == Architecture.X86 || RuntimeInformation.ProcessArchitecture == Architecture.X64)
{
if (AdvSimd.Arm64.IsSupported)
if (!Avx2.IsSupported)
{
logger?.LogCritical("AArch64 not yet supported.");
return;
logger?.LogWarning("x86/x64 CPU without AVX2. CPU load will be high.");
}
}
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
{
if (!AdvSimd.Arm64.IsSupported)
{
logger?.LogWarning("AArch64 CPU without Neon. CPU load will be high.");
}
else
{
logger?.LogCritical("CPU does not support AVX2.");
return;
logger?.LogWarning("AArch64 CPU with Neon. Neon hot paths not implemented. CPU load will be high.");
}
}

Expand Down
49 changes: 25 additions & 24 deletions source/TS.NET.Engine/Tasks/ProcessingThread.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,15 @@ private static void Loop(
// Shuffle buffers. Only needed for 2/4 channel modes.
Span<sbyte> shuffleBuffer = new sbyte[ThunderscopeMemory.Length];
// --2 channel buffers
int blockLength_2 = (int)ThunderscopeMemory.Length / 2;
Span<sbyte> postShuffleCh1_2 = shuffleBuffer.Slice(0, blockLength_2);
Span<sbyte> postShuffleCh2_2 = shuffleBuffer.Slice(blockLength_2, blockLength_2);
int blockLength_2Ch = (int)ThunderscopeMemory.Length / 2;
Span<sbyte> shuffleBuffer2Ch_1 = shuffleBuffer.Slice(0, blockLength_2Ch);
Span<sbyte> shuffleBuffer2Ch_2 = shuffleBuffer.Slice(blockLength_2Ch, blockLength_2Ch);
// --4 channel buffers
int blockLength_4 = (int)ThunderscopeMemory.Length / 4;
Span<sbyte> postShuffleCh1_4 = shuffleBuffer.Slice(0, blockLength_4);
Span<sbyte> postShuffleCh2_4 = shuffleBuffer.Slice(blockLength_4, blockLength_4);
Span<sbyte> postShuffleCh3_4 = shuffleBuffer.Slice(blockLength_4 * 2, blockLength_4);
Span<sbyte> postShuffleCh4_4 = shuffleBuffer.Slice(blockLength_4 * 3, blockLength_4);
int blockLength_4Ch = (int)ThunderscopeMemory.Length / 4;
Span<sbyte> shuffleBuffer4Ch_1 = shuffleBuffer.Slice(0, blockLength_4Ch);
Span<sbyte> shuffleBuffer4Ch_2 = shuffleBuffer.Slice(blockLength_4Ch, blockLength_4Ch);
Span<sbyte> shuffleBuffer4Ch_3 = shuffleBuffer.Slice(blockLength_4Ch * 2, blockLength_4Ch);
Span<sbyte> shuffleBuffer4Ch_4 = shuffleBuffer.Slice(blockLength_4Ch * 3, blockLength_4Ch);
Span<uint> captureEndIndices = new uint[ThunderscopeMemory.Length / 1000]; // 1000 samples is the minimum window width

// Periodic debug display variables
Expand Down Expand Up @@ -140,6 +140,7 @@ private static void Loop(

AdcChannelMode cachedAdcChannelMode = AdcChannelMode.Quad;
IEdgeTriggerI8 edgeTriggerI8 = new RisingEdgeTriggerI8();
ShuffleI8 shuffle = new ShuffleI8();
bool runMode = true;
bool forceTriggerLatch = false; // "Latch" because it will reset state back to false. If the force is invoked and a trigger happens anyway, it will be reset (effectively ignoring it and only updating the bridge once).
bool singleTriggerLatch = false; // "Latch" because it will reset state back to false. When reset, runTrigger will be set to false.
Expand Down Expand Up @@ -364,13 +365,13 @@ private static void Loop(
break;
case AdcChannelMode.Dual:
// Shuffle
ShuffleI8.TwoChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
shuffle.TwoChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
// Finished with the memory, return it
inputChannel.Write(inputDataDto.Memory);
// Write to circular buffer
circularBuffer1.Write(postShuffleCh1_2);
circularBuffer2.Write(postShuffleCh2_2);
streamSampleCounter += postShuffleCh1_2.Length;
circularBuffer1.Write(shuffleBuffer2Ch_1);
circularBuffer2.Write(shuffleBuffer2Ch_2);
streamSampleCounter += shuffleBuffer2Ch_1.Length;
// Trigger
if (runMode)
{
Expand All @@ -381,9 +382,9 @@ private static void Loop(
case TriggerMode.Auto:
if (hardwareConfig.IsTriggerChannelAnEnabledChannel(processingConfig.TriggerChannel))
{
var triggerChannelBuffer = postShuffleCh2_2;
var triggerChannelBuffer = shuffleBuffer2Ch_2;
if (hardwareConfig.DualChannelModeIsTriggerChannelInFirstPosition(processingConfig.TriggerChannel))
triggerChannelBuffer = postShuffleCh1_2;
triggerChannelBuffer = shuffleBuffer2Ch_1;

uint captureEndCount = 0;
edgeTriggerI8.Process(input: triggerChannelBuffer, captureEndIndices: captureEndIndices, out captureEndCount);
Expand Down Expand Up @@ -435,15 +436,15 @@ private static void Loop(
break;
case AdcChannelMode.Quad:
// Shuffle
ShuffleI8.FourChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
shuffle.FourChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
// Finished with the memory, return it
inputChannel.Write(inputDataDto.Memory);
// Write to circular buffer
circularBuffer1.Write(postShuffleCh1_4);
circularBuffer2.Write(postShuffleCh2_4);
circularBuffer3.Write(postShuffleCh3_4);
circularBuffer4.Write(postShuffleCh4_4);
streamSampleCounter += postShuffleCh1_4.Length;
circularBuffer1.Write(shuffleBuffer4Ch_1);
circularBuffer2.Write(shuffleBuffer4Ch_2);
circularBuffer3.Write(shuffleBuffer4Ch_3);
circularBuffer4.Write(shuffleBuffer4Ch_4);
streamSampleCounter += shuffleBuffer4Ch_1.Length;
// Trigger
if (runMode)
{
Expand All @@ -456,10 +457,10 @@ private static void Loop(
{
var triggerChannelBuffer = processingConfig.TriggerChannel switch
{
TriggerChannel.Channel1 => postShuffleCh1_4,
TriggerChannel.Channel2 => postShuffleCh2_4,
TriggerChannel.Channel3 => postShuffleCh3_4,
TriggerChannel.Channel4 => postShuffleCh4_4,
TriggerChannel.Channel1 => shuffleBuffer4Ch_1,
TriggerChannel.Channel2 => shuffleBuffer4Ch_2,
TriggerChannel.Channel3 => shuffleBuffer4Ch_3,
TriggerChannel.Channel4 => shuffleBuffer4Ch_4,
_ => throw new ArgumentException("Invalid TriggerChannel value")
};

Expand Down
19 changes: 14 additions & 5 deletions source/TS.NET.Tests/ShuffleI8Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,17 @@ namespace TS.NET.Tests
{
public class ShuffleI8Tests
{
const bool forceScalar = false;

[Fact]
public void ShuffleI8_FourChannels_Samples64()
{
const int length = 64;
ReadOnlySpan<sbyte> input = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
Span<sbyte> output = new sbyte[length];

ShuffleI8.FourChannels(input, output);
var shuffle = new ShuffleI8(forceScalar);
shuffle.FourChannels(input, output);

Span<sbyte> expectedOutput = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4];

Expand All @@ -36,7 +39,8 @@ public void ShuffleI8_FourChannels_Samples128()
}
Span<sbyte> output = new sbyte[length];

ShuffleI8.FourChannels(input, output);
var shuffle = new ShuffleI8(forceScalar);
shuffle.FourChannels(input, output);

Span<sbyte> expectedOutput = new sbyte[length];
var runLength = length / 4;
Expand Down Expand Up @@ -65,7 +69,8 @@ public void ShuffleI8_FourChannels_Samples8388608()
}
Span<sbyte> output = new sbyte[length];

ShuffleI8.FourChannels(input, output);
var shuffle = new ShuffleI8(forceScalar);
shuffle.FourChannels(input, output);

Span<sbyte> expectedOutput = new sbyte[length];
var runLength = length / 4;
Expand Down Expand Up @@ -94,6 +99,7 @@ public void ShuffleI8_FourChannels_RunLength1_VariantA_Samples128()
}
Span<sbyte> output = new sbyte[length];

var shuffle = new ShuffleI8(forceScalar);
ShuffleI8.FourChannelsRunLength1VariantA(input, output);

Span<sbyte> expectedOutput = new sbyte[length];
Expand Down Expand Up @@ -247,6 +253,7 @@ public void ShuffleI8_FourChannels_RunLength32_Samples1024()
i += 32;
}
Span<sbyte> output = new sbyte[length];
var shuffle = new ShuffleI8(forceScalar);
ShuffleI8.FourChannelsRunLength32(input, output);

for (int i = 0; i < 256; i++)
Expand Down Expand Up @@ -279,7 +286,8 @@ public void ShuffleI8_TwoChannels_Samples64()
}
Span<sbyte> output = new sbyte[length];

ShuffleI8.TwoChannels(input, output);
var shuffle = new ShuffleI8(forceScalar);
shuffle.TwoChannels(input, output);

Span<sbyte> expectedOutput = new sbyte[length];
var runLength = length / 2;
Expand All @@ -304,7 +312,8 @@ public void ShuffleI8_TwoChannels_Samples8388608()
}
Span<sbyte> output = new sbyte[length];

ShuffleI8.TwoChannels(input, output);
var shuffle = new ShuffleI8(forceScalar);
shuffle.TwoChannels(input, output);

Span<sbyte> expectedOutput = new sbyte[length];
var runLength = length / 2;
Expand Down
Loading

0 comments on commit 9b12ce2

Please sign in to comment.