Skip to content

Commit

Permalink
Adding customized min/max implementation using avx/vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
Matias Bjarland committed Nov 7, 2024
1 parent 2803c34 commit 749baef
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 11 deletions.
139 changes: 139 additions & 0 deletions src/SharpGLTF.Core/Schema2/VectorMinMax.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
using System;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace SharpGLTF.Schema2
{
/// <summary>
/// Somewhat optimized search for the per-component minimum and maximum of a buffer of interleaved floats
/// (for example <c>x0 y0 z0 x1 y1 z1 ...</c> when <c>dimensions</c> is 3).
/// A multi threaded version was also tried, but it was not faster than this implementation for the data
/// sets it was tested against. If anybody feels so inclined, please feel free to try and improve this further.
/// </summary>
/// <remarks>
/// NOTE(review): the SIMD min/max instructions and <see cref="Math.Min(float, float)"/> presumably do not
/// propagate NaN the same way, so inputs containing NaN may produce path-dependent results - confirm if
/// NaN inputs are expected.
/// </remarks>
public static class VectorMinMax
{
    // Number of float lanes in a 256 bit AVX register.
    private const int AvxLaneCount = 8;

    /// <summary>
    /// Finds the minimum and maximum value of every component of an interleaved float buffer.
    /// </summary>
    /// <param name="data">Tightly packed, interleaved components; the length must be a multiple of <paramref name="dimensions"/>.</param>
    /// <param name="dimensions">Number of components per element; must be greater than zero.</param>
    /// <returns>
    /// Two arrays of length <paramref name="dimensions"/> with the per-component minimum and maximum.
    /// When <paramref name="data"/> is empty, the arrays keep their seed values
    /// (<see cref="float.MaxValue"/> for min, <see cref="float.MinValue"/> for max).
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="dimensions"/> is zero or negative.</exception>
    /// <exception cref="ArgumentException">The length of <paramref name="data"/> is not divisible by <paramref name="dimensions"/>.</exception>
    public static (float[] min, float[] max) FindMinMax(ReadOnlySpan<float> data, int dimensions) {
        // Guard first: a zero dimension would otherwise surface as a DivideByZeroException below.
        if (dimensions <= 0) {
            throw new ArgumentOutOfRangeException(nameof(dimensions), dimensions, "Dimensions must be greater than zero");
        }

        if (data.Length % dimensions != 0) {
            throw new ArgumentException($"Data length must be divisible by {dimensions}", nameof(data));
        }

        var min = new float[dimensions];
        var max = new float[dimensions];
        Array.Fill(min, float.MaxValue);
        Array.Fill(max, float.MinValue);

        // Just use SIMD without parallelization for each individual call
        ProcessSIMD(data, dimensions, min, max);

        return (min, max);
    }

    /// <summary>
    /// Pins the data and dispatches to the widest implementation the hardware supports.
    /// </summary>
    // ReSharper disable once InconsistentNaming
    private static unsafe void ProcessSIMD(ReadOnlySpan<float> data, int dimensions, float[] min, float[] max) {
        fixed (float* ptr = data) {
            // Only AVX (not AVX2) instructions are used by the 256 bit path, so AVX support is sufficient.
            if (Avx.IsSupported && data.Length >= dimensions * AvxLaneCount) {
                // intel processors, 8 floats = 256 bits
                ProcessWithAVX(ptr, data.Length, dimensions, min, max);
            } else if (Vector.IsHardwareAccelerated && data.Length >= dimensions * Vector<float>.Count) {
                // on arm / apple silicon etc, Vector<float>.Count usually == 4. 4 floats = 128 bits
                ProcessWithVector(ptr, data.Length, dimensions, min, max);
            } else {
                // and otherwise fall back to for loops and scalar operations, comparing one float at a time
                ProcessScalar(ptr, data.Length, dimensions, min, max);
            }
        }
    }

    /// <summary>
    /// 256 bit implementation. The data is consumed in blocks of <c>dimensions * 8</c> floats, using one
    /// accumulator register per 8 consecutive floats of the block.
    /// </summary>
    // ReSharper disable once InconsistentNaming
    private static unsafe void ProcessWithAVX(float* ptr, int length, int dimensions, float[] min, float[] max) {
        var minVecs = new Vector256<float>[dimensions];
        var maxVecs = new Vector256<float>[dimensions];

        for (int v = 0; v < dimensions; v++) {
            minVecs[v] = Vector256.Create(float.MaxValue);
            maxVecs[v] = Vector256.Create(float.MinValue);
        }

        int blockLength = dimensions * AvxLaneCount;
        int vectorizedLength = length - (length % blockLength);

        for (int i = 0; i < vectorizedLength; i += blockLength) {
            for (int v = 0; v < dimensions; v++) {
                var vec = Avx.LoadVector256(ptr + i + v * AvxLaneCount);
                minVecs[v] = Avx.Min(minVecs[v], vec);
                maxVecs[v] = Avx.Max(maxVecs[v], vec);
            }
        }

        // Because the data is interleaved, lane j of accumulator v always holds the float at block offset
        // (v * 8 + j), which belongs to component (v * 8 + j) % dimensions. Every block starts on an element
        // boundary, so this mapping is the same for all blocks.
        float* lanes = stackalloc float[AvxLaneCount];
        for (int v = 0; v < dimensions; v++) {
            Avx.Store(lanes, minVecs[v]);
            for (int j = 0; j < AvxLaneCount; j++) {
                int component = (v * AvxLaneCount + j) % dimensions;
                min[component] = Math.Min(min[component], lanes[j]);
            }

            Avx.Store(lanes, maxVecs[v]);
            for (int j = 0; j < AvxLaneCount; j++) {
                int component = (v * AvxLaneCount + j) % dimensions;
                max[component] = Math.Max(max[component], lanes[j]);
            }
        }

        ProcessRemainingElements(ptr, vectorizedLength, length, dimensions, min, max);
    }

    /// <summary>
    /// Portable <see cref="Vector{T}"/> implementation. Same blocking scheme as the AVX path, with the
    /// lane count taken from <c>Vector&lt;float&gt;.Count</c>.
    /// </summary>
    private static unsafe void ProcessWithVector(float* ptr, int length, int dimensions, float[] min, float[] max) {
        var minVecs = new Vector<float>[dimensions];
        var maxVecs = new Vector<float>[dimensions];
        int vectorSize = Vector<float>.Count;

        for (int v = 0; v < dimensions; v++) {
            minVecs[v] = new Vector<float>(float.MaxValue);
            maxVecs[v] = new Vector<float>(float.MinValue);
        }

        int blockLength = dimensions * vectorSize;
        int vectorizedLength = length - (length % blockLength);

        // Main vectorized loop
        for (int i = 0; i < vectorizedLength; i += blockLength) {
            for (int v = 0; v < dimensions; v++) {
                var span = new ReadOnlySpan<float>(ptr + i + v * vectorSize, vectorSize);
                var vec = new Vector<float>(span);
                minVecs[v] = Vector.Min(minVecs[v], vec);
                maxVecs[v] = Vector.Max(maxVecs[v], vec);
            }
        }

        // Reduce vectors to scalar values. Lane j of accumulator v belongs to component
        // (v * vectorSize + j) % dimensions; min/max arrive pre-seeded and must not be reset here,
        // because several accumulators contribute to the same component.
        for (int v = 0; v < dimensions; v++) {
            for (int j = 0; j < vectorSize; j++) {
                int component = (v * vectorSize + j) % dimensions;
                min[component] = Math.Min(min[component], minVecs[v][j]);
                max[component] = Math.Max(max[component], maxVecs[v][j]);
            }
        }

        ProcessRemainingElements(ptr, vectorizedLength, length, dimensions, min, max);
    }

    /// <summary>
    /// Scalar fallback: folds every element into <paramref name="min"/> and <paramref name="max"/>.
    /// </summary>
    private static unsafe void ProcessScalar(float* ptr, int length, int dimensions, float[] min, float[] max) {
        ProcessRemainingElements(ptr, 0, length, dimensions, min, max);
    }

    /// <summary>
    /// Folds the elements in <c>[start, length)</c> into <paramref name="min"/> and <paramref name="max"/>
    /// one float at a time. <paramref name="start"/> must lie on an element boundary.
    /// </summary>
    private static unsafe void ProcessRemainingElements(float* ptr, int start, int length, int dimensions, float[] min, float[] max) {
        for (int i = start; i < length; i += dimensions) {
            for (int d = 0; d < dimensions; d++) {
                float value = ptr[i + d];
                min[d] = Math.Min(min[d], value);
                max[d] = Math.Max(max[d], value);
            }
        }
    }
}
}
47 changes: 36 additions & 11 deletions src/SharpGLTF.Core/Schema2/gltf.Accessors.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Numerics;

using System.Runtime.InteropServices;
using SharpGLTF.Memory;

using VALIDATIONCTX = SharpGLTF.Validation.ValidationContext;
Expand Down Expand Up @@ -149,7 +149,32 @@ protected override IEnumerable<ExtraProperties> GetLogicalChildren()
return base.GetLogicalChildren().ConcatElements(_sparse);
}

public void UpdateBounds()
/// <summary>
/// Recomputes the per-component minimum and maximum of this accessor and stores them in
/// <c>_min</c> / <c>_max</c>. Does nothing beyond clearing the bounds when the accessor is empty
/// or is not FLOAT encoded.
/// </summary>
public void UpdateBounds()
{
    this._min.Clear();
    this._max.Clear();

    if (this.Count == 0) return;

    // With the current limitations of the serializer, we can only handle floating point values.
    if (this.Encoding != EncodingType.FLOAT) return;

    // https://github.com/KhronosGroup/glTF-Validator/issues/79

    var dimensions = this.Dimensions.DimCount();
    var itemByteLength = dimensions * sizeof(float);
    var byteStride = this.SourceBufferView.ByteStride;

    float[] min;
    float[] max;

    // NOTE(review): a stride of 0 presumably means "not specified / tightly packed" - confirm.
    // Any other stride value takes the generic path below, which handles it the way MultiArray does.
    if (byteStride == 0 || byteStride == itemByteLength)
    {
        // Fast path: only scan the bytes that belong to THIS accessor. The buffer view may be shared
        // with other accessors, so the window is limited by ByteOffset and Count.
        ReadOnlySpan<byte> content = this.SourceBufferView.Content;
        var floatSpan = MemoryMarshal.Cast<byte, float>(content.Slice(this.ByteOffset, this.Count * itemByteLength));

        (min, max) = VectorMinMax.FindMinMax(floatSpan, dimensions: dimensions);
    }
    else
    {
        // Strided data: the accessor's items are not contiguous, so a flat float scan would mix in
        // bytes that are not part of this accessor. Walk the items through MultiArray instead.
        min = new float[dimensions];
        max = new float[dimensions];

        for (int j = 0; j < dimensions; ++j)
        {
            min[j] = float.MaxValue;
            max[j] = float.MinValue;
        }

        var array = new MultiArray(this.SourceBufferView.Content, this.ByteOffset, this.Count, this.SourceBufferView.ByteStride, dimensions, this.Encoding, false);

        var current = new float[dimensions];

        for (int i = 0; i < array.Count; ++i)
        {
            array.CopyItemTo(i, current);

            for (int j = 0; j < current.Length; ++j)
            {
                min[j] = Math.Min(min[j], current[j]);
                max[j] = Math.Max(max[j], current[j]);
            }
        }
    }

    for (var i = 0; i < min.Length; i++)
    {
        _min.Add(min[i]);
        _max.Add(max[i]);
    }
}

public void UpdateBoundsOld()
{
this._min.Clear();
this._max.Clear();
Expand All @@ -169,18 +194,18 @@ public void UpdateBounds()
this._max.Add(double.MinValue);
}

var array = new MultiArray(this.SourceBufferView.Content, this.ByteOffset, this.Count, this.SourceBufferView.ByteStride, dimensions, this.Encoding, false);
// Interpret SourceBufferView.Content as a Span<float> without copying
var floatSpan = MemoryMarshal.Cast<byte, float>(this.SourceBufferView.Content);

var current = new float[dimensions];

for (int i = 0; i < array.Count; ++i)
// Iterate over the span directly, assuming the data is in the expected float format
for (int i = 0; i < this.Count; i += dimensions)
{
array.CopyItemTo(i, current);

for (int j = 0; j < current.Length; ++j)
for (int j = 0; j < dimensions; ++j)
{
this._min[j] = Math.Min(this._min[j], current[j]);
this._max[j] = Math.Max(this._max[j], current[j]);
// Calculate the index based on the stride and dimensions
var value = floatSpan[i * dimensions + j];
this._min[j] = Math.Min(this._min[j], value);
this._max[j] = Math.Max(this._max[j], value);
}
}
}
Expand Down

0 comments on commit 749baef

Please sign in to comment.