diff --git a/libs/Microsoft.MixedReality.WebRTC.Native/test/Microsoft.MixedReality.WebRTC.Native.Tests.vcxproj b/libs/Microsoft.MixedReality.WebRTC.Native/test/Microsoft.MixedReality.WebRTC.Native.Tests.vcxproj
index 8d02b2341..2124ed080 100644
--- a/libs/Microsoft.MixedReality.WebRTC.Native/test/Microsoft.MixedReality.WebRTC.Native.Tests.vcxproj
+++ b/libs/Microsoft.MixedReality.WebRTC.Native/test/Microsoft.MixedReality.WebRTC.Native.Tests.vcxproj
@@ -61,6 +61,7 @@
+
diff --git a/libs/Microsoft.MixedReality.WebRTC.Native/test/memory_tests.cpp b/libs/Microsoft.MixedReality.WebRTC.Native/test/memory_tests.cpp
new file mode 100644
index 000000000..a713d8e00
--- /dev/null
+++ b/libs/Microsoft.MixedReality.WebRTC.Native/test/memory_tests.cpp
@@ -0,0 +1,172 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See LICENSE in the project root for license
+// information.
+
+#include "pch.h"
+
+#include "interop/interop_api.h"
+
+// Test fast path of mrsMemCpyStride() when data is packed.
+TEST(MemoryUtils, MemCpyStride_Fast) {
+  std::vector<uint8_t> s, d;  // Source and destination byte buffers
+  constexpr int kWidth = 32;
+  constexpr int kStride = kWidth;  // Stride equals width: rows are packed
+  constexpr int kHeight = 13;
+  s.resize(kStride * kHeight);
+  d.resize(kStride * kHeight);
+  {
+    uint8_t* src = s.data();
+    for (int j = 0; j < kHeight; ++j) {
+      for (int i = 0; i < kWidth; ++i) {
+        *src++ = (rand() & 0xFF);  // Arbitrary byte value
+      }
+    }
+  }
+  {
+    const void* const src = s.data();
+    void* const dst = d.data();
+    mrsMemCpyStride(dst, kStride, src, kStride, kWidth, kHeight);
+    // Data is contiguous
+    ASSERT_EQ(0, memcmp(src, dst, kStride * kHeight));
+  }
+}
+
+// Test slow path of mrsMemCpyStride() with stride, without changing the
+// packing.
+TEST(MemoryUtils, MemCpyStride_Stride) {
+  std::vector<uint8_t> s, d;  // Source and destination byte buffers
+  constexpr int kWidth = 29;
+  constexpr int kStride = 32;  // Same row stride for source and destination
+  constexpr int kHeight = 13;
+  s.resize(kStride * kHeight);
+  d.resize(kStride * kHeight);
+  {
+    uint8_t* src = s.data();
+    for (int j = 0; j < kHeight; ++j) {
+      for (int i = 0; i < kWidth; ++i) {
+        *src++ = (rand() & 0xFF);  // Arbitrary byte value
+      }
+      for (int i = kWidth; i < kStride; ++i) {
+        *src++ = 0xCF;  // Fixed filler byte for the row padding
+      }
+    }
+  }
+  {
+    const void* const src = s.data();
+    void* const dst = d.data();
+    mrsMemCpyStride(dst, kStride, src, kStride, kWidth, kHeight);
+  }
+  {
+    const uint8_t* src = s.data();
+    const uint8_t* dst = d.data();
+    for (int j = 0; j < kHeight; ++j) {
+      // Test row
+      bool row_equal = true;
+      for (int i = 0; i < kWidth; ++i) {
+        row_equal = row_equal && (*src == *dst);
+        ++src;
+        ++dst;
+      }
+      ASSERT_TRUE(row_equal);
+      // Skip row padding
+      for (int i = kWidth; i < kStride; ++i) {
+        ++src;
+        ++dst;
+      }
+    }
+  }
+}
+
+// Test slow path of mrsMemCpyStride() with stride, expanding the one existing
+// in the source buffer.
+TEST(MemoryUtils, MemCpyStride_ExpandStride) {
+  std::vector<uint8_t> s, d;  // Source and destination byte buffers
+  constexpr int kWidth = 29;
+  constexpr int kSrcStride = 32;
+  constexpr int kDstStride = 48;  // Destination rows are wider than source rows
+  constexpr int kHeight = 13;
+  s.resize(kSrcStride * kHeight);
+  d.resize(kDstStride * kHeight);
+  {
+    uint8_t* src = s.data();
+    for (int j = 0; j < kHeight; ++j) {
+      for (int i = 0; i < kWidth; ++i) {
+        *src++ = (rand() & 0xFF);  // Arbitrary byte value
+      }
+      for (int i = kWidth; i < kSrcStride; ++i) {
+        *src++ = 0xCF;  // Fixed filler byte for the row padding
+      }
+    }
+  }
+  {
+    const void* const src = s.data();
+    void* const dst = d.data();
+    mrsMemCpyStride(dst, kDstStride, src, kSrcStride, kWidth, kHeight);
+  }
+  {
+    const uint8_t* src = s.data();
+    const uint8_t* dst = d.data();
+    for (int j = 0; j < kHeight; ++j) {
+      // Test row
+      bool row_equal = true;
+      for (int i = 0; i < kWidth; ++i) {
+        row_equal = row_equal && (*src == *dst);
+        ++src;
+        ++dst;
+      }
+      ASSERT_TRUE(row_equal);
+      // Skip row padding
+      for (int i = kWidth; i < kSrcStride; ++i) {
+        ++src;
+      }
+      for (int i = kWidth; i < kDstStride; ++i) {
+        ++dst;
+      }
+    }
+  }
+}
+
+// Test slow path of mrsMemCpyStride() with stride, packing the data on output.
+TEST(MemoryUtils, MemCpyStride_StrideToPack) {
+  std::vector<uint8_t> s, d;  // Source and destination byte buffers
+  constexpr int kWidth = 29;
+  constexpr int kSrcStride = 32;
+  constexpr int kDstStride = kWidth;  // Destination rows are packed
+  constexpr int kHeight = 13;
+  s.resize(kSrcStride * kHeight);
+  d.resize(kDstStride * kHeight);
+  {
+    uint8_t* src = s.data();
+    for (int j = 0; j < kHeight; ++j) {
+      for (int i = 0; i < kWidth; ++i) {
+        *src++ = (rand() & 0xFF);  // Arbitrary byte value
+      }
+      for (int i = kWidth; i < kSrcStride; ++i) {
+        *src++ = 0xCF;  // Fixed filler byte for the row padding
+      }
+    }
+  }
+  {
+    const void* const src = s.data();
+    void* const dst = d.data();
+    mrsMemCpyStride(dst, kDstStride, src, kSrcStride, kWidth, kHeight);
+  }
+  {
+    const uint8_t* src = s.data();
+    const uint8_t* dst = d.data();
+    for (int j = 0; j < kHeight; ++j) {
+      // Test row
+      bool row_equal = true;
+      for (int i = 0; i < kWidth; ++i) {
+        row_equal = row_equal && (*src == *dst);
+        ++src;
+        ++dst;
+      }
+      ASSERT_TRUE(row_equal);
+      // Skip row padding
+      for (int i = kWidth; i < kSrcStride; ++i) {
+        ++src;
+      }
+    }
+  }
+}
diff --git a/libs/Microsoft.MixedReality.WebRTC/VideoFrame.cs b/libs/Microsoft.MixedReality.WebRTC/VideoFrame.cs
index f116fe356..5a5aa6e6b 100644
--- a/libs/Microsoft.MixedReality.WebRTC/VideoFrame.cs
+++ b/libs/Microsoft.MixedReality.WebRTC/VideoFrame.cs
@@ -78,22 +78,25 @@ public void CopyTo(byte[] buffer)
             {
                 fixed (void* ptr = buffer)
                 {
+                    // Destination buffer is packed and contiguous
+                    ulong dstSizeYA = (ulong)width * height;
+                    ulong dstSizeUV = dstSizeYA / 4; // NOTE(review): matches the (width/2)*(height/2) bytes copied below only for even width and height - confirm
+                    int dstStrideYA = (int)width;
+                    int dstStrideUV = dstStrideYA / 2;
+
+                    // Note : System.Buffer.MemoryCopy() essentially does the same (without stride), but gets transpiled by IL2CPP
                     // into the C++ corresponding to the IL instead of a single memcpy() call. This results in a large overhead,
                     // especially in Debug config where one can lose 5-10 FPS just because of this.
                     void* dst = ptr;
-                    ulong sizeY = (ulong)strideY * height;
-                    Utils.MemCpyStride(dst, strideY, (void*)dataY, strideY, (int)width, (int)height);
-                    dst = (void*)((ulong)dst + sizeY);
-                    ulong sizeU = (ulong)strideU * height / 2;
-                    Utils.MemCpyStride(dst, strideU, (void*)dataU, strideU, (int)width / 2, (int)height / 2);
-                    dst = (void*)((ulong)dst + sizeU);
-                    ulong sizeV = (ulong)strideV * height / 2;
-                    Utils.MemCpyStride(dst, strideV, (void*)dataV, strideV, (int)width / 2, (int)height / 2);
+                    Utils.MemCpyStride(dst, dstStrideYA, (void*)dataY, strideY, (int)width, (int)height);
+                    dst = (void*)((ulong)dst + dstSizeYA); // Advance past the Y plane
+                    Utils.MemCpyStride(dst, dstStrideUV, (void*)dataU, strideU, (int)width / 2, (int)height / 2);
+                    dst = (void*)((ulong)dst + dstSizeUV); // Advance past the U plane
+                    Utils.MemCpyStride(dst, dstStrideUV, (void*)dataV, strideV, (int)width / 2, (int)height / 2);
                     if (dataA.ToPointer() != null)
                     {
-                        dst = (void*)((ulong)dst + sizeV);
-                        Utils.MemCpyStride(dst, strideA, (void*)dataA, strideA, (int)width, (int)height);
+                        dst = (void*)((ulong)dst + dstSizeUV); // Advance past the V plane (same size as U)
+                        Utils.MemCpyStride(dst, dstStrideYA, (void*)dataA, strideA, (int)width, (int)height);
                     }
                 }
             }