From bd73dc43484ce918d7be547e0ac8da006d37e670 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Thu, 26 Sep 2024 15:45:05 -0700 Subject: [PATCH] temporarily enable intermediate result checks for emulated atomics --- samples/16_floatatomics/main.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/samples/16_floatatomics/main.cpp b/samples/16_floatatomics/main.cpp index 5f66f4b..574fad6 100644 --- a/samples/16_floatatomics/main.cpp +++ b/samples/16_floatatomics/main.cpp @@ -29,6 +29,7 @@ float atomic_add_f(volatile global float* addr, float val) //#pragma message("using AMD atomics") return __builtin_amdgcn_global_atomic_fadd_f32(addr, val); #else // fallback, see: https://forums.developer.nvidia.com/t/atomicadd-float-float-atomicmul-float-float/14639/7 + #if 0 //#pragma message("using emulated float atomics") float old = val; while((old=atomic_xchg(addr, atomic_xchg(addr, 0.0f)+old))!=0.0f); // Note: this emulated version cannot reliably return the previous value! @@ -37,6 +38,12 @@ float atomic_add_f(volatile global float* addr, float val) // A more reliable version would use a compare-exchange loop, though it // would be much slower. return 0.0f; + #else + float old = val; + float ret = 0.0f; + while ((old = atomic_xchg(addr, ret = atomic_xchg(addr, 0.0f) + old)) != 0.0f); + return ret; + #endif #endif } @@ -209,7 +216,7 @@ int main( // intermediate results validation if (check) { - if (emulate) { + if (false && emulate) { printf("Skipping The emulated float atomic add does not support intermediate results.\n"); } else { std::vector test(gwx);