Patch summary (free text before the first "diff --git" header):

  * examples/alpaka/nbody/nbody.cpp: aosoaLanes is now initialised from
    elementsPerThread instead of repeating xsimd::batch<float>::size.
  * examples/alpaka/nbody/nbody.cpp and examples/cuda/nbody/nbody.cu:
    in pPInteraction, the numerator of the non-rsqrt branch changes from
    the literal 1.0f to FP{1}.

NOTE(review): the leading indentation of the hunk lines below is assumed to
be 4 spaces (8 for the continuation line) -- confirm against the target
files, or apply with a whitespace-tolerant option.

diff --git a/examples/alpaka/nbody/nbody.cpp b/examples/alpaka/nbody/nbody.cpp
index 1a3da2f982..1da2d0254b 100644
--- a/examples/alpaka/nbody/nbody.cpp
+++ b/examples/alpaka/nbody/nbody.cpp
@@ -45,7 +45,7 @@ constexpr auto runUpate = true; // run update step. Useful to disable for benchm
 constexpr auto elementsPerThread = xsimd::batch<float>::size;
 constexpr auto threadsPerBlock = 1;
 constexpr auto sharedElementsPerBlock = 1;
-constexpr auto aosoaLanes = xsimd::batch<float>::size; // vectors
+constexpr auto aosoaLanes = elementsPerThread;
 #elif ANY_GPU_ENABLED
 constexpr auto threadsPerBlock = 256;
 constexpr auto sharedElementsPerBlock = 512;
@@ -144,7 +144,7 @@ LLAMA_FN_HOST_ACC_INLINE void pPInteraction(const Acc& acc, ParticleRefI& pis, P
     const auto distSqr = +eps2 + dist(tag::X{}) + dist(tag::Y{}) + dist(tag::Z{});
     const auto distSixth = distSqr * distSqr * distSqr;
     const auto invDistCube
-        = allowRsqrt ? alpaka::math::rsqrt(acc, distSixth) : (1.0f / alpaka::math::sqrt(acc, distSixth));
+        = allowRsqrt ? alpaka::math::rsqrt(acc, distSixth) : (FP{1} / alpaka::math::sqrt(acc, distSixth));
     const auto sts = (pj(tag::Mass{}) * timestep) * invDistCube;
     pis(tag::Vel{}) += dist * sts;
 }
diff --git a/examples/cuda/nbody/nbody.cu b/examples/cuda/nbody/nbody.cu
index d9f14b9e42..3698c6f5a6 100644
--- a/examples/cuda/nbody/nbody.cu
+++ b/examples/cuda/nbody/nbody.cu
@@ -84,7 +84,7 @@ __device__ void pPInteraction(ParticleRefI& pi, ParticleRefJ pj)
     dist *= dist;
     const FP distSqr = eps2 + dist(tag::X{}) + dist(tag::Y{}) + dist(tag::Z{});
     const FP distSixth = distSqr * distSqr * distSqr;
-    const FP invDistCube = allowRsqrt ? rsqrt(distSixth) : (1.0f / sqrt(distSixth));
+    const FP invDistCube = allowRsqrt ? rsqrt(distSixth) : (FP{1} / sqrt(distSixth));
     const FP sts = pj(tag::Mass{}) * invDistCube * +timestep;
     pi(tag::Vel{}) += dist * sts;
 }