From ec87a17c8e28b1defe6d7381c1a25adee3cceb51 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Wed, 22 Nov 2023 17:27:59 +0100 Subject: [PATCH] Small refactoring --- examples/alpaka/nbody/nbody.cpp | 4 ++-- examples/cuda/nbody/nbody.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/alpaka/nbody/nbody.cpp b/examples/alpaka/nbody/nbody.cpp index 1a3da2f982..1da2d0254b 100644 --- a/examples/alpaka/nbody/nbody.cpp +++ b/examples/alpaka/nbody/nbody.cpp @@ -45,7 +45,7 @@ constexpr auto runUpate = true; // run update step. Useful to disable for benchm constexpr auto elementsPerThread = xsimd::batch::size; constexpr auto threadsPerBlock = 1; constexpr auto sharedElementsPerBlock = 1; -constexpr auto aosoaLanes = xsimd::batch::size; // vectors +constexpr auto aosoaLanes = elementsPerThread; #elif ANY_GPU_ENABLED constexpr auto threadsPerBlock = 256; constexpr auto sharedElementsPerBlock = 512; @@ -144,7 +144,7 @@ LLAMA_FN_HOST_ACC_INLINE void pPInteraction(const Acc& acc, ParticleRefI& pis, P const auto distSqr = +eps2 + dist(tag::X{}) + dist(tag::Y{}) + dist(tag::Z{}); const auto distSixth = distSqr * distSqr * distSqr; const auto invDistCube - = allowRsqrt ? alpaka::math::rsqrt(acc, distSixth) : (1.0f / alpaka::math::sqrt(acc, distSixth)); + = allowRsqrt ? alpaka::math::rsqrt(acc, distSixth) : (FP{1} / alpaka::math::sqrt(acc, distSixth)); const auto sts = (pj(tag::Mass{}) * timestep) * invDistCube; pis(tag::Vel{}) += dist * sts; } diff --git a/examples/cuda/nbody/nbody.cu b/examples/cuda/nbody/nbody.cu index d9f14b9e42..3698c6f5a6 100644 --- a/examples/cuda/nbody/nbody.cu +++ b/examples/cuda/nbody/nbody.cu @@ -84,7 +84,7 @@ __device__ void pPInteraction(ParticleRefI& pi, ParticleRefJ pj) dist *= dist; const FP distSqr = eps2 + dist(tag::X{}) + dist(tag::Y{}) + dist(tag::Z{}); const FP distSixth = distSqr * distSqr * distSqr; - const FP invDistCube = allowRsqrt ? rsqrt(distSixth) : (1.0f / sqrt(distSixth)); + const FP invDistCube = allowRsqrt ? rsqrt(distSixth) : (FP{1} / sqrt(distSixth)); const FP sts = pj(tag::Mass{}) * invDistCube * +timestep; pi(tag::Vel{}) += dist * sts; }