From c23ab7fa9ff347f9a585423ba22d8bfe88a2397b Mon Sep 17 00:00:00 2001
From: Valentin Churavy
Date: Thu, 9 Jan 2025 13:49:00 +0100
Subject: [PATCH] Try fast linear indexes for KA

---
Review notes (free-form text after the separator; not part of the commit
message and not part of the diff):

 * Purpose: the hunk below changes KA.__index_Global_Linear(ctx) so that it
   returns (blockIdx().x-1) * blockDim().x + threadIdx().x directly, instead
   of expanding (blockIdx().x, threadIdx().x) through KA.__iterspace(ctx)
   and then looking the result up in LinearIndices(KA.__ndrange(ctx)).

 * The new return value is computed from blockIdx, blockDim and threadIdx
   only; `ctx` is no longer read by this function after the patch.

 * NOTE(review): the removed code derived the index from the iteration space
   and the ndrange, the new code does not. Presumably both agree when the
   ndrange is one-dimensional; whether they agree for multi-dimensional
   ndranges / workgroup sizes is not visible from this patch - confirm
   against callers that rely on @index(Global, Linear) before merging.

 * The previous implementation is kept as commented-out lines, and the
   existing "TODO: ... can we get the linear index cheaper" comment is left
   in place as context. After the patch all three of those lines are
   comments; the two that follow the `return` never execute.

 src/CUDAKernels.jl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/CUDAKernels.jl b/src/CUDAKernels.jl
index 23db958933..5abae875ae 100644
--- a/src/CUDAKernels.jl
+++ b/src/CUDAKernels.jl
@@ -145,9 +145,10 @@ end
 end
 
 @device_override @inline function KA.__index_Global_Linear(ctx)
-    I = @inbounds KA.expand(KA.__iterspace(ctx), blockIdx().x, threadIdx().x)
+    # I = @inbounds KA.expand(KA.__iterspace(ctx), blockIdx().x, threadIdx().x)
+    return (blockIdx().x-1) * blockDim().x + threadIdx().x
     # TODO: This is unfortunate, can we get the linear index cheaper
-    @inbounds LinearIndices(KA.__ndrange(ctx))[I]
+    # @inbounds LinearIndices(KA.__ndrange(ctx))[I]
 end
 
 @device_override @inline function KA.__index_Local_Cartesian(ctx)