diff --git a/src/host/random.jl b/src/host/random.jl index d79b1a94..2d829760 100644 --- a/src/host/random.jl +++ b/src/host/random.jl @@ -31,14 +31,12 @@ function next_rand(state::NTuple{4, T}) where {T <: Unsigned} end function gpu_rand(::Type{T}, threadid, randstate::AbstractVector{NTuple{4, UInt32}}) where T - threadid = GPUArrays.threadidx(ctx) stateful_rand = next_rand(randstate[threadid]) randstate[threadid] = stateful_rand[1] return make_rand_num(T, stateful_rand[2]) end function gpu_rand(::Type{T}, threadid, randstate::AbstractVector{NTuple{4, UInt32}}) where T <: Integer - threadid = GPUArrays.threadidx(ctx) result = zero(T) if sizeof(T) >= 4 for _ in 1:sizeof(T) >> 2 @@ -86,7 +84,7 @@ end function Random.rand!(rng::RNG, A::AnyGPUArray{T}) where T <: Number @kernel function rand!(a, randstate) idx = @index(Global, Linear) - @inbounds a[idx] = gpu_rand(T, idx, randstate) + @inbounds a[idx] = gpu_rand(T, ((idx-1)%length(randstate)+1), randstate) end rand!(get_backend(A))(A, rng.state, ndrange = size(A)) A @@ -108,7 +106,7 @@ function Random.randn!(rng::RNG, A::AnyGPUArray{T}) where T <: Number end end kernel = randn!(get_backend(A)) - kernel(A, rng.states; ndrange=threads) + kernel(A, rng.state; ndrange=threads) A end diff --git a/src/host/uniformscaling.jl b/src/host/uniformscaling.jl index 7d71f617..f8f8ae5a 100644 --- a/src/host/uniformscaling.jl +++ b/src/host/uniformscaling.jl @@ -12,20 +12,16 @@ const unittriangularwrappers = ( (:UnitLowerTriangular, :LowerTriangular) ) -@kernel function kernel_generic(ctx, B, J, min_size) +@kernel function kernel_generic(B, J) lin_idx = @index(Global, Linear) - if lin_idx <= min_size - @inbounds diag_idx = diagind(B)[lin_idx] - @inbounds B[diag_idx] += J - end + @inbounds diag_idx = diagind(B)[lin_idx] + @inbounds B[diag_idx] += J end -@kernel function kernel_unittriangular(ctx, B, J, diagonal_val, min_size) +@kernel function kernel_unittriangular(B, J, diagonal_val) lin_idx = @index(Global, Linear) - if lin_idx <= min_size - @inbounds diag_idx = diagind(B)[lin_idx] - @inbounds B[diag_idx] = diagonal_val + J - end + @inbounds diag_idx = diagind(B)[lin_idx] + @inbounds B[diag_idx] = diagonal_val + J end for (t1, t2) in unittriangularwrappers @@ -34,8 +30,7 @@ for (t1, t2) in unittriangularwrappers B = similar(parent(A), typeof(oneunit(T) + J)) copyto!(B, parent(A)) min_size = minimum(size(B)) - kernel = kernel_unittriangular(get_backend(B)) - kernel(B, J, one(eltype(B)), min_size; ndrange=min_size) + kernel_unittriangular(get_backend(B))(B, J, one(eltype(B)); ndrange=min_size) return $t2(B) end @@ -43,8 +38,7 @@ for (t1, t2) in unittriangularwrappers B = similar(parent(A), typeof(J - oneunit(T))) B .= .- parent(A) min_size = minimum(size(B)) - kernel = kernel_unittriangular(get_backend(B)) - kernel(B, J, -one(eltype(B)), min_size; ndrange=min_size) + kernel_unittriangular(get_backend(B))(B, J, -one(eltype(B)); ndrange=min_size) return $t2(B) end end @@ -56,8 +50,7 @@ for t in genericwrappers B = similar(parent(A), typeof(oneunit(T) + J)) copyto!(B, parent(A)) min_size = minimum(size(B)) - kernel = kernel_generic(get_backend(B)) - kernel(B, J, min_size; ndrange=min_size) + kernel_generic(get_backend(B))(B, J; ndrange=min_size) return $t(B) end @@ -65,8 +58,7 @@ for t in genericwrappers B = similar(parent(A), typeof(J - oneunit(T))) B .= .- parent(A) min_size = minimum(size(B)) - kernel = kernel_generic(get_backend(B)) - kernel(B, J, min_size; ndrange=min_size) + kernel_generic(get_backend(B))(B, J; ndrange=min_size) return $t(B) end end @@ -77,8 +69,7 @@ function (+)(A::Hermitian{T,<:AbstractGPUMatrix}, J::UniformScaling{<:Complex}) B = similar(parent(A), typeof(oneunit(T) + J)) copyto!(B, parent(A)) min_size = minimum(size(B)) - kernel = kernel_generic(get_backend(B)) - kernel(B, J, min_size; ndrange=min_size) + kernel_generic(get_backend(B))(B, J; ndrange=min_size) return B end @@ -86,8 +77,7 @@ function (-)(J::UniformScaling{<:Complex}, A::Hermitian{T,<:AbstractGPUMatrix}) B = similar(parent(A), typeof(J - oneunit(T))) B .= .-parent(A) min_size = minimum(size(B)) - kernel = kernel_generic(get_backend(B)) - kernel(B, J, min_size; ndrange=min_size) + kernel_generic(get_backend(B))(B, J; ndrange=min_size) return B end @@ -96,8 +86,7 @@ function (+)(A::AbstractGPUMatrix{T}, J::UniformScaling) where T B = similar(A, typeof(oneunit(T) + J)) copyto!(B, A) min_size = minimum(size(B)) - kernel = kernel_generic(get_backend(B)) - kernel(B, J, min_size; ndrange=min_size) + kernel_generic(get_backend(B))(B, J; ndrange=min_size) return B end @@ -105,7 +94,6 @@ function (-)(J::UniformScaling, A::AbstractGPUMatrix{T}) where T B = similar(A, typeof(J - oneunit(T))) B .= .-A min_size = minimum(size(B)) - kernel = kernel_generic(get_backend(B)) - kernel(B, J, min_size; ndrange=min_size) + kernel_generic(get_backend(B))(B, J; ndrange=min_size) return B end diff --git a/test/testsuite.jl b/test/testsuite.jl index bc1192f3..d079f132 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -86,7 +86,6 @@ macro testsuite(name, ex) end include("testsuite/construction.jl") -#= include("testsuite/indexing.jl") include("testsuite/base.jl") #include("testsuite/vector.jl") @@ -98,7 +97,6 @@ include("testsuite/random.jl") include("testsuite/uniformscaling.jl") include("testsuite/statistics.jl") -=# """ Runs the entire GPUArrays test suite on array type `AT` """ diff --git a/test/testsuite/broadcasting.jl b/test/testsuite/broadcasting.jl index a12ec466..81b028f3 100644 --- a/test/testsuite/broadcasting.jl +++ b/test/testsuite/broadcasting.jl @@ -201,6 +201,8 @@ Base.size(A::WrapArray) = size(A.data) Adapt.adapt_structure(to, s::WrapArray) = WrapArray(Adapt.adapt(to, s.data)) # For broadcast support Broadcast.BroadcastStyle(::Type{WrapArray{T,N,P}}) where {T,N,P} = Broadcast.BroadcastStyle(P) +KernelAbstractions.get_backend(a::WA) where WA <: WrapArray = get_backend(a.data) + function unknown_wrapper(AT, eltypes) for ET in eltypes