Tests now use new JLArrays.jl package #68

Draft · wants to merge 1 commit into base: master
1 change: 1 addition & 0 deletions test/Project.toml
@@ -6,6 +6,7 @@ DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
377 changes: 6 additions & 371 deletions test/include/jlarray.jl
@@ -1,385 +1,20 @@
# File adapted from GPUArrays.jl test suite:
# https://raw.githubusercontent.com/JuliaGPU/GPUArrays.jl/master/test/jlarray.jl
#
# The MIT License (MIT)
# Copyright © 2016 Simon Danisch
# Copyright © 2018 JuliaGPU developers
#
# Added functions for PencilArrays tests (these seem to be defined for CuArray
# in CUDA.jl):
# - resize!(::DenseJLVector, n)
# - unsafe_wrap(::Type{JLArray}, ...)
# - rand!(::AbstractRNG, ::JLArray, ...)
import JLArrays

# ============================================================================ #
# Define a few more functions needed for PencilArrays tests
# (these seem to be defined for CuArray in CUDA.jl)
# TODO define these in JLArrays.jl

# reference implementation on the CPU
using Random: Random, AbstractRNG
using JLArrays: DenseJLVector, JLArray

# note that most of the code in this file serves to define a functional array type;
# the actual implementation of the GPUArrays interfaces is much more limited.

module JLArrays

export JLArray, jl

using GPUArrays

using Adapt


#
# Device functionality
#

const MAXTHREADS = 256


## execution

struct JLBackend <: AbstractGPUBackend end

mutable struct JLKernelContext <: AbstractKernelContext
blockdim::Int
griddim::Int
blockidx::Int
threadidx::Int

localmem_counter::Int
localmems::Vector{Vector{Array}}
end

# Outer constructor: `threads` is the threads-per-block count and `blockdim` the
# number of blocks; they initialize the `blockdim` and `griddim` fields, respectively.
function JLKernelContext(threads::Int, blockdim::Int)
blockcount = prod(blockdim)
lmems = [Vector{Array}() for i in 1:blockcount]
JLKernelContext(threads, blockdim, 1, 1, 0, lmems)
end

function JLKernelContext(ctx::JLKernelContext, threadidx::Int)
JLKernelContext(
ctx.blockdim,
ctx.griddim,
ctx.blockidx,
threadidx,
0,
ctx.localmems
)
end

struct Adaptor end
jlconvert(arg) = adapt(Adaptor(), arg)

# FIXME: add Ref to Adapt.jl (but make sure it doesn't cause ambiguities with CUDAnative's)
struct JlRefValue{T} <: Ref{T}
x::T
end
Base.getindex(r::JlRefValue) = r.x
Adapt.adapt_structure(to::Adaptor, r::Base.RefValue) = JlRefValue(adapt(to, r[]))
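
# Illustration only (not part of the diff; `_demo_ref` is a hypothetical helper):
# `jlconvert` turns a `Base.RefValue` into a `JlRefValue` whose payload has itself
# been adapted, so `r[]` keeps working on the device side.
_demo_ref() = jlconvert(Ref(JLArray{Float32}(undef, 4)))[]  # yields a JLDeviceArray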

function GPUArrays.gpu_call(::JLBackend, f, args, threads::Int, blocks::Int;
name::Union{String,Nothing})
ctx = JLKernelContext(threads, blocks)
device_args = jlconvert.(args)
tasks = Array{Task}(undef, threads)
for blockidx in 1:blocks
ctx.blockidx = blockidx
for threadidx in 1:threads
thread_ctx = JLKernelContext(ctx, threadidx)
tasks[threadidx] = @async f(thread_ctx, device_args...)
# TODO: require 1.3 and use Base.Threads.@spawn for actual multithreading
# (this would require a different synchronization mechanism)
end
for t in tasks
fetch(t)
end
end
return
end
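
# Usage sketch (illustration only, not part of the diff; `_demo_fill!` is a
# hypothetical kernel): `gpu_call` runs one task per (block, thread) pair, so a
# standard GPUArrays kernel works unchanged on this CPU backend.
function _demo_fill!(ctx, dst, value)
    i = GPUArrays.linear_index(ctx)
    if i <= length(dst)
        @inbounds dst[i] = value
    end
    return
end
# GPUArrays.gpu_call(_demo_fill!, JLArray{Int}(undef, 16), 42)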


## executed on-device

# array type

struct JLDeviceArray{T, N} <: AbstractDeviceArray{T, N}
data::Array{T, N}
dims::Dims{N}

function JLDeviceArray{T,N}(data::Array{T, N}, dims::Dims{N}) where {T,N}
new(data, dims)
end
end

Base.size(x::JLDeviceArray) = x.dims

@inline Base.getindex(A::JLDeviceArray, index::Integer) = getindex(A.data, index)
@inline Base.setindex!(A::JLDeviceArray, x, index::Integer) = setindex!(A.data, x, index)

# indexing

for f in (:blockidx, :blockdim, :threadidx, :griddim)
@eval GPUArrays.$f(ctx::JLKernelContext) = ctx.$f
end

# memory

function GPUArrays.LocalMemory(ctx::JLKernelContext, ::Type{T}, ::Val{dims}, ::Val{id}) where {T, dims, id}
ctx.localmem_counter += 1
lmems = ctx.localmems[blockidx(ctx)]

# first invocation in block
data = if length(lmems) < ctx.localmem_counter
lmem = fill(zero(T), dims)
push!(lmems, lmem)
lmem
else
lmems[ctx.localmem_counter]
end

N = length(dims)
JLDeviceArray{T,N}(data, tuple(dims...))
end

# synchronization

@inline function GPUArrays.synchronize_threads(::JLKernelContext)
# All tasks are started asynchronously, so a `yield` hands control to the next
# task running the same function, which yields at the same point in the program;
# this chain of yields effectively synchronizes the tasks (threads).
yield()
return
end
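
# Kernel sketch (illustration only; `_demo_reverse!` is hypothetical): stage one
# block's elements in local memory, then synchronize before reading them back in
# reverse order, mirroring the usual shared-memory pattern.
function _demo_reverse!(ctx, dst, src)
    shared = GPUArrays.LocalMemory(ctx, eltype(src), Val(MAXTHREADS), Val(:demo))
    i = GPUArrays.threadidx(ctx)
    n = length(src)
    i <= n && (shared[i] = src[i])
    GPUArrays.synchronize_threads(ctx)
    i <= n && (dst[i] = shared[n - i + 1])
    return
end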


#
# Host abstractions
#

struct JLArray{T, N} <: AbstractGPUArray{T, N}
data::Array{T, N}
dims::Dims{N}

function JLArray{T,N}(data::Array{T, N}, dims::Dims{N}) where {T,N}
@assert isbitstype(T) "JLArray only supports bits types"
new(data, dims)
end
end


## constructors

# type and dimensionality specified, accepting dims as tuples of Ints
JLArray{T,N}(::UndefInitializer, dims::Dims{N}) where {T,N} =
JLArray{T,N}(Array{T, N}(undef, dims), dims)

# type and dimensionality specified, accepting dims as series of Ints
JLArray{T,N}(::UndefInitializer, dims::Integer...) where {T,N} = JLArray{T,N}(undef, dims)

# type but not dimensionality specified
JLArray{T}(::UndefInitializer, dims::Dims{N}) where {T,N} = JLArray{T,N}(undef, dims)
JLArray{T}(::UndefInitializer, dims::Integer...) where {T} =
JLArray{T}(undef, convert(Tuple{Vararg{Int}}, dims))

# empty vector constructor
JLArray{T,1}() where {T} = JLArray{T,1}(undef, 0)

Base.similar(a::JLArray{T,N}) where {T,N} = JLArray{T,N}(undef, size(a))
Base.similar(a::JLArray{T}, dims::Base.Dims{N}) where {T,N} = JLArray{T,N}(undef, dims)
Base.similar(a::JLArray, ::Type{T}, dims::Base.Dims{N}) where {T,N} = JLArray{T,N}(undef, dims)

Base.copy(a::JLArray{T,N}) where {T,N} = JLArray{T,N}(copy(a.data), size(a))
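
# Constructor usage sketch (illustration only; `_demo_constructors` is hypothetical):
function _demo_constructors()
    a = JLArray{Float32,2}(undef, 3, 4)  # eltype and dimensionality given
    b = JLArray{Float64}(undef, (5,))    # dimensionality inferred from dims
    c = similar(a)                       # same eltype and size as `a`
    d = copy(b)                          # independent copy of the data
    return (a, b, c, d)
end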


## derived types

export DenseJLArray, DenseJLVector, DenseJLMatrix, DenseJLVecOrMat,
StridedJLArray, StridedJLVector, StridedJLMatrix, StridedJLVecOrMat,
AnyJLArray, AnyJLVector, AnyJLMatrix, AnyJLVecOrMat

ContiguousSubJLArray{T,N,A<:JLArray} = Base.FastContiguousSubArray{T,N,A}

# dense arrays: stored contiguously in memory
DenseReinterpretJLArray{T,N,A<:Union{JLArray,ContiguousSubJLArray}} =
Base.ReinterpretArray{T,N,S,A} where S
DenseReshapedJLArray{T,N,A<:Union{JLArray,ContiguousSubJLArray,DenseReinterpretJLArray}} =
Base.ReshapedArray{T,N,A}
DenseSubJLArray{T,N,A<:Union{JLArray,DenseReshapedJLArray,DenseReinterpretJLArray}} =
Base.FastContiguousSubArray{T,N,A}
DenseJLArray{T,N} = Union{JLArray{T,N}, DenseSubJLArray{T,N}, DenseReshapedJLArray{T,N},
DenseReinterpretJLArray{T,N}}
DenseJLVector{T} = DenseJLArray{T,1}
DenseJLMatrix{T} = DenseJLArray{T,2}
DenseJLVecOrMat{T} = Union{DenseJLVector{T}, DenseJLMatrix{T}}

# strided arrays
StridedSubJLArray{T,N,A<:Union{JLArray,DenseReshapedJLArray,DenseReinterpretJLArray},
I<:Tuple{Vararg{Union{Base.RangeIndex, Base.ReshapedUnitRange,
Base.AbstractCartesianIndex}}}} = SubArray{T,N,A,I}
StridedJLArray{T,N} = Union{JLArray{T,N}, StridedSubJLArray{T,N}, DenseReshapedJLArray{T,N},
DenseReinterpretJLArray{T,N}}
StridedJLVector{T} = StridedJLArray{T,1}
StridedJLMatrix{T} = StridedJLArray{T,2}
StridedJLVecOrMat{T} = Union{StridedJLVector{T}, StridedJLMatrix{T}}

# anything that's (secretly) backed by a JLArray
AnyJLArray{T,N} = Union{JLArray{T,N}, WrappedArray{T,N,JLArray,JLArray{T,N}}}
AnyJLVector{T} = AnyJLArray{T,1}
AnyJLMatrix{T} = AnyJLArray{T,2}
AnyJLVecOrMat{T} = Union{AnyJLVector{T}, AnyJLMatrix{T}}
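
# Illustration only (`_demo_aliases` is hypothetical): common wrappers land in
# the unions defined above.
function _demo_aliases()
    v = JLArray{Int,1}(undef, 8)
    m = JLArray{Int,2}(undef, 2, 2)
    (view(v, 1:4) isa DenseJLVector{Int},      # contiguous view stays dense
     reshape(v, 2, 4) isa DenseJLMatrix{Int},  # ReshapedArray stays dense
     view(m, :, 1) isa AnyJLVector{Int})       # JLArray-backed wrapper
end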


## array interface

Base.elsize(::Type{<:JLArray{T}}) where {T} = sizeof(T)

Base.size(x::JLArray) = x.dims
Base.sizeof(x::JLArray) = Base.elsize(x) * length(x)

Base.unsafe_convert(::Type{Ptr{T}}, x::JLArray{T}) where {T} =
Base.unsafe_convert(Ptr{T}, x.data)


## interop with Julia arrays

JLArray{T,N}(x::AbstractArray{<:Any,N}) where {T,N} =
JLArray{T,N}(convert(Array{T}, x), size(x))

# underspecified constructors
JLArray{T}(xs::AbstractArray{S,N}) where {T,N,S} = JLArray{T,N}(xs)
(::Type{JLArray{T,N} where T})(x::AbstractArray{S,N}) where {S,N} = JLArray{S,N}(x)
JLArray(A::AbstractArray{T,N}) where {T,N} = JLArray{T,N}(A)

# idempotency
JLArray{T,N}(xs::JLArray{T,N}) where {T,N} = xs

# adapt for the GPU
jl(xs) = adapt(JLArray, xs)
## don't convert isbits types since they are already considered GPU-compatible
Adapt.adapt_storage(::Type{JLArray}, xs::AbstractArray) =
isbits(xs) ? xs : convert(JLArray, xs)
## if an element type is specified, convert to it
Adapt.adapt_storage(::Type{<:JLArray{T}}, xs::AbstractArray) where {T} =
isbits(xs) ? xs : convert(JLArray{T}, xs)

# adapt back to the CPU
Adapt.adapt_storage(::Type{Array}, xs::JLArray) = convert(Array, xs)
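
# Illustration only (`_demo_adapt` is hypothetical): `jl` moves host data into a
# JLArray; adapting back to `Array` returns it to the CPU.
function _demo_adapt()
    x = jl(rand(4))      # Vector{Float64} -> JLArray{Float64,1}
    y = adapt(Array, x)  # JLArray -> Array
    return (x, y)
end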


## conversions

Base.convert(::Type{T}, x::T) where T <: JLArray = x


## broadcast

using Base.Broadcast: BroadcastStyle, Broadcasted

struct JLArrayStyle{N} <: AbstractGPUArrayStyle{N} end
JLArrayStyle(::Val{N}) where N = JLArrayStyle{N}()
JLArrayStyle{M}(::Val{N}) where {N,M} = JLArrayStyle{N}()

BroadcastStyle(::Type{JLArray{T,N}}) where {T,N} = JLArrayStyle{N}()

# Allocating the output container
Base.similar(bc::Broadcasted{JLArrayStyle{N}}, ::Type{T}) where {N,T} =
similar(JLArray{T}, axes(bc))
Base.similar(bc::Broadcasted{JLArrayStyle{N}}, ::Type{T}, dims) where {N,T} =
JLArray{T}(undef, dims)
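
# Illustration only (`_demo_broadcast` is hypothetical): the style above routes
# broadcasts through GPUArrays and allocates a JLArray for the result.
function _demo_broadcast()
    a = jl(ones(3))
    return a .+ 2 .* a  # fused broadcast; the result is a JLArray{Float64,1}
end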


## memory operations

function Base.copyto!(dest::Array{T}, d_offset::Integer,
source::DenseJLArray{T}, s_offset::Integer,
amount::Integer) where T
amount==0 && return dest
@boundscheck checkbounds(dest, d_offset)
@boundscheck checkbounds(dest, d_offset+amount-1)
@boundscheck checkbounds(source, s_offset)
@boundscheck checkbounds(source, s_offset+amount-1)
GC.@preserve dest source Base.unsafe_copyto!(pointer(dest, d_offset),
pointer(source, s_offset), amount)
return dest
end

Base.copyto!(dest::Array{T}, source::DenseJLArray{T}) where {T} =
copyto!(dest, 1, source, 1, length(source))

function Base.copyto!(dest::DenseJLArray{T}, d_offset::Integer,
source::Array{T}, s_offset::Integer,
amount::Integer) where T
amount==0 && return dest
@boundscheck checkbounds(dest, d_offset)
@boundscheck checkbounds(dest, d_offset+amount-1)
@boundscheck checkbounds(source, s_offset)
@boundscheck checkbounds(source, s_offset+amount-1)
GC.@preserve dest source Base.unsafe_copyto!(pointer(dest, d_offset),
pointer(source, s_offset), amount)
return dest
end

Base.copyto!(dest::DenseJLArray{T}, source::Array{T}) where {T} =
copyto!(dest, 1, source, 1, length(source))

function Base.copyto!(dest::DenseJLArray{T}, d_offset::Integer,
source::DenseJLArray{T}, s_offset::Integer,
amount::Integer) where T
amount==0 && return dest
@boundscheck checkbounds(dest, d_offset)
@boundscheck checkbounds(dest, d_offset+amount-1)
@boundscheck checkbounds(source, s_offset)
@boundscheck checkbounds(source, s_offset+amount-1)
GC.@preserve dest source Base.unsafe_copyto!(pointer(dest, d_offset),
pointer(source, s_offset), amount)
return dest
end

Base.copyto!(dest::DenseJLArray{T}, source::DenseJLArray{T}) where {T} =
copyto!(dest, 1, source, 1, length(source))
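
# Illustration only (`_demo_copyto!` is hypothetical): host <-> JLArray copies
# go through the pointer-based methods above, with explicit bounds checks.
function _demo_copyto!()
    src = rand(Float64, 10)
    dst = JLArray{Float64}(undef, 10)
    copyto!(dst, src)           # full Array -> JLArray copy
    copyto!(dst, 3, src, 1, 4)  # copy 4 elements into dst starting at index 3
    return dst
end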

# Added for PencilArrays tests
Base.resize!(u::DenseJLVector, n) = (resize!(u.data, n); u)
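
# Usage sketch (illustration only; `_demo_resize!` is hypothetical): mirrors
# `resize!(::CuVector, n)` from CUDA.jl by resizing the underlying Vector.
function _demo_resize!()
    u = JLArray{Float64}(undef, 4)
    resize!(u, 8)  # grows `u.data` in place
    return u
end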

# Added for PencilArrays tests
function Base.unsafe_wrap(::Type{JLArray}, p::Ptr, dims::Union{Integer, Dims}; kws...)
data = unsafe_wrap(Array, p, dims; kws...)
JLArray(data)
end
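
# Usage sketch (illustration only; `_demo_wrap` is hypothetical): wrap existing
# memory without copying, as `unsafe_wrap(CuArray, p, dims)` does in CUDA.jl.
# The wrapped memory must stay rooted for the lifetime of the resulting array.
function _demo_wrap()
    data = zeros(Float64, 6)
    return GC.@preserve data unsafe_wrap(JLArray, pointer(data), (2, 3))
end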

## random number generation

using Random

const GLOBAL_RNG = Ref{Union{Nothing,GPUArrays.RNG}}(nothing)
function GPUArrays.default_rng(::Type{<:JLArray})
if GLOBAL_RNG[] === nothing
N = MAXTHREADS
state = JLArray{NTuple{4, UInt32}}(undef, N)
rng = GPUArrays.RNG(state)
Random.seed!(rng)
GLOBAL_RNG[] = rng
end
GLOBAL_RNG[]
end

# Added for PencilArrays tests
function Random.rand!(rng::AbstractRNG, u::JLArray, ::Type{X}) where {X}
rand!(rng, u.data, X)
u
end
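
# Usage sketch (illustration only; `_demo_rand!` is hypothetical): fill a
# JLArray from a host RNG, as the PencilArrays tests do with CuArray.
function _demo_rand!()
    u = JLArray{Float32}(undef, 16)
    rand!(Random.default_rng(), u, Float32)
    return u
end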

## GPUArrays interfaces

GPUArrays.backend(::Type{<:JLArray}) = JLBackend()

Adapt.adapt_storage(::Adaptor, x::JLArray{T,N}) where {T,N} =
JLDeviceArray{T,N}(x.data, x.dims)

function GPUArrays.mapreducedim!(f, op, R::AnyJLArray, A::Union{AbstractArray,Broadcast.Broadcasted};
init=nothing)
if init !== nothing
fill!(R, init)
end
@allowscalar Base.reducedim!(op, R.data, map(f, A))
end
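
# Usage sketch (illustration only; `_demo_reduce` is hypothetical): column-wise
# sums of squares through the fallback above; the reduction runs on the CPU data.
function _demo_reduce()
    x = jl(ones(Float32, 4, 3))
    R = JLArray{Float32}(undef, 1, 3)
    GPUArrays.mapreducedim!(abs2, +, R, x; init = 0f0)
    return R
end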

end