Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test CUDA@v5 #314

Merged
merged 2 commits on Nov 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ oneAPIExt = ["oneAPI"]
[compat]
AMDGPU = "0.4.9"
Adapt = "3"
CUDA = "4.1.0"
CUDA = "4.1.0, 5"
ChainRulesCore = "1"
DiffEqBase = "6.122"
DocStringExtensions = "0.8, 0.9"
Expand Down
42 changes: 21 additions & 21 deletions src/ensemblegpuarray/kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ function Adapt.adapt_structure(to, ps::ParamWrapper{P, T}) where {P, T}
end

@kernel function gpu_kernel(f, du, @Const(u),
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
i = @index(Global, Linear)
@inbounds p = params[i].params
@inbounds tspan = params[i].data
Expand All @@ -25,8 +25,8 @@ end
end

@kernel function gpu_kernel_oop(f, du, @Const(u),
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
i = @index(Global, Linear)
@inbounds p = params[i].params
@inbounds tspan = params[i].data
Expand Down Expand Up @@ -59,8 +59,8 @@ end
end

@kernel function jac_kernel(f, J, @Const(u),
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
i = @index(Global, Linear) - 1
section = (1 + (i * size(u, 1))):((i + 1) * size(u, 1))
@inbounds p = params[i + 1].params
Expand All @@ -73,8 +73,8 @@ end
end

@kernel function jac_kernel_oop(f, J, @Const(u),
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(t)) where {P, T}
i = @index(Global, Linear) - 1
section = (1 + (i * size(u, 1))):((i + 1) * size(u, 1))

Expand Down Expand Up @@ -122,7 +122,7 @@ end
end

@kernel function continuous_condition_kernel(condition, out, @Const(u), @Const(t),
@Const(p))
@Const(p))
i = @index(Global, Linear)
@views @inbounds out[i] = condition(u[:, i], t, FakeIntegrator(u[:, i], t, p[:, i]))
end
Expand All @@ -141,8 +141,8 @@ function workgroupsize(backend, n)
end

@kernel function W_kernel(jac, W, @Const(u),
@Const(params::AbstractArray{ParamWrapper{P, T}}), @Const(gamma),
@Const(t)) where {P, T}
@Const(params::AbstractArray{ParamWrapper{P, T}}), @Const(gamma),
@Const(t)) where {P, T}
i = @index(Global, Linear)
len = size(u, 1)
_W = @inbounds @view(W[:, :, i])
Expand Down Expand Up @@ -176,9 +176,9 @@ end
end

@kernel function W_kernel_oop(jac, W, @Const(u),
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(gamma),
@Const(t)) where {P, T}
@Const(params::AbstractArray{ParamWrapper{P, T}}),
@Const(gamma),
@Const(t)) where {P, T}
i = @index(Global, Linear)
len = size(u, 1)

Expand Down Expand Up @@ -218,7 +218,7 @@ end
end

@kernel function Wt_kernel(f::AbstractArray{T}, W, @Const(u), @Const(p), @Const(gamma),
@Const(t)) where {T}
@Const(t)) where {T}
i = @index(Global, Linear)
len = size(u, 1)
_W = @inbounds @view(W[:, :, i])
Expand All @@ -240,7 +240,7 @@ end
end

@kernel function Wt_kernel_oop(f::AbstractArray{T}, W, @Const(u), @Const(p), @Const(gamma),
@Const(t)) where {T}
@Const(t)) where {T}
i = @index(Global, Linear)
len = size(u, 1)
_W = @inbounds @view(W[:, :, i])
Expand Down Expand Up @@ -268,7 +268,7 @@ end
end

@kernel function gpu_kernel_tgrad(f::AbstractArray{T}, du, @Const(u), @Const(p),
@Const(t)) where {T}
@Const(t)) where {T}
i = @index(Global, Linear)
@inbounds f = f[i].tgrad
if eltype(p) <: Number
Expand All @@ -279,7 +279,7 @@ end
end

@kernel function gpu_kernel_oop_tgrad(f::AbstractArray{T}, du, @Const(u), @Const(p),
@Const(t)) where {T}
@Const(t)) where {T}
i = @index(Global, Linear)
@inbounds f = f[i].tgrad
if eltype(p) <: Number
Expand Down Expand Up @@ -320,13 +320,13 @@ LinSolveGPUSplitFactorize() = LinSolveGPUSplitFactorize(0, 0)
LinearSolve.needs_concrete_A(::LinSolveGPUSplitFactorize) = true

function LinearSolve.init_cacheval(linsol::LinSolveGPUSplitFactorize, A, b, u, Pl, Pr,
maxiters::Int, abstol, reltol, verbose::Bool,
assumptions::LinearSolve.OperatorAssumptions)
maxiters::Int, abstol, reltol, verbose::Bool,
assumptions::LinearSolve.OperatorAssumptions)
LinSolveGPUSplitFactorize(linsol.len, length(u) ÷ linsol.len)
end

function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::LinSolveGPUSplitFactorize,
args...; kwargs...)
args...; kwargs...)
p = cache.cacheval
A = cache.A
b = cache.b
Expand Down
6 changes: 3 additions & 3 deletions src/ensemblegpuarray/lowerlevel_solve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ Only a subset of the common solver arguments are supported.
function vectorized_map_solve end

function vectorized_map_solve(probs, alg,
ensemblealg::Union{EnsembleArrayAlgorithm}, I,
adaptive;
kwargs...)
ensemblealg::Union{EnsembleArrayAlgorithm}, I,
adaptive;
kwargs...)

# @assert all(Base.Fix2((prob1, prob2) -> isequal(prob1.tspan, prob2.tspan),probs[1]),probs)
# u0 = reduce(hcat, Array(probs[i].u0) for i in 1:length(I))
Expand Down
8 changes: 4 additions & 4 deletions src/ensemblegpuarray/problem_generation.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
function generate_problem(prob::SciMLBase.AbstractODEProblem,
u0,
p,
jac_prototype,
colorvec)
u0,
p,
jac_prototype,
colorvec)
_f = let f = prob.f.f, kernel = DiffEqBase.isinplace(prob) ? gpu_kernel : gpu_kernel_oop
function (du, u, p, t)
version = get_backend(u)
Expand Down
60 changes: 30 additions & 30 deletions src/ensemblegpukernel/callbacks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ struct GPUDiscreteCallback{F1, F2, F3, F4, F5} <: SciMLBase.AbstractDiscreteCall
finalize::F4
save_positions::F5
function GPUDiscreteCallback(condition::F1, affect!::F2,
initialize::F3, finalize::F4,
save_positions::F5) where {F1, F2, F3, F4, F5}
initialize::F3, finalize::F4,
save_positions::F5) where {F1, F2, F3, F4, F5}
if save_positions != (false, false)
error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.")
end
Expand All @@ -15,9 +15,9 @@ struct GPUDiscreteCallback{F1, F2, F3, F4, F5} <: SciMLBase.AbstractDiscreteCall
end
end
function GPUDiscreteCallback(condition, affect!;
initialize = SciMLBase.INITIALIZE_DEFAULT,
finalize = SciMLBase.FINALIZE_DEFAULT,
save_positions = (false, false))
initialize = SciMLBase.INITIALIZE_DEFAULT,
finalize = SciMLBase.FINALIZE_DEFAULT,
save_positions = (false, false))
GPUDiscreteCallback(condition, affect!, initialize, finalize, save_positions)
end

Expand All @@ -42,12 +42,12 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <:
reltol::T2
repeat_nudge::T3
function GPUContinuousCallback(condition::F1, affect!::F2, affect_neg!::F3,
initialize::F4, finalize::F5, idxs::I, rootfind,
interp_points, save_positions::F6, dtrelax::R, abstol::T,
reltol::T2,
repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2,
T3, I, R,
}
initialize::F4, finalize::F5, idxs::I, rootfind,
interp_points, save_positions::F6, dtrelax::R, abstol::T,
reltol::T2,
repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2,
T3, I, R,
}
if save_positions != (false, false)
error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.")
end
Expand All @@ -61,15 +61,15 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <:
end

function GPUContinuousCallback(condition, affect!, affect_neg!;
initialize = SciMLBase.INITIALIZE_DEFAULT,
finalize = SciMLBase.FINALIZE_DEFAULT,
idxs = nothing,
rootfind = LeftRootFind,
save_positions = (false, false),
interp_points = 10,
dtrelax = 1,
abstol = 10eps(Float32), reltol = 0,
repeat_nudge = 1 // 100)
initialize = SciMLBase.INITIALIZE_DEFAULT,
finalize = SciMLBase.FINALIZE_DEFAULT,
idxs = nothing,
rootfind = LeftRootFind,
save_positions = (false, false),
interp_points = 10,
dtrelax = 1,
abstol = 10eps(Float32), reltol = 0,
repeat_nudge = 1 // 100)
GPUContinuousCallback(condition, affect!, affect_neg!, initialize, finalize,
idxs,
rootfind, interp_points,
Expand All @@ -78,15 +78,15 @@ function GPUContinuousCallback(condition, affect!, affect_neg!;
end

function GPUContinuousCallback(condition, affect!;
initialize = SciMLBase.INITIALIZE_DEFAULT,
finalize = SciMLBase.FINALIZE_DEFAULT,
idxs = nothing,
rootfind = LeftRootFind,
save_positions = (false, false),
affect_neg! = affect!,
interp_points = 10,
dtrelax = 1,
abstol = 10eps(Float32), reltol = 0, repeat_nudge = 1 // 100)
initialize = SciMLBase.INITIALIZE_DEFAULT,
finalize = SciMLBase.FINALIZE_DEFAULT,
idxs = nothing,
rootfind = LeftRootFind,
save_positions = (false, false),
affect_neg! = affect!,
interp_points = 10,
dtrelax = 1,
abstol = 10eps(Float32), reltol = 0, repeat_nudge = 1 // 100)
GPUContinuousCallback(condition, affect!, affect_neg!, initialize, finalize, idxs,
rootfind, interp_points,
save_positions,
Expand All @@ -101,7 +101,7 @@ function Base.convert(::Type{GPUContinuousCallback}, x::T) where {T <: Continuou
end

function generate_callback(callback::DiscreteCallback, I,
ensemblealg)
ensemblealg)
if ensemblealg isa EnsembleGPUArray
backend = ensemblealg.backend
cur = adapt(backend, [false for i in 1:I])
Expand Down
Loading