SciML · utkarsh530 · Nov 11, 2023 · Nov 10, 2023 · Nov 11, 2023
diff --git a/Project.toml b/Project.toml
@@ -40,7 +40,7 @@ oneAPIExt = ["oneAPI"]
 [compat]
 AMDGPU = "0.4.9"
 Adapt = "3"
-CUDA = "4.1.0"
+CUDA = "4.1.0, 5"
 ChainRulesCore = "1"
 DiffEqBase = "6.122"
 DocStringExtensions = "0.8, 0.9"

diff --git a/src/ensemblegpuarray/kernels.jl b/src/ensemblegpuarray/kernels.jl
@@ -13,8 +13,8 @@ function Adapt.adapt_structure(to, ps::ParamWrapper{P, T}) where {P, T}
 end
 
 @kernel function gpu_kernel(f, du, @Const(u),
-    @Const(params::AbstractArray{ParamWrapper{P, T}}),
-    @Const(t)) where {P, T}
+        @Const(params::AbstractArray{ParamWrapper{P, T}}),
+        @Const(t)) where {P, T}
     i = @index(Global, Linear)
     @inbounds p = params[i].params
     @inbounds tspan = params[i].data
@@ -25,8 +25,8 @@ end
 end
 
 @kernel function gpu_kernel_oop(f, du, @Const(u),
-    @Const(params::AbstractArray{ParamWrapper{P, T}}),
-    @Const(t)) where {P, T}
+        @Const(params::AbstractArray{ParamWrapper{P, T}}),
+        @Const(t)) where {P, T}
     i = @index(Global, Linear)
     @inbounds p = params[i].params
     @inbounds tspan = params[i].data
@@ -59,8 +59,8 @@ end
 end
 
 @kernel function jac_kernel(f, J, @Const(u),
-    @Const(params::AbstractArray{ParamWrapper{P, T}}),
-    @Const(t)) where {P, T}
+        @Const(params::AbstractArray{ParamWrapper{P, T}}),
+        @Const(t)) where {P, T}
     i = @index(Global, Linear) - 1
     section = (1 + (i * size(u, 1))):((i + 1) * size(u, 1))
     @inbounds p = params[i + 1].params
@@ -73,8 +73,8 @@ end
 end
 
 @kernel function jac_kernel_oop(f, J, @Const(u),
-    @Const(params::AbstractArray{ParamWrapper{P, T}}),
-    @Const(t)) where {P, T}
+        @Const(params::AbstractArray{ParamWrapper{P, T}}),
+        @Const(t)) where {P, T}
     i = @index(Global, Linear) - 1
     section = (1 + (i * size(u, 1))):((i + 1) * size(u, 1))
 
@@ -122,7 +122,7 @@ end
 end
 
 @kernel function continuous_condition_kernel(condition, out, @Const(u), @Const(t),
-    @Const(p))
+        @Const(p))
     i = @index(Global, Linear)
     @views @inbounds out[i] = condition(u[:, i], t, FakeIntegrator(u[:, i], t, p[:, i]))
 end
@@ -141,8 +141,8 @@ function workgroupsize(backend, n)
 end
 
 @kernel function W_kernel(jac, W, @Const(u),
-    @Const(params::AbstractArray{ParamWrapper{P, T}}), @Const(gamma),
-    @Const(t)) where {P, T}
+        @Const(params::AbstractArray{ParamWrapper{P, T}}), @Const(gamma),
+        @Const(t)) where {P, T}
     i = @index(Global, Linear)
     len = size(u, 1)
     _W = @inbounds @view(W[:, :, i])
@@ -176,9 +176,9 @@ end
 end
 
 @kernel function W_kernel_oop(jac, W, @Const(u),
-    @Const(params::AbstractArray{ParamWrapper{P, T}}),
-    @Const(gamma),
-    @Const(t)) where {P, T}
+        @Const(params::AbstractArray{ParamWrapper{P, T}}),
+        @Const(gamma),
+        @Const(t)) where {P, T}
     i = @index(Global, Linear)
     len = size(u, 1)
 
@@ -218,7 +218,7 @@ end
 end
 
 @kernel function Wt_kernel(f::AbstractArray{T}, W, @Const(u), @Const(p), @Const(gamma),
-    @Const(t)) where {T}
+        @Const(t)) where {T}
     i = @index(Global, Linear)
     len = size(u, 1)
     _W = @inbounds @view(W[:, :, i])
@@ -240,7 +240,7 @@ end
 end
 
 @kernel function Wt_kernel_oop(f::AbstractArray{T}, W, @Const(u), @Const(p), @Const(gamma),
-    @Const(t)) where {T}
+        @Const(t)) where {T}
     i = @index(Global, Linear)
     len = size(u, 1)
     _W = @inbounds @view(W[:, :, i])
@@ -268,7 +268,7 @@ end
 end
 
 @kernel function gpu_kernel_tgrad(f::AbstractArray{T}, du, @Const(u), @Const(p),
-    @Const(t)) where {T}
+        @Const(t)) where {T}
     i = @index(Global, Linear)
     @inbounds f = f[i].tgrad
     if eltype(p) <: Number
@@ -279,7 +279,7 @@ end
 end
 
 @kernel function gpu_kernel_oop_tgrad(f::AbstractArray{T}, du, @Const(u), @Const(p),
-    @Const(t)) where {T}
+        @Const(t)) where {T}
     i = @index(Global, Linear)
     @inbounds f = f[i].tgrad
     if eltype(p) <: Number
@@ -320,13 +320,13 @@ LinSolveGPUSplitFactorize() = LinSolveGPUSplitFactorize(0, 0)
 LinearSolve.needs_concrete_A(::LinSolveGPUSplitFactorize) = true
 
 function LinearSolve.init_cacheval(linsol::LinSolveGPUSplitFactorize, A, b, u, Pl, Pr,
-    maxiters::Int, abstol, reltol, verbose::Bool,
-    assumptions::LinearSolve.OperatorAssumptions)
+        maxiters::Int, abstol, reltol, verbose::Bool,
+        assumptions::LinearSolve.OperatorAssumptions)
     LinSolveGPUSplitFactorize(linsol.len, length(u) ÷ linsol.len)
 end
 
 function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::LinSolveGPUSplitFactorize,
-    args...; kwargs...)
+        args...; kwargs...)
     p = cache.cacheval
     A = cache.A
     b = cache.b

diff --git a/src/ensemblegpuarray/lowerlevel_solve.jl b/src/ensemblegpuarray/lowerlevel_solve.jl
@@ -24,9 +24,9 @@ Only a subset of the common solver arguments are supported.
 function vectorized_map_solve end
 
 function vectorized_map_solve(probs, alg,
-    ensemblealg::Union{EnsembleArrayAlgorithm}, I,
-    adaptive;
-    kwargs...)
+        ensemblealg::Union{EnsembleArrayAlgorithm}, I,
+        adaptive;
+        kwargs...)
 
     #    @assert all(Base.Fix2((prob1, prob2) -> isequal(prob1.tspan, prob2.tspan),probs[1]),probs)
     # u0 = reduce(hcat, Array(probs[i].u0) for i in 1:length(I))

diff --git a/src/ensemblegpuarray/problem_generation.jl b/src/ensemblegpuarray/problem_generation.jl
@@ -1,8 +1,8 @@
 function generate_problem(prob::SciMLBase.AbstractODEProblem,
-    u0,
-    p,
-    jac_prototype,
-    colorvec)
+        u0,
+        p,
+        jac_prototype,
+        colorvec)
     _f = let f = prob.f.f, kernel = DiffEqBase.isinplace(prob) ? gpu_kernel : gpu_kernel_oop
         function (du, u, p, t)
             version = get_backend(u)

diff --git a/src/ensemblegpukernel/callbacks.jl b/src/ensemblegpukernel/callbacks.jl
@@ -5,8 +5,8 @@ struct GPUDiscreteCallback{F1, F2, F3, F4, F5} <: SciMLBase.AbstractDiscreteCall
     finalize::F4
     save_positions::F5
     function GPUDiscreteCallback(condition::F1, affect!::F2,
-        initialize::F3, finalize::F4,
-        save_positions::F5) where {F1, F2, F3, F4, F5}
+            initialize::F3, finalize::F4,
+            save_positions::F5) where {F1, F2, F3, F4, F5}
         if save_positions != (false, false)
             error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.")
         end
@@ -15,9 +15,9 @@ struct GPUDiscreteCallback{F1, F2, F3, F4, F5} <: SciMLBase.AbstractDiscreteCall
     end
 end
 function GPUDiscreteCallback(condition, affect!;
-    initialize = SciMLBase.INITIALIZE_DEFAULT,
-    finalize = SciMLBase.FINALIZE_DEFAULT,
-    save_positions = (false, false))
+        initialize = SciMLBase.INITIALIZE_DEFAULT,
+        finalize = SciMLBase.FINALIZE_DEFAULT,
+        save_positions = (false, false))
     GPUDiscreteCallback(condition, affect!, initialize, finalize, save_positions)
 end
 
@@ -42,12 +42,12 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <:
     reltol::T2
     repeat_nudge::T3
     function GPUContinuousCallback(condition::F1, affect!::F2, affect_neg!::F3,
-        initialize::F4, finalize::F5, idxs::I, rootfind,
-        interp_points, save_positions::F6, dtrelax::R, abstol::T,
-        reltol::T2,
-        repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2,
-        T3, I, R,
-    }
+            initialize::F4, finalize::F5, idxs::I, rootfind,
+            interp_points, save_positions::F6, dtrelax::R, abstol::T,
+            reltol::T2,
+            repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2,
+            T3, I, R,
+        }
         if save_positions != (false, false)
             error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.")
         end
@@ -61,15 +61,15 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <:
 end
 
 function GPUContinuousCallback(condition, affect!, affect_neg!;
-    initialize = SciMLBase.INITIALIZE_DEFAULT,
-    finalize = SciMLBase.FINALIZE_DEFAULT,
-    idxs = nothing,
-    rootfind = LeftRootFind,
-    save_positions = (false, false),
-    interp_points = 10,
-    dtrelax = 1,
-    abstol = 10eps(Float32), reltol = 0,
-    repeat_nudge = 1 // 100)
+        initialize = SciMLBase.INITIALIZE_DEFAULT,
+        finalize = SciMLBase.FINALIZE_DEFAULT,
+        idxs = nothing,
+        rootfind = LeftRootFind,
+        save_positions = (false, false),
+        interp_points = 10,
+        dtrelax = 1,
+        abstol = 10eps(Float32), reltol = 0,
+        repeat_nudge = 1 // 100)
     GPUContinuousCallback(condition, affect!, affect_neg!, initialize, finalize,
         idxs,
         rootfind, interp_points,
@@ -78,15 +78,15 @@ function GPUContinuousCallback(condition, affect!, affect_neg!;
 end
 
 function GPUContinuousCallback(condition, affect!;
-    initialize = SciMLBase.INITIALIZE_DEFAULT,
-    finalize = SciMLBase.FINALIZE_DEFAULT,
-    idxs = nothing,
-    rootfind = LeftRootFind,
-    save_positions = (false, false),
-    affect_neg! = affect!,
-    interp_points = 10,
-    dtrelax = 1,
-    abstol = 10eps(Float32), reltol = 0, repeat_nudge = 1 // 100)
+        initialize = SciMLBase.INITIALIZE_DEFAULT,
+        finalize = SciMLBase.FINALIZE_DEFAULT,
+        idxs = nothing,
+        rootfind = LeftRootFind,
+        save_positions = (false, false),
+        affect_neg! = affect!,
+        interp_points = 10,
+        dtrelax = 1,
+        abstol = 10eps(Float32), reltol = 0, repeat_nudge = 1 // 100)
     GPUContinuousCallback(condition, affect!, affect_neg!, initialize, finalize, idxs,
         rootfind, interp_points,
         save_positions,
@@ -101,7 +101,7 @@ function Base.convert(::Type{GPUContinuousCallback}, x::T) where {T <: Continuou
 end
 
 function generate_callback(callback::DiscreteCallback, I,
-    ensemblealg)
+        ensemblealg)
     if ensemblealg isa EnsembleGPUArray
         backend = ensemblealg.backend
         cur = adapt(backend, [false for i in 1:I])