Skip to content

Commit

Permalink
reapply formatter
Browse files Browse the repository at this point in the history
  • Loading branch information
ArnoStrouwen committed Feb 25, 2024
1 parent 082e7c3 commit 1d8f874
Show file tree
Hide file tree
Showing 32 changed files with 391 additions and 263 deletions.
3 changes: 2 additions & 1 deletion .JuliaFormatter.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
style = "sciml"
format_markdown = true
format_markdown = true
format_docstrings = true
8 changes: 5 additions & 3 deletions docs/pages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

pages = ["index.md",
"getting_started.md",
"Tutorials" => Any["GPU Ensembles" => Any["tutorials/gpu_ensemble_basic.md",
"Tutorials" => Any[
"GPU Ensembles" => Any["tutorials/gpu_ensemble_basic.md",
"tutorials/parallel_callbacks.md",
"tutorials/multigpu.md",
"tutorials/lower_level_api.md",
"tutorials/weak_order_conv_sde.md"],
"Within-Method GPU" => Any["tutorials/within_method_gpu.md"]],
"Examples" => Any["GPU Ensembles" => Any["examples/sde.md",
"Examples" => Any[
"GPU Ensembles" => Any["examples/sde.md",
"examples/ad.md",
"examples/reductions.md"],
"Within-Method GPU" => Any["examples/reaction_diffusion.md",
Expand All @@ -17,5 +19,5 @@ pages = ["index.md",
"manual/ensemblegpuarray.md",
"manual/backends.md",
"manual/optimal_trajectories.md",
"manual/choosing_ensembler.md"],
"manual/choosing_ensembler.md"]
]
3 changes: 2 additions & 1 deletion docs/src/examples/sde.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ prob = SDEProblem(lorenz, multiplicative_noise, u0, tspan, p)
const pre_p = [rand(Float32, 3) for i in 1:10_000]
prob_func = (prob, i, repeat) -> remake(prob, p = pre_p[i] .* p)
monteprob = EnsembleProblem(prob, prob_func = prob_func)
sol = solve(monteprob, SOSRI(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10_000,
sol = solve(
monteprob, SOSRI(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10_000,
saveat = 1.0f0)
```
3 changes: 2 additions & 1 deletion docs/src/tutorials/gpu_ensemble_basic.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ Changing this to being GPU-parallelized is as simple as changing the ensemble me
`EnsembleGPUArray`:

```@example lorenz
sol = solve(monteprob, Tsit5(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10_000,
sol = solve(
monteprob, Tsit5(), EnsembleGPUArray(CUDA.CUDABackend()), trajectories = 10_000,
saveat = 1.0f0);
```

Expand Down
6 changes: 4 additions & 2 deletions docs/src/tutorials/lower_level_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,14 @@ end
## Finally use the lower API for faster solves! (Fixed time-stepping)
@time CUDA.@sync sol = DiffEqGPU.vectorized_map_solve(probs, Tsit5(), EnsembleGPUArray(0.0),
@time CUDA.@sync sol = DiffEqGPU.vectorized_map_solve(
probs, Tsit5(), EnsembleGPUArray(0.0),
batch, false, dt = 0.001f0,
save_everystep = false, dense = false)
## Adaptive time-stepping (Notice the boolean argument)
@time CUDA.@sync sol = DiffEqGPU.vectorized_map_solve(probs, Tsit5(), EnsembleGPUArray(0.0),
@time CUDA.@sync sol = DiffEqGPU.vectorized_map_solve(
probs, Tsit5(), EnsembleGPUArray(0.0),
batch, true, dt = 0.001f0,
save_everystep = false, dense = false)
```
21 changes: 11 additions & 10 deletions src/algorithms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct EnsembleCPUArray <: EnsembleArrayAlgorithm end

"""
```julia
EnsembleGPUArray(backend,cpu_offload = 0.2)
EnsembleGPUArray(backend, cpu_offload = 0.2)
```
An `EnsembleArrayAlgorithm` which utilizes the GPU kernels to parallelize each ODE solve
Expand Down Expand Up @@ -73,13 +73,14 @@ function lorenz(du, u, p, t)
du[3] = u[1] * u[2] - p[3] * u[3]
end
u0 = Float32[1.0;0.0;0.0]
tspan = (0.0f0,100.0f0)
p = [10.0f0,28.0f0,8/3f0]
prob = ODEProblem(lorenz,u0,tspan,p)
prob_func = (prob,i,repeat) -> remake(prob,p=rand(Float32,3).*p)
monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy=false)
@time sol = solve(monteprob,Tsit5(),EnsembleGPUArray(CUDADevice()),trajectories=10_000,saveat=1.0f0)
u0 = Float32[1.0; 0.0; 0.0]
tspan = (0.0f0, 100.0f0)
p = [10.0f0, 28.0f0, 8 / 3.0f0]
prob = ODEProblem(lorenz, u0, tspan, p)
prob_func = (prob, i, repeat) -> remake(prob, p = rand(Float32, 3) .* p)
monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
@time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(CUDADevice()),
trajectories = 10_000, saveat = 1.0f0)
```
"""
struct EnsembleGPUArray{Backend} <: EnsembleArrayAlgorithm
Expand All @@ -89,7 +90,7 @@ end

"""
```julia
EnsembleGPUKernel(backend,cpu_offload = 0.2)
EnsembleGPUKernel(backend, cpu_offload = 0.2)
```
A massively-parallel ensemble algorithm which generates a unique GPU kernel for the entire
Expand Down Expand Up @@ -146,7 +147,7 @@ prob_func = (prob, i, repeat) -> remake(prob, p = (@SVector rand(Float32, 3)) .*
monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy = false)
@time sol = solve(monteprob, GPUTsit5(), EnsembleGPUKernel(), trajectories = 10_000,
adaptive = false, dt = 0.1f0)
adaptive = false, dt = 0.1f0)
```
"""
struct EnsembleGPUKernel{Dev} <: EnsembleKernelAlgorithm
Expand Down
17 changes: 8 additions & 9 deletions src/ensemblegpuarray/lowerlevel_solve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,22 @@ Lower level API for `EnsembleArrayAlgorithm`. Avoids conversion of solution to C
```julia
vectorized_map_solve(probs, alg,
ensemblealg::Union{EnsembleArrayAlgorithm}, I,
adaptive)
ensemblealg::Union{EnsembleArrayAlgorithm}, I,
adaptive)
```
## Arguments
- `probs`: the GPU-setup problems generated by the ensemble.
- `alg`: the kernel-based differential equation solver. Most of the solvers from OrdinaryDiffEq.jl
are supported.
- `ensemblealg`: The `EnsembleGPUArray()` algorithm.
- `I`: The iterator argument. Can be set to, e.g., `1:10_000` to simulate 10,000 trajectories.
- `adaptive`: The Boolean argument for time-stepping. Use `true` to enable adaptive time-stepping.
- `probs`: the GPU-setup problems generated by the ensemble.
- `alg`: the kernel-based differential equation solver. Most of the solvers from OrdinaryDiffEq.jl
are supported.
- `ensemblealg`: The `EnsembleGPUArray()` algorithm.
- `I`: The iterator argument. Can be set to, e.g., `1:10_000` to simulate 10,000 trajectories.
- `adaptive`: The Boolean argument for time-stepping. Use `true` to enable adaptive time-stepping.
## Keyword Arguments
Only a subset of the common solver arguments are supported.
"""
function vectorized_map_solve end

Expand Down
2 changes: 1 addition & 1 deletion src/ensemblegpukernel/callbacks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ struct GPUContinuousCallback{F1, F2, F3, F4, F5, F6, T, T2, T3, I, R} <:
interp_points, save_positions::F6, dtrelax::R, abstol::T,
reltol::T2,
repeat_nudge::T3) where {F1, F2, F3, F4, F5, F6, T, T2,
T3, I, R,
T3, I, R
}
if save_positions != (false, false)
error("Callback `save_positions` are incompatible with kernel-based GPU ODE solvers due requiring static sizing. Please ensure `save_positions = (false,false)` is set in all callback definitions used with such solvers.")
Expand Down
78 changes: 47 additions & 31 deletions src/ensemblegpukernel/integrators/integrator_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ function build_adaptive_controller_cache(alg::A, ::Type{T}) where {A, T}
return beta1, beta2, qmax, qmin, gamma, qoldinit, qold
end

@inline function savevalues!(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function savevalues!(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S,
T,
T
}, ts,
us,
force = false) where {AlgType <: GPUODEAlgorithm, IIP, S, T}
Expand Down Expand Up @@ -45,23 +46,25 @@ end
saved, savedexactly
end

@inline function DiffEqBase.terminate!(integrator::DiffEqBase.AbstractODEIntegrator{AlgType,
@inline function DiffEqBase.terminate!(
integrator::DiffEqBase.AbstractODEIntegrator{AlgType,
IIP, S,
T},
retcode = ReturnCode.Terminated) where {
AlgType <:
GPUODEAlgorithm,
IIP,
S,
T,
T
}
integrator.retcode = retcode
end

@inline function apply_discrete_callback!(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function apply_discrete_callback!(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S, T,
S, T
},
ts, us,
callback::GPUDiscreteCallback) where {
Expand All @@ -79,10 +82,11 @@ end
integrator.u_modified, saved_in_cb
end

@inline function apply_discrete_callback!(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function apply_discrete_callback!(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S, T,
S, T
},
ts, us,
callback::GPUDiscreteCallback,
Expand All @@ -93,10 +97,11 @@ end
args...)
end

@inline function apply_discrete_callback!(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function apply_discrete_callback!(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S, T,
S, T
},
ts, us,
discrete_modified::Bool,
Expand All @@ -110,10 +115,11 @@ end
discrete_modified || bool, saved_in_cb || saved_in_cb2
end

@inline function apply_discrete_callback!(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function apply_discrete_callback!(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S, T,
S, T
},
ts, us,
discrete_modified::Bool,
Expand All @@ -126,11 +132,12 @@ end
discrete_modified || bool, saved_in_cb || saved_in_cb2
end

@inline function interpolate(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function interpolate(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S,
T,
T
},
t) where {AlgType <: GPUODEAlgorithm, IIP, S, T}
θ = (t - integrator.tprev) / integrator.dt
Expand All @@ -142,11 +149,12 @@ end
b7θ * integrator.k7)
end

@inline function _change_t_via_interpolation!(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function _change_t_via_interpolation!(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S,
T,
T
},
t,
modify_save_endpoint::Type{Val{T1}}) where {
Expand All @@ -155,7 +163,7 @@ end
IIP,
S,
T,
T1,
T1
}
# Can get rid of an allocation here with a function
# get_tmp_arr(integrator.cache) which gives a pointer to some
Expand All @@ -169,11 +177,12 @@ end
#integrator.dt = integrator.t - integrator.tprev
end
end
@inline function DiffEqBase.change_t_via_interpolation!(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function DiffEqBase.change_t_via_interpolation!(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S,
T,
T
},
t,
modify_save_endpoint::Type{Val{T1}} = Val{
Expand All @@ -184,12 +193,13 @@ end
IIP,
S,
T,
T1,
T1
}
_change_t_via_interpolation!(integrator, t, modify_save_endpoint)
end

@inline function apply_callback!(integrator::DiffEqBase.AbstractODEIntegrator{AlgType, IIP,
@inline function apply_callback!(
integrator::DiffEqBase.AbstractODEIntegrator{AlgType, IIP,
S, T},
callback::GPUContinuousCallback,
cb_time, prev_sign, event_idx, ts,
Expand Down Expand Up @@ -219,7 +229,8 @@ end
true, saved_in_cb
end

@inline function handle_callbacks!(integrator::DiffEqBase.AbstractODEIntegrator{AlgType,
@inline function handle_callbacks!(
integrator::DiffEqBase.AbstractODEIntegrator{AlgType,
IIP, S, T},
ts, us) where {AlgType <: GPUODEAlgorithm, IIP, S, T}
discrete_callbacks = integrator.callback.discrete_callbacks
Expand All @@ -232,7 +243,8 @@ end
if !(continuous_callbacks isa Tuple{})
event_occurred = false

time, upcrossing, event_occurred, event_idx, idx, counter = DiffEqBase.find_first_continuous_callback(integrator,
time, upcrossing, event_occurred, event_idx, idx, counter = DiffEqBase.find_first_continuous_callback(
integrator,
continuous_callbacks...)

if event_occurred
Expand All @@ -256,16 +268,18 @@ end
return false, saved_in_cb
end

@inline function DiffEqBase.find_callback_time(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function DiffEqBase.find_callback_time(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S,
T,
T
},
callback::DiffEqGPU.GPUContinuousCallback,
counter) where {AlgType <: GPUODEAlgorithm,
IIP, S, T}
event_occurred, interp_index, prev_sign, prev_sign_index, event_idx = DiffEqBase.determine_event_occurance(integrator,
event_occurred, interp_index, prev_sign, prev_sign_index, event_idx = DiffEqBase.determine_event_occurance(
integrator,
callback,
counter)

Expand Down Expand Up @@ -321,15 +335,16 @@ end
GPUODEAlgorithm,
IIP,
S,
T,
T
}
return nothing
end

@inline function DiffEqBase.get_condition(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function DiffEqBase.get_condition(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S, T,
S, T
},
callback,
abst) where {AlgType <: GPUODEAlgorithm, IIP, S, T
Expand All @@ -345,11 +360,12 @@ end
end

# interp_points = 0 or equivalently nothing
@inline function DiffEqBase.determine_event_occurance(integrator::DiffEqBase.AbstractODEIntegrator{
@inline function DiffEqBase.determine_event_occurance(
integrator::DiffEqBase.AbstractODEIntegrator{
AlgType,
IIP,
S,
T,
T
},
callback::DiffEqGPU.GPUContinuousCallback,
counter) where {
Expand Down
Loading

0 comments on commit 1d8f874

Please sign in to comment.