Closed
Description
Some quick tests show that when using broadcast ArrayPartition
s do quite well:
using RecursiveArrayTools
u0 = zeros(100)
v0 = ones(100)
t1 = ArrayPartition((u0,v0))
t2 = [u0;v0]
t12 = ArrayPartition((similar(u0),similar(v0)))
t22 = similar(t2)
using BenchmarkTools
function f(t12,t1)
for i in eachindex(t12.x)
t12.x[i] .= t1.x[i] .+ t1.x[i]
end
end
@benchmark f(t12,t1)
#=
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 70.867 ns (0.00% GC)
median time: 74.457 ns (0.00% GC)
mean time: 81.489 ns (0.00% GC)
maximum time: 249.982 ns (0.00% GC)
--------------
samples: 10000
evals/sample: 979
=#
function g(t22,t2)
t22 .= t2 .+ t2
end
@benchmark g(t22,t2)
#=
BenchmarkTools.Trial:
memory estimate: 64 bytes
allocs estimate: 3
--------------
minimum time: 247.830 ns (0.00% GC)
median time: 264.466 ns (0.00% GC)
mean time: 294.141 ns (1.59% GC)
maximum time: 6.066 μs (91.20% GC)
--------------
samples: 10000
evals/sample: 352
=#
add_idxs(x,expr) = expr
add_idxs{T<:ArrayPartition}(::Type{T},expr) = :($(expr).x[i])
@generated function Base.broadcast!(f,A::ArrayPartition,B...)
exs = ((add_idxs(B[i],:(B[$i])) for i in eachindex(B))...)
:(for i in eachindex(A.x)
broadcast!(f,A.x[i],$(exs...))
end)
end
function h(t12,t1)
t12 .= t1 .+ t1
end
@benchmark h(t12,t1)
#=
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 60.261 ns (0.00% GC)
median time: 62.329 ns (0.00% GC)
mean time: 68.939 ns (0.00% GC)
maximum time: 225.685 ns (0.00% GC)
--------------
samples: 10000
evals/sample: 991
=#
u0 = zeros(2)
v0 = ones(2)
t1 = ArrayPartition((u0,v0))
t2 = [u0;v0]
t12 = ArrayPartition((similar(u0),similar(v0)))
t22 = similar(t2)
@benchmark f(t12,t1)
#=
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 36.886 ns (0.00% GC)
median time: 39.520 ns (0.00% GC)
mean time: 43.894 ns (0.00% GC)
maximum time: 173.009 ns (0.00% GC)
--------------
samples: 10000
evals/sample: 1000
=#
@benchmark g(t22,t2)
#=
BenchmarkTools.Trial:
memory estimate: 64 bytes
allocs estimate: 3
--------------
minimum time: 211.612 ns (0.00% GC)
median time: 226.932 ns (0.00% GC)
mean time: 253.315 ns (1.97% GC)
maximum time: 4.982 μs (92.19% GC)
--------------
samples: 10000
evals/sample: 516
=#
@benchmark h(t12,t1)
#=
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 27.224 ns (0.00% GC)
median time: 28.689 ns (0.00% GC)
mean time: 32.268 ns (0.00% GC)
maximum time: 168.033 ns (0.00% GC)
--------------
samples: 10000
evals/sample: 1000
=#
This is great because this allows for heterogeneously typed arrays and forms the basis of the partitioned methods. But how does it do inside the full ODE solver?
using DiffEqBase, OrdinaryDiffEq, Base.Test, RecursiveArrayTools, DiffEqDevTools
using BenchmarkTools
u0 = zeros(100)
v0 = ones(100)
f1 = function (t,u,v,du)
du .= v
end
f2 = function (t,u,v,dv)
dv .= -u
end
function (::typeof(f2))(::Type{Val{:analytic}}, x, y0)
u0, v0 = y0
ArrayPartition(u0*cos(x) + v0*sin(x), -u0*sin(x) + v0*cos(x))
end
prob = DynamicalODEProblem(f1,f2,u0,v0,(0.0,5.0))
@benchmark solve(prob,RK4(),dt=1/2,save_everystep=false)
#=
BenchmarkTools.Trial:
memory estimate: 82.59 KiB
allocs estimate: 565
--------------
minimum time: 126.171 μs (0.00% GC)
median time: 137.295 μs (0.00% GC)
mean time: 159.308 μs (4.95% GC)
maximum time: 3.158 ms (88.39% GC)
--------------
samples: 10000
evals/sample: 1
=#
function f(t,u,du)
uu = @view u[1:100]
v = @view u[101:end]
du[1:100] .= uu
du[101:200] .= v
end
u02 = [zeros(100);ones(100)]
tspan=(0.0,5.0)
prob = ODEProblem(f,u02,tspan)
@benchmark sol = solve(prob,RK4(),dt=1/2,save_everystep=false)
#=
BenchmarkTools.Trial:
memory estimate: 43.98 KiB
allocs estimate: 408
--------------
minimum time: 65.574 μs (0.00% GC)
median time: 71.722 μs (0.00% GC)
mean time: 83.817 μs (6.20% GC)
maximum time: 3.776 ms (93.39% GC)
--------------
samples: 10000
evals/sample: 1
=#
This matters now because the dynamical ODEs generate a problem like this which can now be used with the general ODE solvers as a fallback, so it would be nice to reduce the overhead of that fallback, especially given that there doesn't seem to be some fundamental reason that the type's elementwise operations are slower.
That timing is with a modification to RK4 to make it use broadcast.
Metadata
Metadata
Assignees
Labels
No labels