From 2e73c6edfb9a6150256898d8a6f35e9d5d535480 Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 24 Jun 2024 17:50:46 -0400
Subject: [PATCH 1/2] Optimize sparse AD

---
 src/sparse_hessian.jl  | 46 +++++++++++++++++++++++++++++++---------------
 src/sparse_jacobian.jl | 24 ++++++++++++++--------
 2 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/src/sparse_hessian.jl b/src/sparse_hessian.jl
index 8d899666..4b9486bb 100644
--- a/src/sparse_hessian.jl
+++ b/src/sparse_hessian.jl
@@ -4,6 +4,7 @@ struct SparseADHessian{Tag, GT, S, T} <: ADNLPModels.ADBackend
   colptr::Vector{Int}
   colors::Vector{Int}
   ncolors::Int
+  dcolors::Dict{Int, Vector{Int}}
   res::S
   lz::Vector{ForwardDiff.Dual{Tag, T, 1}}
   glz::Vector{ForwardDiff.Dual{Tag, T, 1}}
@@ -37,6 +38,14 @@ function SparseADHessian(
   rowval = trilH.rowval
   colptr = trilH.colptr

+  # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
+  dcolors = Dict{Int, Vector{Int}}(i => Int[] for i=1:ncolors)
+  for (i, color) in enumerate(colors)
+    for k = colptr[i]:(colptr[i + 1] - 1)
+      push!(dcolors[color], k)
+    end
+  end
+
   # prepare directional derivatives
   res = similar(x0)

@@ -65,7 +74,7 @@ function SparseADHessian(
   Hvp = fill!(S(undef, ntotal), 0)
   y = fill!(S(undef, ncon), 0)

-  return SparseADHessian(d, rowval, colptr, colors, ncolors, res, lz, glz, sol, longv, Hvp, ∇φ!, y)
+  return SparseADHessian(d, rowval, colptr, colors, ncolors, dcolors, res, lz, glz, sol, longv, Hvp, ∇φ!, y)
 end

 struct SparseReverseADHessian{T, S, Tagf, F, Tagψ, P} <: ADNLPModels.ADBackend
@@ -74,6 +83,7 @@ struct SparseReverseADHessian{T, S, Tagf, F, Tagψ, P} <: ADNLPModels.ADBackend
   colptr::Vector{Int}
   colors::Vector{Int}
   ncolors::Int
+  dcolors::Dict{Int, Vector{Int}}
   res::S
   z::Vector{ForwardDiff.Dual{Tagf, T, 1}}
   gz::Vector{ForwardDiff.Dual{Tagf, T, 1}}
@@ -109,6 +119,14 @@ function SparseReverseADHessian(
   rowval = trilH.rowval
   colptr = trilH.colptr

+  # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
+  dcolors = Dict{Int, Vector{Int}}(i => Int[] for i=1:ncolors)
+  for (i, color) in enumerate(colors)
+    for k = colptr[i]:(colptr[i + 1] - 1)
+      push!(dcolors[color], k)
+    end
+  end
+
   # prepare directional derivatives
   res = similar(x0)

@@ -147,6 +165,7 @@ function SparseReverseADHessian(
     colptr,
     colors,
     ncolors,
+    dcolors,
     res,
     z,
     gz,
@@ -213,13 +232,12 @@ function sparse_hess_coord!(
     b.∇φ!(b.glz, b.lz)
     ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz)
     b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar))
-    for j = 1:nvar
-      if b.colors[j] == icol
-        for k = b.colptr[j]:(b.colptr[j + 1] - 1)
-          i = b.rowval[k]
-          vals[k] = b.res[i]
-        end
-      end
+
+    # Update the vector vals
+    index_vals = b.dcolors[icol]
+    for k in index_vals
+      row = b.rowval[k]
+      vals[k] = b.res[row]
     end
   end

@@ -251,13 +269,11 @@ function sparse_hess_coord!(
     ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ)
     b.res .+= b.Hv_temp

-    for j = 1:nvar
-      if b.colors[j] == icol
-        for k = b.colptr[j]:(b.colptr[j + 1] - 1)
-          i = b.rowval[k]
-          vals[k] = b.res[i]
-        end
-      end
+    # Update the vector vals
+    index_vals = b.dcolors[icol]
+    for k in index_vals
+      row = b.rowval[k]
+      vals[k] = b.res[row]
     end
   end

diff --git a/src/sparse_jacobian.jl b/src/sparse_jacobian.jl
index 371524d3..43d6320a 100644
--- a/src/sparse_jacobian.jl
+++ b/src/sparse_jacobian.jl
@@ -4,6 +4,7 @@ struct SparseADJacobian{T, Tag, S} <: ADBackend
   colptr::Vector{Int}
   colors::Vector{Int}
   ncolors::Int
+  dcolors::Dict{Int, Vector{Int}}
   z::Vector{ForwardDiff.Dual{Tag, T, 1}}
   cz::Vector{ForwardDiff.Dual{Tag, T, 1}}
   res::S
@@ -31,13 +32,21 @@ function SparseADJacobian(
   rowval = J.rowval
   colptr = J.colptr

+  # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
+  dcolors = Dict{Int, Vector{Int}}(i => Int[] for i=1:ncolors)
+  for (i, color) in enumerate(colors)
+    for k = colptr[i]:(colptr[i + 1] - 1)
+      push!(dcolors[color], k)
+    end
+  end
+
   tag = ForwardDiff.Tag{typeof(c!), T}

   z = Vector{ForwardDiff.Dual{tag, T, 1}}(undef, nvar)
   cz = similar(z, ncon)
   res = similar(x0, ncon)

-  SparseADJacobian(d, rowval, colptr, colors, ncolors, z, cz, res)
+  SparseADJacobian(d, rowval, colptr, colors, ncolors, dcolors, z, cz, res)
 end

 function get_nln_nnzj(b::SparseADJacobian, nvar, ncon)
@@ -71,13 +80,12 @@ function sparse_jac_coord!(
     map!(ForwardDiff.Dual{Tag}, b.z, x, b.d) # x + ε * v
     ℓ!(b.cz, b.z) # c!(cz, x + ε * v)
     ForwardDiff.extract_derivative!(Tag, b.res, b.cz) # ∇c!(cx, x)ᵀv
-    for j = 1:nvar
-      if b.colors[j] == icol
-        for k = b.colptr[j]:(b.colptr[j + 1] - 1)
-          i = b.rowval[k]
-          vals[k] = b.res[i]
-        end
-      end
+
+    # Update the vector vals
+    index_vals = b.dcolors[icol]
+    for k in index_vals
+      row = b.rowval[k]
+      vals[k] = b.res[row]
     end
   end
   return vals

From b11f4a4740a6b7b20a394ec23b48bbd1e762444d Mon Sep 17 00:00:00 2001
From: Alexis Montoison
Date: Mon, 24 Jun 2024 18:33:16 -0400
Subject: [PATCH 2/2] Use UnitRange{Int} in dcolors to use less storage

---
 src/sparse_hessian.jl  | 40 ++++++++++++++++++++--------------------
 src/sparse_jacobian.jl | 20 ++++++++++----------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/sparse_hessian.jl b/src/sparse_hessian.jl
index 4b9486bb..f9412a24 100644
--- a/src/sparse_hessian.jl
+++ b/src/sparse_hessian.jl
@@ -4,7 +4,7 @@ struct SparseADHessian{Tag, GT, S, T} <: ADNLPModels.ADBackend
   colptr::Vector{Int}
   colors::Vector{Int}
   ncolors::Int
-  dcolors::Dict{Int, Vector{Int}}
+  dcolors::Dict{Int, Vector{UnitRange{Int}}}
   res::S
   lz::Vector{ForwardDiff.Dual{Tag, T, 1}}
   glz::Vector{ForwardDiff.Dual{Tag, T, 1}}
@@ -39,11 +39,10 @@ function SparseADHessian(
   colptr = trilH.colptr

   # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
-  dcolors = Dict{Int, Vector{Int}}(i => Int[] for i=1:ncolors)
+  dcolors = Dict(i => UnitRange{Int}[] for i=1:ncolors)
   for (i, color) in enumerate(colors)
-    for k = colptr[i]:(colptr[i + 1] - 1)
-      push!(dcolors[color], k)
-    end
+    range_vals = colptr[i]:(colptr[i + 1] - 1)
+    push!(dcolors[color], range_vals)
   end

   # prepare directional derivatives
@@ -83,7 +82,7 @@ struct SparseReverseADHessian{T, S, Tagf, F, Tagψ, P} <: ADNLPModels.ADBackend
   colptr::Vector{Int}
   colors::Vector{Int}
   ncolors::Int
-  dcolors::Dict{Int, Vector{Int}}
+  dcolors::Dict{Int, Vector{UnitRange{Int}}}
   res::S
   z::Vector{ForwardDiff.Dual{Tagf, T, 1}}
   gz::Vector{ForwardDiff.Dual{Tagf, T, 1}}
@@ -120,11 +119,10 @@ function SparseReverseADHessian(
   colptr = trilH.colptr

   # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
-  dcolors = Dict{Int, Vector{Int}}(i => Int[] for i=1:ncolors)
+  dcolors = Dict(i => UnitRange{Int}[] for i=1:ncolors)
   for (i, color) in enumerate(colors)
-    for k = colptr[i]:(colptr[i + 1] - 1)
-      push!(dcolors[color], k)
-    end
+    range_vals = colptr[i]:(colptr[i + 1] - 1)
+    push!(dcolors[color], range_vals)
   end

   # prepare directional derivatives
@@ -233,11 +231,12 @@ function sparse_hess_coord!(
     ForwardDiff.extract_derivative!(Tag, b.Hvp, b.glz)
     b.res .= view(b.Hvp, (ncon + 1):(ncon + nvar))

-    # Update the vector vals
-    index_vals = b.dcolors[icol]
-    for k in index_vals
-      row = b.rowval[k]
-      vals[k] = b.res[row]
+    # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
+    for range_vals in b.dcolors[icol]
+      for k in range_vals
+        row = b.rowval[k]
+        vals[k] = b.res[row]
+      end
     end
   end

@@ -269,11 +268,12 @@ function sparse_hess_coord!(
     ForwardDiff.extract_derivative!(Tagψ, b.Hv_temp, b.gzψ)
     b.res .+= b.Hv_temp

-    # Update the vector vals
-    index_vals = b.dcolors[icol]
-    for k in index_vals
-      row = b.rowval[k]
-      vals[k] = b.res[row]
+    # Store in `vals` the nonzeros of each column of the Hessian computed with color `icol`
+    for range_vals in b.dcolors[icol]
+      for k in range_vals
+        row = b.rowval[k]
+        vals[k] = b.res[row]
+      end
     end
   end

diff --git a/src/sparse_jacobian.jl b/src/sparse_jacobian.jl
index 43d6320a..f5091479 100644
--- a/src/sparse_jacobian.jl
+++ b/src/sparse_jacobian.jl
@@ -4,7 +4,7 @@ struct SparseADJacobian{T, Tag, S} <: ADBackend
   colptr::Vector{Int}
   colors::Vector{Int}
   ncolors::Int
-  dcolors::Dict{Int, Vector{Int}}
+  dcolors::Dict{Int, Vector{UnitRange{Int}}}
   z::Vector{ForwardDiff.Dual{Tag, T, 1}}
   cz::Vector{ForwardDiff.Dual{Tag, T, 1}}
   res::S
@@ -33,11 +33,10 @@ function SparseADJacobian(
   colptr = J.colptr

   # The indices of the nonzero elements in `vals` that will be processed by color `c` are stored in `dcolors[c]`.
-  dcolors = Dict{Int, Vector{Int}}(i => Int[] for i=1:ncolors)
+  dcolors = Dict(i => UnitRange{Int}[] for i=1:ncolors)
   for (i, color) in enumerate(colors)
-    for k = colptr[i]:(colptr[i + 1] - 1)
-      push!(dcolors[color], k)
-    end
+    range_vals = colptr[i]:(colptr[i + 1] - 1)
+    push!(dcolors[color], range_vals)
   end

   tag = ForwardDiff.Tag{typeof(c!), T}
@@ -80,11 +80,12 @@ function sparse_jac_coord!(
     ℓ!(b.cz, b.z) # c!(cz, x + ε * v)
     ForwardDiff.extract_derivative!(Tag, b.res, b.cz) # ∇c!(cx, x)ᵀv

-    # Update the vector vals
-    index_vals = b.dcolors[icol]
-    for k in index_vals
-      row = b.rowval[k]
-      vals[k] = b.res[row]
+    # Store in `vals` the nonzeros of each column of the Jacobian computed with color `icol`
+    for range_vals in b.dcolors[icol]
+      for k in range_vals
+        row = b.rowval[k]
+        vals[k] = b.res[row]
+      end
     end
   end
   return vals
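For readers who want to try the indexing trick of PATCH 1/2 outside of ADNLPModels, here is a minimal standalone sketch in Julia. It assumes a sparsity pattern stored in CSC format (as produced by SparseArrays) and a column coloring given as a vector `colors`; the helper names `build_dcolors` and `decompress_color!` are illustrative and are not part of the ADNLPModels API. The point of the patch is that the mapping from a color to the positions it owns in `nzval` is built once, so the decompression loop no longer scans every column and tests `colors[j] == icol` for every color.

using SparseArrays

# Precompute, for each color c, the positions in `nzval` owned by the columns of color c.
# This mirrors the `dcolors` field introduced in PATCH 1/2.
function build_dcolors(colptr, colors, ncolors)
  dcolors = Dict{Int, Vector{Int}}(c => Int[] for c = 1:ncolors)
  for (j, c) in enumerate(colors)
    for k = colptr[j]:(colptr[j + 1] - 1)
      push!(dcolors[c], k)
    end
  end
  return dcolors
end

# Scatter the compressed column `res` (the directional derivative obtained with the seed
# vector of color `icol`) into the vector of nonzeros `vals`.
function decompress_color!(vals, res, rowval, dcolors, icol)
  for k in dcolors[icol]
    vals[k] = res[rowval[k]]
  end
  return vals
end

# Toy usage: a 3x3 arrowhead pattern whose columns 2 and 3 can share a color.
A = sparse([1, 2, 3, 2, 3], [1, 1, 1, 2, 3], ones(5))
colors = [1, 2, 2]
dcolors = build_dcolors(A.colptr, colors, 2)  # contains 1 => [1, 2, 3] and 2 => [4, 5]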
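PATCH 2/2 keeps the same decompression strategy but changes what `dcolors` stores: because the nonzeros of one column occupy the contiguous slice `colptr[j]:(colptr[j + 1] - 1)` of `nzval`, each column can be recorded as a single `UnitRange{Int}` (two integers) instead of one `Int` per nonzero. A sketch of that variant, under the same assumptions and with the same illustrative helper names as above:

# Range-based variant of the precomputation, mirroring the `Vector{UnitRange{Int}}`
# storage of PATCH 2/2: one range per column instead of one index per nonzero.
function build_dcolors_ranges(colptr, colors, ncolors)
  dcolors = Dict(c => UnitRange{Int}[] for c = 1:ncolors)
  for (j, c) in enumerate(colors)
    push!(dcolors[c], colptr[j]:(colptr[j + 1] - 1))
  end
  return dcolors
end

# Decompression now walks one contiguous range per column of color `icol`.
function decompress_color!(vals, res, rowval, dcolors::Dict{Int, Vector{UnitRange{Int}}}, icol)
  for range_vals in dcolors[icol]
    for k in range_vals
      vals[k] = res[rowval[k]]
    end
  end
  return vals
end

# Reusing the toy pattern `A` and `colors` from the previous sketch:
dcolors_r = build_dcolors_ranges(A.colptr, colors, 2)  # contains 1 => [1:3] and 2 => [4:4, 5:5]

For a pattern with nnz stored entries and n columns, this replaces one integer per nonzero with at most two integers per column (plus the vector overhead), which is the storage saving the commit message refers to.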