diff --git a/src/ad_api.jl b/src/ad_api.jl
index c9867b25..a142f93a 100644
--- a/src/ad_api.jl
+++ b/src/ad_api.jl
@@ -82,13 +82,18 @@ end
 """
     get_residual_nnzj(b::ADModelBackend, nvar, nequ)
+    get_residual_nnzj(nls::AbstractNLSModel, nvar, nequ)
 
-Return `get_nln_nnzj(b.jacobian_residual_backend, nvar, nequ)`.
+Return the number of nonzero elements in the residual Jacobian.
 """
 function get_residual_nnzj(b::ADModelBackend, nvar, nequ)
   get_nln_nnzj(b.jacobian_residual_backend, nvar, nequ)
 end
 
+function get_residual_nnzj(nls::AbstractNLSModel, nvar, nequ)
+  nls.nls_meta.nnzj
+end
+
 function get_residual_nnzj(
   b::ADModelBackend{GB, HvB, JvB, JtvB, JB, HB, GHJ, HvBLS, JvBLS, JtvBLS, JBLS, HBLS},
   nvar,
@@ -118,6 +123,27 @@ function get_nln_nnzh(nlp::AbstractNLPModel, nvar)
   nlp.meta.nnzh
 end
 
+"""
+    get_residual_nnzh(b::ADModelBackend, nvar)
+    get_residual_nnzh(nls::AbstractNLSModel, nvar)
+
+Return the number of nonzero elements in the residual Hessian.
+"""
+function get_residual_nnzh(b::ADModelBackend, nvar)
+  get_nln_nnzh(b.hessian_residual_backend, nvar)
+end
+
+function get_residual_nnzh(nls::AbstractNLSModel, nvar)
+  nls.nls_meta.nnzh
+end
+
+function get_residual_nnzh(
+  b::ADModelBackend{GB, HvB, JvB, JtvB, JB, HB, GHJ, HvBLS, JvBLS, JtvBLS, JBLS, HBLS},
+  nvar) where {GB, HvB, JvB, JtvB, JB, HB, GHJ, HvBLS, JvBLS, JtvBLS, JBLS, HBLS <: AbstractNLPModel}
+  nls = b.hessian_residual_backend
+  nls.nls_meta.nnzh
+end
+
 throw_error(b) =
   throw(ArgumentError("The AD backend $b is not loaded. Please load the corresponding AD package."))
 gradient(b::ADBackend, ::Any, ::Any) = throw_error(b)
@@ -289,6 +315,43 @@ function NLPModels.hess_coord!(
   return vals
 end
 
+function NLPModels.hess_structure_residual!(
+  b::ADBackend,
+  nls::AbstractADNLSModel,
+  rows::AbstractVector{<:Integer},
+  cols::AbstractVector{<:Integer},
+)
+  n = nls.meta.nvar
+  pos = 0
+  for j = 1:n
+    for i = j:n
+      pos += 1
+      rows[pos] = i
+      cols[pos] = j
+    end
+  end
+  return rows, cols
+end
+
+function NLPModels.hess_coord_residual!(
+  b::ADBackend,
+  nls::AbstractADNLSModel,
+  x::AbstractVector,
+  v::AbstractVector,
+  vals::AbstractVector,
+)
+  F = get_F(nls, b)
+  Hx = hessian(b, x -> dot(F(x), v), x)
+  k = 1
+  for j = 1:(nls.meta.nvar)
+    for i = j:(nls.meta.nvar)
+      vals[k] = Hx[i, j]
+      k += 1
+    end
+  end
+  return vals
+end
+
 function NLPModels.hprod!(
   b::ADBackend,
   nlp::ADModel,
diff --git a/src/nls.jl b/src/nls.jl
index c7c81e7b..0edcfef2 100644
--- a/src/nls.jl
+++ b/src/nls.jl
@@ -165,8 +165,9 @@ function ADNLSModel!(
   meta = NLPModelMeta{T, S}(nvar, x0 = x0, nnzh = nnzh, name = name, minimize = minimize)
   nls_nnzj = get_residual_nnzj(adbackend, nvar, nequ)
+  nls_nnzh = get_residual_nnzh(adbackend, nvar)
   nls_meta =
-    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = div(nvar * (nvar + 1), 2), lin = linequ)
+    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = nls_nnzh, lin = linequ)
 
   return ADNLSModel(meta, nls_meta, NLSCounters(), adbackend, F!, (cx, x) -> cx)
 end
@@ -210,8 +211,9 @@ function ADNLSModel!(
     minimize = minimize,
   )
   nls_nnzj = get_residual_nnzj(adbackend, nvar, nequ)
+  nls_nnzh = get_residual_nnzh(adbackend, nvar)
   nls_meta =
-    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = div(nvar * (nvar + 1), 2), lin = linequ)
+    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = nls_nnzh, lin = linequ)
 
   return ADNLSModel(meta, nls_meta, NLSCounters(), adbackend, F!, (cx, x) -> cx)
 end
@@ -272,8 +274,9 @@ function ADNLSModel!(
     minimize = minimize,
   )
   nls_nnzj = get_residual_nnzj(adbackend, nvar, nequ)
+  nls_nnzh = get_residual_nnzh(adbackend, nvar)
   nls_meta =
-    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = div(nvar * (nvar + 1), 2), lin = linequ)
+    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = nls_nnzh, lin = linequ)
 
   return ADNLSModel(meta, nls_meta, NLSCounters(), adbackend, F!, c!)
 end
@@ -422,8 +425,9 @@ function ADNLSModel!(
     minimize = minimize,
   )
   nls_nnzj = get_residual_nnzj(adbackend, nvar, nequ)
+  nls_nnzh = get_residual_nnzh(adbackend, nvar)
   nls_meta =
-    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = div(nvar * (nvar + 1), 2), lin = linequ)
+    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = nls_nnzh, lin = linequ)
 
   return ADNLSModel(meta, nls_meta, NLSCounters(), adbackend, F!, clinrows, clincols, clinvals, c!)
 end
@@ -639,8 +643,9 @@ function ADNLSModel!(
     minimize = minimize,
  )
   nls_nnzj = get_residual_nnzj(adbackend, nvar, nequ)
+  nls_nnzh = get_residual_nnzh(adbackend, nvar)
   nls_meta =
-    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = div(nvar * (nvar + 1), 2), lin = linequ)
+    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = nls_nnzh, lin = linequ)
 
   return ADNLSModel(meta, nls_meta, NLSCounters(), adbackend, F!, c!)
 end
@@ -744,8 +749,9 @@ function ADNLSModel!(
     minimize = minimize,
   )
   nls_nnzj = get_residual_nnzj(adbackend, nvar, nequ)
+  nls_nnzh = get_residual_nnzh(adbackend, nvar)
   nls_meta =
-    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = div(nvar * (nvar + 1), 2), lin = linequ)
+    NLSMeta{T, S}(nequ, nvar, nnzj = nls_nnzj, nnzh = nls_nnzh, lin = linequ)
 
   return ADNLSModel(meta, nls_meta, NLSCounters(), adbackend, F!, clinrows, clincols, clinvals, c!)
 end
@@ -864,11 +870,7 @@ function NLPModels.hess_structure_residual!(
   cols::AbstractVector{<:Integer},
 )
   @lencheck nls.nls_meta.nnzh rows cols
-  n = nls.meta.nvar
-  I = ((i, j) for i = 1:n, j = 1:n if i ≥ j)
-  rows .= getindex.(I, 1)
-  cols .= getindex.(I, 2)
-  return rows, cols
+  return hess_structure_residual!(nls.adbackend.hessian_residual_backend, nls, rows, cols)
 end
 
 function NLPModels.hess_coord_residual!(
@@ -881,23 +883,7 @@ function NLPModels.hess_coord_residual!(
   @lencheck nls.nls_meta.nequ v
   @lencheck nls.nls_meta.nnzh vals
   increment!(nls, :neval_hess_residual)
-  F = get_F(nls, nls.adbackend.hessian_residual_backend)
-  Hx = hessian(nls.adbackend.hessian_residual_backend, x -> dot(F(x), v), x)
-  k = 1
-  for j = 1:(nls.meta.nvar)
-    for i = j:(nls.meta.nvar)
-      vals[k] = Hx[i, j]
-      k += 1
-    end
-  end
-  return vals
-end
-
-function NLPModels.jth_hess_residual(nls::ADNLSModel, x::AbstractVector, i::Int)
-  @lencheck nls.meta.nvar x
-  increment!(nls, :neval_jhess_residual)
-  F = get_F(nls, nls.adbackend.hessian_residual_backend)
-  return Symmetric(hessian(nls.adbackend.hessian_residual_backend, x -> F(x)[i], x), :L)
+  return hess_coord_residual!(nls.adbackend.hessian_residual_backend, nls, x, v, vals)
 end
 
 function NLPModels.hprod_residual!(
diff --git a/src/predefined_backend.jl b/src/predefined_backend.jl
index 6e2d0ffd..740fde7e 100644
--- a/src/predefined_backend.jl
+++ b/src/predefined_backend.jl
@@ -3,14 +3,14 @@ default_backend = Dict(
   :hprod_backend => ForwardDiffADHvprod,
   :jprod_backend => ForwardDiffADJprod,
   :jtprod_backend => ForwardDiffADJtprod,
-  :jacobian_backend => SparseADJacobian, # ForwardDiffADJacobian
-  :hessian_backend => SparseADHessian, # ForwardDiffADHessian
+  :jacobian_backend => SparseADJacobian,
+  :hessian_backend => SparseADHessian,
   :ghjvprod_backend => ForwardDiffADGHjvprod,
   :hprod_residual_backend => ForwardDiffADHvprod,
   :jprod_residual_backend => ForwardDiffADJprod,
   :jtprod_residual_backend => ForwardDiffADJtprod,
-  :jacobian_residual_backend => SparseADJacobian, # ForwardDiffADJacobian,
-  :hessian_residual_backend => ForwardDiffADHessian,
+  :jacobian_residual_backend => SparseADJacobian,
+  :hessian_residual_backend => SparseADHessian,
 )
 
 optimized = Dict(
@@ -18,14 +18,14 @@ optimized = Dict(
   :hprod_backend => ReverseDiffADHvprod,
   :jprod_backend => ForwardDiffADJprod,
   :jtprod_backend => ReverseDiffADJtprod,
-  :jacobian_backend => SparseADJacobian, # ForwardDiffADJacobian
-  :hessian_backend => SparseReverseADHessian, # ForwardDiffADHessian,
+  :jacobian_backend => SparseADJacobian,
+  :hessian_backend => SparseReverseADHessian,
   :ghjvprod_backend => ForwardDiffADGHjvprod,
   :hprod_residual_backend => ReverseDiffADHvprod,
   :jprod_residual_backend => ForwardDiffADJprod,
   :jtprod_residual_backend => ReverseDiffADJtprod,
-  :jacobian_residual_backend => SparseADJacobian, # ForwardDiffADJacobian
-  :hessian_residual_backend => ForwardDiffADHessian,
+  :jacobian_residual_backend => SparseADJacobian,
+  :hessian_residual_backend => SparseReverseADHessian,
 )
 
 generic = Dict(
diff --git a/src/sparse_hessian.jl b/src/sparse_hessian.jl
index 4140762b..8d899666 100644
--- a/src/sparse_hessian.jl
+++ b/src/sparse_hessian.jl
@@ -180,8 +180,16 @@ function NLPModels.hess_structure!(
   return rows, cols
 end
 
+function NLPModels.hess_structure_residual!(
+  b::Union{SparseADHessian, SparseReverseADHessian},
+  nls::AbstractADNLSModel,
+  rows::AbstractVector{<:Integer},
+  cols::AbstractVector{<:Integer},
+)
+  return hess_structure!(b, nls, rows, cols)
+end
+
 function sparse_hess_coord!(
-  ℓ::Function,
   b::SparseADHessian{Tag, GT, S, T},
   x::AbstractVector,
   obj_weight,
@@ -219,7 +227,6 @@ function sparse_hess_coord!(
 end
 
 function sparse_hess_coord!(
-  ℓ::Function,
   b::SparseReverseADHessian{T, S, Tagf, F, Tagψ, P},
   x::AbstractVector,
   obj_weight,
@@ -265,8 +272,7 @@ function NLPModels.hess_coord!(
   obj_weight::Real,
   vals::AbstractVector,
 )
-  ℓ = get_lag(nlp, b, obj_weight, y)
-  sparse_hess_coord!(ℓ, b, x, obj_weight, y, vals)
+  sparse_hess_coord!(b, x, obj_weight, y, vals)
 end
 
 function NLPModels.hess_coord!(
@@ -277,8 +283,7 @@ function NLPModels.hess_coord!(
   vals::AbstractVector,
 )
   b.y .= 0
-  ℓ = get_lag(nlp, b, obj_weight, b.y)
-  sparse_hess_coord!(ℓ, b, x, obj_weight, b.y, vals)
+  sparse_hess_coord!(b, x, obj_weight, b.y, vals)
 end
 
 function NLPModels.hess_coord!(
@@ -286,13 +291,23 @@ function NLPModels.hess_coord!(
   nlp::ADModel,
   x::AbstractVector,
   j::Integer,
-  vals::AbstractVector{T},
-) where {T}
+  vals::AbstractVector,
+)
   for (w, k) in enumerate(nlp.meta.nln)
     b.y[w] = k == j ? 1 : 0
   end
-  obj_weight = zero(T)
-  ℓ = get_lag(nlp, b, obj_weight, b.y)
-  sparse_hess_coord!(ℓ, b, x, obj_weight, b.y, vals)
+  obj_weight = zero(eltype(x))
+  sparse_hess_coord!(b, x, obj_weight, b.y, vals)
   return vals
 end
+
+function NLPModels.hess_coord_residual!(
+  b::Union{SparseADHessian, SparseReverseADHessian},
+  nls::AbstractADNLSModel,
+  x::AbstractVector,
+  v::AbstractVector,
+  vals::AbstractVector,
+)
+  obj_weight = zero(eltype(x))
+  sparse_hess_coord!(b, x, obj_weight, v, vals)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index a69dc634..b98044b0 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -29,6 +29,7 @@ end
 
 @testset "Basic Hessian derivative test" begin
   include("sparse_hessian.jl")
+  include("sparse_hessian_nls.jl")
 end
 
 for problem in NLPModelsTest.nlp_problems ∪ ["GENROSE"]
diff --git a/test/sparse_hessian.jl b/test/sparse_hessian.jl
index 7c5c0529..15327e97 100644
--- a/test/sparse_hessian.jl
+++ b/test/sparse_hessian.jl
@@ -1,5 +1,7 @@
 list_sparse_hess_backend =
-  ((ADNLPModels.SparseADHessian, Dict()), (ADNLPModels.ForwardDiffADHessian, Dict()))
+  ((ADNLPModels.SparseADHessian, Dict()),
+  (ADNLPModels.SparseReverseADHessian, Dict()),
+  (ADNLPModels.ForwardDiffADHessian, Dict()))
 
 dt = (Float32, Float64)
 
diff --git a/test/sparse_hessian_nls.jl b/test/sparse_hessian_nls.jl
new file mode 100644
index 00000000..4cc056ee
--- /dev/null
+++ b/test/sparse_hessian_nls.jl
@@ -0,0 +1,45 @@
+list_sparse_hess_backend = (
+  (ADNLPModels.SparseADHessian, Dict()),
+  (ADNLPModels.SparseReverseADHessian, Dict()),
+  (ADNLPModels.ForwardDiffADHessian, Dict()),
+)
+
+dt = (Float32, Float64)
+
+@testset "Basic Hessian of residual derivative with backend=$(backend) and T=$(T)" for T in dt,
+  (backend, kw) in list_sparse_hess_backend
+
+  F!(Fx, x) = begin
+    Fx[1] = x[1] - 1
+    Fx[2] = 10 * (x[2] - x[1]^2)
+    Fx[3] = x[2] + 1
+    Fx
+  end
+  x0 = T[-1.2; 1.0]
+  nvar = 2
+  nequ = 3
+  nls = ADNLPModels.ADNLSModel!(F!, x0, 3, hessian_residual_backend = backend; kw...)
+
+  x = rand(T, nvar)
+  v = rand(T, nequ)
+  rows, cols = zeros(Int, nls.nls_meta.nnzh), zeros(Int, nls.nls_meta.nnzh)
+  vals = zeros(T, nls.nls_meta.nnzh)
+  hess_structure_residual!(nls, rows, cols)
+  hess_coord_residual!(nls, x, v, vals)
+  @test eltype(vals) == T
+  H = Symmetric(sparse(rows, cols, vals, nvar, nvar), :L)
+  @test H == [-20 * v[2] 0; 0 0]
+
+  # Also test the backend implementations directly
+  b = nls.adbackend.hessian_residual_backend
+  @test nls.nls_meta.nnzh == ADNLPModels.get_nln_nnzh(b, nvar)
+  ADNLPModels.hess_structure_residual!(b, nls, rows, cols)
+  ADNLPModels.hess_coord_residual!(b, nls, x, v, vals)
+  @test eltype(vals) == T
+  H = Symmetric(sparse(rows, cols, vals, nvar, nvar), :L)
+  @test H == [-20 * v[2] 0; 0 0]
+
+  nls = ADNLPModels.ADNLSModel!(F!, x0, 3, matrix_free = true; kw...)
+  @test nls.adbackend.hessian_backend isa ADNLPModels.EmptyADbackend
+  @test nls.adbackend.hessian_residual_backend isa ADNLPModels.EmptyADbackend
+end
diff --git a/test/sparse_jacobian.jl b/test/sparse_jacobian.jl
index 20b4d05e..0a522eb1 100644
--- a/test/sparse_jacobian.jl
+++ b/test/sparse_jacobian.jl
@@ -1,8 +1,10 @@
 list_sparse_jac_backend = (
-  (ADNLPModels.SparseADJacobian, Dict()), # default
+  (ADNLPModels.SparseADJacobian, Dict()),
   (ADNLPModels.ForwardDiffADJacobian, Dict()),
 )
+
 dt = (Float32, Float64)
+
 @testset "Basic Jacobian derivative with backend=$(backend) and T=$(T)" for T in dt,
   (backend, kw) in list_sparse_jac_backend
diff --git a/test/sparse_jacobian_nls.jl b/test/sparse_jacobian_nls.jl
index 23df0dc3..0144c50f 100644
--- a/test/sparse_jacobian_nls.jl
+++ b/test/sparse_jacobian_nls.jl
@@ -1,8 +1,10 @@
 list_sparse_jac_backend = (
-  (ADNLPModels.SparseADJacobian, Dict()), # default
+  (ADNLPModels.SparseADJacobian, Dict()),
   (ADNLPModels.ForwardDiffADJacobian, Dict()),
 )
+
 dt = (Float32, Float64)
+
 @testset "Basic Jacobian of residual derivative with backend=$(backend) and T=$(T)" for T in dt,
   (backend, kw) in list_sparse_jac_backend
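Usage sketch (illustrative, not part of the patch): after this change, the residual Hessian of an ADNLSModel is available in coordinate form through the standard NLPModels API, and nls_meta.nnzh reflects the sparsity pattern reported by the backend instead of an assumed dense lower triangle. The sketch below reuses the Rosenbrock-style residual from test/sparse_hessian_nls.jl above; the rest is a plausible call sequence, not code taken from the repository.

using ADNLPModels, NLPModels, LinearAlgebra, SparseArrays

# In-place residual F(x) = [x₁ - 1; 10(x₂ - x₁²); x₂ + 1].
F!(Fx, x) = begin
  Fx[1] = x[1] - 1
  Fx[2] = 10 * (x[2] - x[1]^2)
  Fx[3] = x[2] + 1
  Fx
end
nls = ADNLPModels.ADNLSModel!(F!, [-1.2; 1.0], 3)  # hessian_residual_backend defaults to SparseADHessian

# Coordinate form of H(x, v) = Σᵢ vᵢ ∇²Fᵢ(x); the pattern comes from the backend.
rows = zeros(Int, nls.nls_meta.nnzh)
cols = zeros(Int, nls.nls_meta.nnzh)
vals = zeros(nls.nls_meta.nnzh)
hess_structure_residual!(nls, rows, cols)
v = ones(3)
hess_coord_residual!(nls, nls.meta.x0, v, vals)
H = Symmetric(sparse(rows, cols, vals, 2, 2), :L)
# Only F₂ is nonlinear (∂²F₂/∂x₁² = -20), so H == [-20v₂ 0; 0 0].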