Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ScaledModel #123

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/NLPModelsModifiers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ include("feasibility-form-nls.jl")
include("feasibility-residual.jl")
include("quasi-newton.jl")
include("slack-model.jl")
include("scaled-model.jl")
include("model-interaction.jl")

end # module
233 changes: 233 additions & 0 deletions src/scaled-model.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
export ScaledModel

struct ConservativeScaling{T}
max_gradient::T
end

function _set_constraints_scaling!(cons, Ji, Jj, Jx, max_gradient)
# Return a vector storing at index i norm(∇cᵢ, Inf)
for (i, j, x) in zip(Ji, Jj, Jx)
cons[i] = max(cons[i], abs(x))
end
# Compute scaling as min(1, max_gradient / norm(∇cᵢ, Inf) )
for i in eachindex(cons)
cons[i] = min(1.0, max_gradient / cons[i])
end
end

function _set_jacobian_scaling!(Jx, Ji, Jj, cons)
k = 0
for (i, j) in zip(Ji, Jj)
Jx[k += 1] = cons[i]
end
end

function scale_model!(scaling::ConservativeScaling{T}, nlp) where T
n, m = NLPModels.get_nvar(nlp), NLPModels.get_ncon(nlp)
nnzj = NLPModels.get_nnzj(nlp)
x0 = NLPModels.get_x0(nlp)
g = NLPModels.grad(nlp, x0)
scaling_obj = min(one(T), scaling.max_gradient / norm(g, Inf))
scaling_cons = similar(x0, m)
scaling_jac = similar(x0, nnzj)
fill!(scaling_cons, zero(T))
Ji, Jj = NLPModels.jac_structure(nlp)
NLPModels.jac_coord!(nlp, x0, scaling_jac)
_set_constraints_scaling!(scaling_cons, Ji, Jj, scaling_jac, scaling.max_gradient)
_set_jacobian_scaling!(scaling_jac, Ji, Jj, scaling_cons)
return (scaling_obj, scaling_cons, scaling_jac)
end

@doc raw"""
ScaledModel

Scale the nonlinear program
```math
\begin{aligned}
min_x \quad & f(x)\\
\mathrm{s.t.} \quad &  c_L ≤ c(x) ≤ c_U,\\
& ℓ ≤ x ≥ u,
\end{aligned}
```
as
```math
\begin{aligned}
min_x \quad & σf . f(x)\\
\mathrm{s.t.} \quad &  σc . c_L ≤ σc . c(x) ≤ σc . c_U, \\
& ℓ ≤ x ≥ u,
\end{aligned}
```
with ``σf`` a positive scalar defined as
```
σf = min(1, max_gradient / norm(g0, Inf))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is max_gradient?
Do we want to add a max as well to avoid having σf too small? Maybe this bound should depend on the eltype.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added more explanations in the docstring.

```
and ``σc`` a vector whose size is equal to the number of constraints in the model.
For ``i=1, ..., m``,
```
σc[i] = min(1, max_gradient / norm(J0[i, :], Inf))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

```

The vector ``g0 = ∇f(x0)`` and the matrix ``J0 = ∇c(x0)`` are resp.
the gradient and the Jacobian evaluated at the initial point ``x0``.
By default, the threshold parameter `max_gradient` is set to 100.0.

"""
struct ScaledModel{T, S, M} <: NLPModels.AbstractNLPModel{T, S}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
struct ScaledModel{T, S, M} <: NLPModels.AbstractNLPModel{T, S}
struct ScaledModel{T, S, M} <: AbstractNLPModel{T, S}

I don't think we need the NLPModels. inside this package.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and same comment throughout the file

nlp::M
meta::NLPModels.NLPModelMeta{T, S}
counters::NLPModels.Counters
scaling_obj::T
scaling_cons::S # [size m]
scaling_jac::S # [size nnzj]
buffer_cons::S # [size m]
tmigot marked this conversation as resolved.
Show resolved Hide resolved
end

function ScaledModel(
nlp::NLPModels.AbstractNLPModel{T, S};
scaling=ConservativeScaling(T(100)),
) where {T, S}
n, m = NLPModels.get_nvar(nlp), NLPModels.get_ncon(nlp)
x0 = NLPModels.get_x0(nlp)
buffer_cons = S(undef, m)
scaling_obj, scaling_cons, scaling_jac = scale_model!(scaling, nlp)
meta = NLPModels.NLPModelMeta(
n;
lvar=NLPModels.get_lvar(nlp),
uvar=NLPModels.get_uvar(nlp),
x0=NLPModels.get_x0(nlp),
y0 = NLPModels.get_y0(nlp) .* scaling_cons,
nnzj=NLPModels.get_nnzj(nlp),
nnzh=NLPModels.get_nnzh(nlp),
ncon=m,
lcon=NLPModels.get_lcon(nlp) .* scaling_cons,
ucon=NLPModels.get_ucon(nlp) .* scaling_cons,
frapac marked this conversation as resolved.
Show resolved Hide resolved
minimize=nlp.meta.minimize,
name="scaled-" * nlp.meta.name,
)

return ScaledModel(
nlp,
meta,
NLPModels.Counters(),
scaling_obj,
scaling_cons,
scaling_jac,
buffer_cons,
)
end

function NLPModels.obj(nlp::ScaledModel{T, S}, x::AbstractVector) where {T, S <: AbstractVector{T}}
@lencheck nlp.meta.nvar x
return nlp.scaling_obj * NLPModels.obj(nlp.nlp, x)
end

function NLPModels.cons!(nlp::ScaledModel, x::AbstractVector, c::AbstractVector)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is more a general comment on future work. We recently split the constraint API to nonlinear and linear. Would it make sense in a future work to have two different scaling for linear and nonlinear constraints?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, anyway it would be better to have cons_lin! and cons_nln! instead of cons!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to keep the interface as is. As far as I understand cons! is calling by default cons_lin! and cons_nln! internally, and here the scaling does not depend on the nature of the constraint.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, but calling a solver on a ScaledModel would return an cons_nln! unimplemented.

@lencheck nlp.meta.nvar x
@lencheck nlp.meta.ncon c
NLPModels.cons!(nlp.nlp, x, c)
c .*= nlp.scaling_cons
return c
end

function NLPModels.grad!(nlp::ScaledModel, x::AbstractVector, g::AbstractVector)
@lencheck nlp.meta.nvar x g
NLPModels.grad!(nlp.nlp, x, g)
g .*= nlp.scaling_obj
return g
end

function NLPModels.jprod!(nlp::ScaledModel, x::AbstractVector, v::AbstractVector, Jv::AbstractVector)
@lencheck nlp.meta.nvar x v
@lencheck nlp.meta.ncon Jv
NLPModels.jprod!(nlp.nlp, x, v, Jv)
Jv .*= nlp.scaling_cons
return Jv
end

function NLPModels.jtprod!(nlp::ScaledModel, x::AbstractVector, v::AbstractVector, Jtv::AbstractVector)
@lencheck nlp.meta.nvar x Jtv
@lencheck nlp.meta.ncon v
v_scaled = nlp.buffer_cons
v_scaled .= v .* nlp.scaling_cons
NLPModels.jtprod!(nlp.nlp, x, v_scaled, Jtv)
return Jtv
end

function NLPModels.jac_structure!(nlp::ScaledModel, jrows::AbstractVector, jcols::AbstractVector)
NLPModels.jac_structure!(nlp.nlp, jrows, jcols)
return jrows, jcols
end

function NLPModels.jac_coord!(nlp::ScaledModel, x::AbstractVector, jac::AbstractVector)
NLPModels.jac_coord!(nlp.nlp, x, jac)
jac .*= nlp.scaling_jac
return jac
end

function NLPModels.hess_structure!(nlp::ScaledModel, hrows::AbstractVector, hcols::AbstractVector)
@lencheck nlp.meta.nnzh hrows hcols
NLPModels.hess_structure!(nlp.nlp, hrows, hcols)
return hrows, hcols
end

function NLPModels.hess_coord!(
nlp::ScaledModel,
x::AbstractVector,
vals::AbstractVector;
obj_weight::Real=one(eltype(x)),
)
@lencheck nlp.meta.nvar x
@lencheck nlp.meta.nnzh vals
σ = obj_weight * nlp.scaling_obj
NLPModels.hess_coord!(nlp.nlp, x, vals; obj_weight=σ)
return vals
end

function NLPModels.hess_coord!(
nlp::ScaledModel,
x::AbstractVector,
y::AbstractVector,
vals::AbstractVector;
obj_weight::Real=one(eltype(x)),
)
@lencheck nlp.meta.nvar x
@lencheck nlp.meta.ncon y
@lencheck nlp.meta.nnzh vals
y_scaled = nlp.buffer_cons
y_scaled .= y .* nlp.scaling_cons
σ = obj_weight * nlp.scaling_obj
NLPModels.hess_coord!(nlp.nlp, x, y_scaled, vals; obj_weight=σ)
return vals
end

function NLPModels.hprod!(
nlp::ScaledModel,
x::AbstractVector,
v::AbstractVector,
hv::AbstractVector;
obj_weight::Real = one(eltype(x)),
)
@lencheck nlp.meta.nvar x v hv
σ = obj_weight * nlp.scaling_obj
NLPModels.hprod!(nlp.nlp, x, v, hv; obj_weight = σ)
return hv
end

function NLPModels.hprod!(
nlp::ScaledModel,
x::AbstractVector,
y::AbstractVector,
v::AbstractVector,
hv::AbstractVector;
obj_weight::Real = one(eltype(x)),
)
@lencheck nlp.meta.nvar x v hv
@lencheck nlp.meta.ncon y
y_scaled = nlp.buffer_cons
y_scaled .= y .* nlp.scaling_cons
σ = obj_weight * nlp.scaling_obj
NLPModels.hprod!(nlp.nlp, x, y, v, hv; obj_weight = σ)
return hv
end

38 changes: 38 additions & 0 deletions test/nlp/scaled-model.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
@testset "ScaledModel NLP tests" begin
@testset "API" for T in [Float64, Float32], M in [NLPModelMeta, SimpleNLPMeta]
nlp = ScaledModel(SimpleNLPModel(T, M))
σ_obj, σ_cons = nlp.scaling_obj, nlp.scaling_cons

f(x) = σ_obj * (x[1] - 2)^2 + (x[2] - 1)^2
∇f(x) = [σ_obj * 2 * (x[1] - 2); σ_obj * 2 * (x[2] - 1)]
H(x) = T[(σ_obj * 2.0) 0; 0 (σ_obj * 2.0)]
c(x) = [σ_cons[1] * (x[1] - 2x[2] + 1); σ_cons[2] * (-x[1]^2 / 4 - x[2]^2 + 1)]
J(x) = [σ_cons[1] -2.0*σ_cons[1]; (-0.5 *σ_cons[1] * x[1]) (-2.0*σ_cons[2] * x[2])]
H(x, y) = H(x) + σ_cons[2] * y[2] * T[-0.5 0; 0 -2.0]

n = nlp.meta.nvar
m = nlp.meta.ncon
@test nlp.meta.x0 == T[2; 2]

x = randn(T, n)
y = randn(T, m)
v = randn(T, n)
w = randn(T, m)
Jv = zeros(T, m)
Jtw = zeros(T, n)
Hv = zeros(T, n)
Hvals = zeros(T, nlp.meta.nnzh)

# Basic methods
@test obj(nlp, x) ≈ f(x)
@test grad(nlp, x) ≈ ∇f(x)
@test hess(nlp, x) ≈ H(x)
@test hprod(nlp, x, v) ≈ H(x) * v
@test cons(nlp, x) ≈ c(x)
@test jac(nlp, x) ≈ J(x)
@test jprod(nlp, x, v) ≈ J(x) * v
@test jtprod(nlp, x, w) ≈ J(x)' * w
@test hess(nlp, x, y) ≈ H(x, y)
@test hprod(nlp, x, y, v) ≈ H(x, y) * v
end
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using LinearOperators, NLPModels, NLPModelsModifiers
include("nlp/simple-model.jl")
include("nlp/quasi-newton.jl")
include("nlp/slack-model.jl")
include("nlp/scaled-model.jl")

include("nls/simple-model.jl")
include("nls/feasibility-form-nls.jl")
Expand Down