Skip to content

Commit

Permalink
Fomo nonmonotone extension (#267)
Browse files Browse the repository at this point in the history
* update find_beta for the nonmonotone case

* nonmonotone extension, remove useless norm computation

* Add unbounded optimality in lbfgs

* Add unbounded optimality for TRUNK

* Add unbounded objective tests (#268)

* add unbounded below obj test. Fix unbounded test in fomo.

* add unbounded below obj test. Fix unbounded test in fomo.

* standardize fomo :unbounded condition,
add objective value test in unbounded tests.

* rename: fk -> f0

* 🤖 Format .jl files (#270)

Co-authored-by: d-monnet <[email protected]>

* formatting

Co-authored-by: Tangi Migot <[email protected]>

* update find_beta for the nonmonotone case

* nonmonotone extension, remove useless norm computation

* formatting

Co-authored-by: Tangi Migot <[email protected]>

* fix rebase errors

* add tests, replace `circshift` by index

* fix allocs test

* Update src/fomo.jl

Co-authored-by: Tangi Migot <[email protected]>

* update docstring

* update docstring

* Update src/fomo.jl

Co-authored-by: Tangi Migot <[email protected]>

---------

Co-authored-by: tmigot <[email protected]>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: d-monnet <[email protected]>
  • Loading branch information
4 people authored Apr 13, 2024
1 parent 50d544c commit 866492f
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 23 deletions.
79 changes: 57 additions & 22 deletions src/fomo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax
and βmax ∈ [0,β] chosen so as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
(1-βmax) .* ‖∇f(xk)‖² + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1)
‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) .* ∇f(xk) + βmax .* mk‖ (2)
In the nonmonotone case, (1) becomes
(1-βmax) .* ‖∇f(xk)‖² + βmax .* ∇f(xk)ᵀmk + (fm - fk)/μk ≥ θ1 * ‖∇f(xk)‖²,
with fm the largest objective value over the last M successful iterations, fk = f(xk), and μk the step multiplier applied to d.
# Advanced usage
Expand Down Expand Up @@ -49,6 +52,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum.
- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1).
- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2).
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regularization step and `tr_step()` for first-order trust-region.
Expand Down Expand Up @@ -107,28 +111,35 @@ mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver
m::V
d::V
p::V
o::V
α::T
end

function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
x = similar(nlp.meta.x0)
g = similar(nlp.meta.x0)
c = similar(nlp.meta.x0)
m = fill!(similar(nlp.meta.x0), 0)
d = fill!(similar(nlp.meta.x0), 0)
p = similar(nlp.meta.x0)
return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
o = fill!(Vector{T}(undef, M), -Inf)
return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0))
end

@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
solver = FomoSolver(nlp)
@doc (@doc FomoSolver) function fomo(
nlp::AbstractNLPModel{T, V};
M::Int = 1,
kwargs...,
) where {T, V}
solver = FomoSolver(nlp; M)
solver_specific = Dict(:avgβmax => T(0.0))
stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
return solve!(solver, nlp, stats; kwargs...)
end

# Restore the reusable state of a `FomoSolver` so it can be used for a new solve.
# - `solver.o` (the objective-value memory read by `solve!`) is refilled with `-Inf`,
#   so unset slots can never be selected as the maximum stored objective value.
# - `solver.m` is zeroed (presumably the momentum accumulator; confirm against `solve!`).
# Returns the same `solver` instance, mutated in place.
function SolverCore.reset!(solver::FomoSolver{T}) where {T}
  fill!(solver.o, -Inf)
  fill!(solver.m, 0)
  return solver
end

Expand Down Expand Up @@ -163,6 +174,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
- `max_time::Float64 = 30.0`: maximum time limit in seconds.
- `max_iter::Int = typemax(Int)`: maximum number of iterations.
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regularization step and `tr_step()` for first-order trust-region.
Expand Down Expand Up @@ -201,14 +213,16 @@ mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
x::V
g::V
c::V
o::V
α::T
end

function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
x = similar(nlp.meta.x0)
g = similar(nlp.meta.x0)
c = similar(nlp.meta.x0)
return FoSolver{T, V}(x, g, c, T(0))
o = fill!(Vector{T}(undef, M), -Inf)
return FoSolver{T, V}(x, g, c, o, T(0))
end

"""
Expand All @@ -218,11 +232,12 @@ mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end

Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(
nlp::AbstractNLPModel;
M = 1,
kwargs...,
)

@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
solver = FoSolver(nlp)
@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; M::Int = 1, kwargs...) where {T, V}
solver = FoSolver(nlp; M)
stats = GenericExecutionStats(nlp)
return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
end
Expand All @@ -236,6 +251,7 @@ end
end

# Restore the reusable state of a `FoSolver` so it can be used for a new solve.
# The objective-value memory `solver.o` is refilled with `-Inf`, so unset slots can
# never be selected as the maximum stored objective value.
# Returns the same `solver` instance, mutated in place.
function SolverCore.reset!(solver::FoSolver{T}) where {T}
  fill!(solver.o, -Inf)
  return solver
end

Expand Down Expand Up @@ -281,6 +297,11 @@ function SolverCore.solve!(
set_iter!(stats, 0)
f0 = obj(nlp, x)
set_objective!(stats, f0)
obj_mem = solver.o
M = length(obj_mem)
mem_ind = 0
obj_mem[mem_ind+1] = stats.objective
max_obj_mem = stats.objective

grad!(nlp, x, ∇fk)
norm_∇fk = norm(∇fk)
Expand Down Expand Up @@ -346,13 +367,13 @@ function SolverCore.solve!(
oneT = T(1)
mdot∇f = T(0) # dot(momentum,∇fk)
while !done
λk = step_mult(solver.α, norm_d, step_backend)
c .= x .- λk .* d
μk = step_mult(solver.α, norm_d, step_backend)
c .= x .- μk .* d
step_underflow = x == c # step addition underflow in every dimension, should happen before solver.α == 0
ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum
ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * μk # = dot(d,∇fk) * μk with momentum, ‖∇fk‖²μk without momentum
fck = obj(nlp, c)
unbounded = fck < fmin
ρk = (stats.objective - fck) / ΔTk
ρk = (max_obj_mem - fck) / (max_obj_mem - stats.objective + ΔTk)
# Update regularization parameters
if ρk >= η2
solver.α = min(αmax, γ2 * solver.α)
Expand All @@ -371,13 +392,16 @@ function SolverCore.solve!(
momentum .= ∇fk .* (oneT - β) .+ momentum .* β
end
set_objective!(stats, fck)
mem_ind = (mem_ind+1) % M
obj_mem[mem_ind+1] = stats.objective
max_obj_mem = maximum(obj_mem)

grad!(nlp, x, ∇fk)
norm_∇fk = norm(∇fk)
if use_momentum
mdot∇f = dot(momentum, ∇fk)
p .= momentum .- ∇fk
diff_norm = norm(p)
βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2)
βmax = find_beta(p, mdot∇f, norm_∇fk, μk, stats.objective, max_obj_mem, β, θ1, θ2)
d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
norm_d = norm(d)
avgβmax += βmax
Expand Down Expand Up @@ -432,18 +456,29 @@ function SolverCore.solve!(
end

"""
find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
find_beta(p, mdot∇f, norm_∇f, μk, fk, max_obj_mem, β, θ1, θ2)
Compute value `βmax` that saturates the contribution of the momentum term to the gradient.
`βmax` is computed such that the two gradient-related conditions are ensured:
1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
Compute βmax which saturates the contribution of the momentum term to the gradient.
`βmax` is computed such that the two gradient-related conditions (first one is relaxed in the nonmonotone case) are ensured:
1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm + (max_obj_mem - fk)/μk ≥ θ1 * ‖∇f(xk)‖²
2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm`
with `m` the momentum term, `p = m - ∇f(xk)`, `mdot∇f = ∇f(xk)ᵀm`, `fk = f(xk)` the current objective value (the model value at s=0), `μk` the step multiplier, and `max_obj_mem` the largest objective value over the last M successful iterations.
"""
function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T}
function find_beta(
p::V,
mdot∇f::T,
norm_∇f::T,
μk::T,
fk::T,
max_obj_mem::T,
β::T,
θ1::T,
θ2::T,
) where {T, V}
n1 = norm_∇f^2 - mdot∇f
β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β
β2 = diff_norm != 0 ? (1 - θ2) * norm_∇f / diff_norm : β
n2 = norm(p)
β1 = n1 > 0 ? ((1 - θ1) * norm_∇f^2 - (fk - max_obj_mem) / μk) / n1 : β
β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β
return min(β, min(β1, β2))
end

Expand Down
6 changes: 5 additions & 1 deletion test/allocs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ if Sys.isunix()
for model in NLPModelsTest.nlp_problems
nlp = eval(Meta.parse(model))()
if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
solver = eval(symsolver)(nlp)
if (symsolver == :FoSolver || symsolver == :FomoSolver)
solver = eval(symsolver)(nlp; M = 2) # nonmonotone configuration allocates extra memory
else
solver = eval(symsolver)(nlp)
end
if symsolver == :FomoSolver
T = eltype(nlp.meta.x0)
stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0)))
Expand Down
8 changes: 8 additions & 0 deletions test/test_solvers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ function tests()
unconstrained_nlp(solver)
multiprecision_nlp(solver, :unc)
end
@testset "$name : nonmonotone configuration" for (name, solver) in [
("R2", (nlp; kwargs...) -> R2(nlp, M = 2; kwargs...)),
("fomo_r2", (nlp; kwargs...) -> fomo(nlp, M = 2; kwargs...)),
("fomo_tr", (nlp; kwargs...) -> fomo(nlp, M = 2, step_backend = JSOSolvers.tr_step(); kwargs...)),
]
unconstrained_nlp(solver)
multiprecision_nlp(solver, :unc)
end
end
@testset "Bound-constrained solvers" begin
@testset "$solver" for solver in [tron]
Expand Down

0 comments on commit 866492f

Please sign in to comment.