Skip to content

Commit 866492f

Browse files
d-monnettmigotgithub-actions[bot]
authored
Fomo nonmonotone extension (#267)
* update find_beta for the nonmonotone case * nonmonotone extension, remove useless norm computation * Add unbounded optimality in lbfgs * Add unbounded optimality for TRUNK * Add unbounded objective tests (#268) * add unbounded below obj test. Fix unbounded test in fomo. * add unbounded below obj test. Fix unbounded test in fomo. * standardize fomo :unbounded condition, add objective value test in unbounded tests. * rename: fk -> f0 * 🤖 Format .jl files (#270) Co-authored-by: d-monnet <[email protected]> * fomatting Co-authored-by: Tangi Migot <[email protected]> * update find_beta for the nonmonotone case * nonmonotone extension, remove useless norm computation * fomatting Co-authored-by: Tangi Migot <[email protected]> * fix rebase errors * add tests, replace `circshift` by index * fix allocs test * Update src/fomo.jl Co-authored-by: Tangi Migot <[email protected]> * update docstring * update docstring * Update src/fomo.jl Co-authored-by: Tangi Migot <[email protected]> --------- Co-authored-by: tmigot <[email protected]> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: d-monnet <[email protected]>
1 parent 50d544c commit 866492f

File tree

3 files changed

+70
-23
lines changed

3 files changed

+70
-23
lines changed

src/fomo.jl

Lines changed: 57 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax
2020
and βmax ∈ [0,β] chosen so as to ensure that d is gradient-related, i.e., the following 2 conditions are satisfied:
2121
(1-βmax) .* ‖∇f(xk)‖² + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1)
2222
‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) .* ∇f(xk) + βmax .* mk‖ (2)
23+
In the nonmonotone case, (1) rewrites
24+
(1-βmax) .* ‖∇f(xk)‖² + βmax .* ∇f(xk)ᵀmk + (fm - fk)/μk ≥ θ1 * ‖∇f(xk)‖²,
25+
with fm the largest objective value over the last M successful iterations, and fk = f(xk).
2326
2427
# Advanced usage
2528
@@ -49,6 +52,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
4952
- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum.
5053
- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1).
5154
- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2).
55+
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
5256
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
5357
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
5458
@@ -107,28 +111,35 @@ mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver
107111
m::V
108112
d::V
109113
p::V
114+
o::V
110115
α::T
111116
end
112117

113-
function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
118+
function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
114119
x = similar(nlp.meta.x0)
115120
g = similar(nlp.meta.x0)
116121
c = similar(nlp.meta.x0)
117122
m = fill!(similar(nlp.meta.x0), 0)
118123
d = fill!(similar(nlp.meta.x0), 0)
119124
p = similar(nlp.meta.x0)
120-
return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
125+
o = fill!(Vector{T}(undef, M), -Inf)
126+
return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0))
121127
end
122128

123-
@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
124-
solver = FomoSolver(nlp)
129+
@doc (@doc FomoSolver) function fomo(
130+
nlp::AbstractNLPModel{T, V};
131+
M::Int = 1,
132+
kwargs...,
133+
) where {T, V}
134+
solver = FomoSolver(nlp; M)
125135
solver_specific = Dict(:avgβmax => T(0.0))
126136
stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
127137
return solve!(solver, nlp, stats; kwargs...)
128138
end
129139

130140
function SolverCore.reset!(solver::FomoSolver{T}) where {T}
131141
fill!(solver.m, 0)
142+
fill!(solver.o, -Inf)
132143
solver
133144
end
134145

@@ -163,6 +174,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
163174
- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
164175
- `max_time::Float64 = 30.0`: maximum time limit in seconds.
165176
- `max_iter::Int = typemax(Int)`: maximum number of iterations.
177+
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
166178
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
167179
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
168180
@@ -201,14 +213,16 @@ mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
201213
x::V
202214
g::V
203215
c::V
216+
o::V
204217
α::T
205218
end
206219

207-
function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
220+
function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
208221
x = similar(nlp.meta.x0)
209222
g = similar(nlp.meta.x0)
210223
c = similar(nlp.meta.x0)
211-
return FoSolver{T, V}(x, g, c, T(0))
224+
o = fill!(Vector{T}(undef, M), -Inf)
225+
return FoSolver{T, V}(x, g, c, o, T(0))
212226
end
213227

214228
"""
@@ -218,11 +232,12 @@ mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end
218232

219233
Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(
220234
nlp::AbstractNLPModel;
235+
M = 1,
221236
kwargs...,
222237
)
223238

224-
@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
225-
solver = FoSolver(nlp)
239+
@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; M::Int = 1, kwargs...) where {T, V}
240+
solver = FoSolver(nlp; M)
226241
stats = GenericExecutionStats(nlp)
227242
return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
228243
end
@@ -236,6 +251,7 @@ end
236251
end
237252

238253
function SolverCore.reset!(solver::FoSolver{T}) where {T}
254+
fill!(solver.o, -Inf)
239255
solver
240256
end
241257

@@ -281,6 +297,11 @@ function SolverCore.solve!(
281297
set_iter!(stats, 0)
282298
f0 = obj(nlp, x)
283299
set_objective!(stats, f0)
300+
obj_mem = solver.o
301+
M = length(obj_mem)
302+
mem_ind = 0
303+
obj_mem[mem_ind+1] = stats.objective
304+
max_obj_mem = stats.objective
284305

285306
grad!(nlp, x, ∇fk)
286307
norm_∇fk = norm(∇fk)
@@ -346,13 +367,13 @@ function SolverCore.solve!(
346367
oneT = T(1)
347368
mdot∇f = T(0) # dot(momentum,∇fk)
348369
while !done
349-
λk = step_mult(solver.α, norm_d, step_backend)
350-
c .= x .- λk .* d
370+
μk = step_mult(solver.α, norm_d, step_backend)
371+
c .= x .- μk .* d
351372
step_underflow = x == c # step addition underflow on every dimension, should happen before solver.α == 0
352-
ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum
373+
ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * μk # = dot(d,∇fk) * μk with momentum, ‖∇fk‖²μk without momentum
353374
fck = obj(nlp, c)
354375
unbounded = fck < fmin
355-
ρk = (stats.objective - fck) / ΔTk
376+
ρk = (max_obj_mem - fck) / (max_obj_mem - stats.objective + ΔTk)
356377
# Update regularization parameters
357378
if ρk >= η2
358379
solver.α = min(αmax, γ2 * solver.α)
@@ -371,13 +392,16 @@ function SolverCore.solve!(
371392
momentum .= ∇fk .* (oneT - β) .+ momentum .* β
372393
end
373394
set_objective!(stats, fck)
395+
mem_ind = (mem_ind+1) % M
396+
obj_mem[mem_ind+1] = stats.objective
397+
max_obj_mem = maximum(obj_mem)
398+
374399
grad!(nlp, x, ∇fk)
375400
norm_∇fk = norm(∇fk)
376401
if use_momentum
377402
mdot∇f = dot(momentum, ∇fk)
378403
p .= momentum .- ∇fk
379-
diff_norm = norm(p)
380-
βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2)
404+
βmax = find_beta(p, mdot∇f, norm_∇fk, μk, stats.objective, max_obj_mem, β, θ1, θ2)
381405
d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
382406
norm_d = norm(d)
383407
avgβmax += βmax
@@ -432,18 +456,29 @@ function SolverCore.solve!(
432456
end
433457

434458
"""
435-
find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
459+
find_beta(p, mdot∇f, norm_∇f, μk, fk, max_obj_mem, β, θ1, θ2)
436460
437-
Compute value `βmax` that saturates the contribution of the momentum term to the gradient.
438-
`βmax` is computed such that the two gradient-related conditions are ensured:
439-
1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
461+
Compute βmax which saturates the contribution of the momentum term to the gradient.
462+
`βmax` is computed such that the two gradient-related conditions (first one is relaxed in the nonmonotone case) are ensured:
463+
1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm + (max_obj_mem - fk)/μk ≥ θ1 * ‖∇f(xk)‖²
440464
2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
441-
with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm`
465+
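with `m` the momentum term, `mdot∇f = ∇f(xk)ᵀm`, `fk` the objective value at `xk` (i.e. the model at s=0), and `max_obj_mem` the largest objective value over the last M successful iterations. The first argument is the vector `m .- ∇f(xk)`, whose norm is used for condition 2.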
442466
"""
443-
function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T}
467+
function find_beta(
468+
p::V,
469+
mdot∇f::T,
470+
norm_∇f::T,
471+
μk::T,
472+
fk::T,
473+
max_obj_mem::T,
474+
β::T,
475+
θ1::T,
476+
θ2::T,
477+
) where {T, V}
444478
n1 = norm_∇f^2 - mdot∇f
445-
β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β
446-
β2 = diff_norm != 0 ? (1 - θ2) * norm_∇f / diff_norm : β
479+
n2 = norm(p)
480+
β1 = n1 > 0 ? ((1 - θ1) * norm_∇f^2 - (fk - max_obj_mem) / μk) / n1 : β
481+
β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β
447482
return min(β, min(β1, β2))
448483
end
449484

test/allocs.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@ if Sys.isunix()
3535
for model in NLPModelsTest.nlp_problems
3636
nlp = eval(Meta.parse(model))()
3737
if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
38-
solver = eval(symsolver)(nlp)
38+
if (symsolver == :FoSolver || symsolver == :FomoSolver)
39+
solver = eval(symsolver)(nlp; M = 2) # nonmonotone configuration allocates extra memory
40+
else
41+
solver = eval(symsolver)(nlp)
42+
end
3943
if symsolver == :FomoSolver
4044
T = eltype(nlp.meta.x0)
4145
stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0)))

test/test_solvers.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ function tests()
1414
unconstrained_nlp(solver)
1515
multiprecision_nlp(solver, :unc)
1616
end
17+
@testset "$name : nonmonotone configuration" for (name, solver) in [
18+
("R2", (nlp; kwargs...) -> R2(nlp, M = 2; kwargs...)),
19+
("fomo_r2", (nlp; kwargs...) -> fomo(nlp, M = 2; kwargs...)),
20+
("fomo_tr", (nlp; kwargs...) -> fomo(nlp, M = 2, step_backend = JSOSolvers.tr_step(); kwargs...)),
21+
]
22+
unconstrained_nlp(solver)
23+
multiprecision_nlp(solver, :unc)
24+
end
1725
end
1826
@testset "Bound-constrained solvers" begin
1927
@testset "$solver" for solver in [tron]

0 commit comments

Comments
 (0)