JuliaSmoothOptimizers · d-monnet · Apr 13, 2024 · Apr 5, 2024 · Apr 5, 2024 · Apr 8, 2024
diff --git a/src/fomo.jl b/src/fomo.jl
@@ -20,6 +20,9 @@ mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax
 and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
 (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1)
 ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (2)
+In the nonmonotone case, (1) rewrites
+(1-βmax) .* ‖∇f(xk)‖² + βmax .* ∇f(xk)ᵀmk + (fm - fk)/μk ≥ θ1 * ‖∇f(xk)‖²,
+with fm the greatest objective value over the last M successful iterations, and fk = f(xk).
 
 # Advanced usage
 
@@ -49,6 +52,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum.
 - `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1).
 - `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2). 
+- `M = 1`: memory length of the nonmonotone strategy; a step is assessed against the greatest objective value over the last `M` successful iterations. `M = 1` implies monotone behaviour.
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -107,28 +111,35 @@ mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver
   m::V
   d::V
   p::V
+  o::V
   α::T
 end
 
-function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
+function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
   x = similar(nlp.meta.x0)
   g = similar(nlp.meta.x0)
   c = similar(nlp.meta.x0)
   m = fill!(similar(nlp.meta.x0), 0)
   d = fill!(similar(nlp.meta.x0), 0)
   p = similar(nlp.meta.x0)
-  return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
+  o = fill!(Vector{T}(undef, M), -Inf)
+  return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0))
 end
 
-@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
-  solver = FomoSolver(nlp)
+@doc (@doc FomoSolver) function fomo(
+  nlp::AbstractNLPModel{T, V};
+  M::Int = 1,
+  kwargs...,
+) where {T, V}
+  solver = FomoSolver(nlp; M)
   solver_specific = Dict(:avgβmax => T(0.0))
   stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
   fill!(solver.m, 0)
+  fill!(solver.o, -Inf)
   solver
 end
 
@@ -163,6 +174,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
+- `M = 1`: memory length of the nonmonotone strategy; a step is assessed against the greatest objective value over the last `M` successful iterations. `M = 1` implies monotone behaviour.
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -201,14 +213,16 @@ mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
   g::V
   c::V
+  o::V
   α::T
 end
 
-function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
+function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
   x = similar(nlp.meta.x0)
   g = similar(nlp.meta.x0)
   c = similar(nlp.meta.x0)
-  return FoSolver{T, V}(x, g, c, T(0))
+  o = fill!(Vector{T}(undef, M), -Inf)
+  return FoSolver{T, V}(x, g, c, o, T(0))
 end
 
 """
@@ -218,11 +232,12 @@ mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end
 
 Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(
   nlp::AbstractNLPModel;
+  M = 1,
   kwargs...,
 )
 
-@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
-  solver = FoSolver(nlp)
+@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; M::Int = 1, kwargs...) where {T, V}
+  solver = FoSolver(nlp; M)
   stats = GenericExecutionStats(nlp)
   return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
 end
@@ -236,6 +251,7 @@ end
 end
 
 function SolverCore.reset!(solver::FoSolver{T}) where {T}
+  fill!(solver.o, -Inf)
   solver
 end
 
@@ -279,7 +295,13 @@ function SolverCore.solve!(
   d = use_momentum ? solver.d : solver.g # g = d if no momentum
   p = use_momentum ? solver.p : nothing # not used if no momentum
   set_iter!(stats, 0)
-  set_objective!(stats, obj(nlp, x))
+  f0 = obj(nlp, x)
+  set_objective!(stats, f0)
+  obj_mem = solver.o
+  M = length(obj_mem)
+  mem_ind = 0
+  obj_mem[mem_ind+1] = stats.objective
+  max_obj_mem = stats.objective
 
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
@@ -288,6 +310,9 @@ function SolverCore.solve!(
   solver.α = init_alpha(norm_∇fk, step_backend)
 
   # Stopping criterion: 
+  fmin = min(-one(T), f0) / eps(T)
+  unbounded = f0 < fmin
+
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
   step_param_name = is_r2 ? "σ" : "Δ"
@@ -321,6 +346,7 @@ function SolverCore.solve!(
       nlp,
       elapsed_time = stats.elapsed_time,
       optimal = optimal,
+      unbounded = unbounded,
       max_eval = max_eval,
       iter = stats.iter,
       max_iter = max_iter,
@@ -341,16 +367,13 @@ function SolverCore.solve!(
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
-    λk = step_mult(solver.α, norm_d, step_backend)
-    c .= x .- λk .* d
+    μk = step_mult(solver.α, norm_d, step_backend)
+    c .= x .- μk .* d
     step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0
-    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum
+    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * μk # = dot(d,∇fk) * μk with momentum, ‖∇fk‖²μk without momentum
     fck = obj(nlp, c)
-    if fck == -Inf
-      set_status!(stats, :unbounded)
-      break
-    end
-    ρk = (stats.objective - fck) / ΔTk
+    unbounded = fck < fmin
+    ρk = (max_obj_mem - fck) / (max_obj_mem - stats.objective + ΔTk)
     # Update regularization parameters
     if ρk >= η2
       solver.α = min(αmax, γ2 * solver.α)
@@ -369,13 +392,16 @@ function SolverCore.solve!(
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
       end
       set_objective!(stats, fck)
+      mem_ind = (mem_ind+1) % M
+      obj_mem[mem_ind+1] = stats.objective
+      max_obj_mem = maximum(obj_mem)
+
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if use_momentum
         mdot∇f = dot(momentum, ∇fk)
         p .= momentum .- ∇fk
-        diff_norm = norm(p)
-        βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2)
+        βmax = find_beta(p, mdot∇f, norm_∇fk, μk, stats.objective, max_obj_mem, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
         avgβmax += βmax
@@ -406,6 +432,7 @@ function SolverCore.solve!(
         nlp,
         elapsed_time = stats.elapsed_time,
         optimal = optimal,
+        unbounded = unbounded,
         max_eval = max_eval,
         iter = stats.iter,
         max_iter = max_iter,
@@ -429,18 +456,29 @@ function SolverCore.solve!(
 end
 
 """
-    find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
+    find_beta(p, mdot∇f, norm_∇f, μk, fk, max_obj_mem, β, θ1, θ2)
 
-Compute value `βmax` that saturates the contribution of the momentum term to the gradient.
-`βmax` is computed such that the two gradient-related conditions are ensured: 
-1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
+Compute `βmax` which saturates the contribution of the momentum term to the gradient.
+`βmax` is computed such that the two gradient-related conditions (first one is relaxed in the nonmonotone case) are ensured: 
+1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm + (max_obj_mem - fk)/μk ≥ θ1 * ‖∇f(xk)‖²
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
-with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` 
+with `m` the momentum term, `p = m - ∇f(xk)`, `mdot∇f = ∇f(xk)ᵀm`, `μk` the step multiplier applied to the search direction, `fk = f(xk)` the current objective value, and `max_obj_mem` the greatest objective value over the last `M` successful iterations.
 """
-function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T}
+function find_beta(
+  p::V,
+  mdot∇f::T,
+  norm_∇f::T,
+  μk::T,
+  fk::T,
+  max_obj_mem::T,
+  β::T,
+  θ1::T,
+  θ2::T,
+) where {T, V}
   n1 = norm_∇f^2 - mdot∇f
-  β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β
-  β2 = diff_norm != 0 ? (1 - θ2) * norm_∇f / diff_norm : β
+  n2 = norm(p)
+  β1 = n1 > 0 ? ((1 - θ1) * norm_∇f^2 - (fk - max_obj_mem) / μk) / n1 : β
+  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β
   return min(β, min(β1, β2))
 end
 

diff --git a/src/lbfgs.jl b/src/lbfgs.jl
@@ -153,13 +153,16 @@ function SolverCore.solve!(
   verbose > 0 && @info log_row(Any[stats.iter, f, ∇fNorm, T, Int])
 
   optimal = ∇fNorm ≤ ϵ
+  fmin = min(-one(T), f) / eps(T)
+  unbounded = f < fmin
 
   set_status!(
     stats,
     get_status(
       nlp,
       elapsed_time = stats.elapsed_time,
       optimal = optimal,
+      unbounded = unbounded,
       max_eval = max_eval,
       iter = stats.iter,
       max_iter = max_iter,
@@ -210,13 +213,15 @@ function SolverCore.solve!(
     set_time!(stats, time() - start_time)
     set_dual_residual!(stats, ∇fNorm)
     optimal = ∇fNorm ≤ ϵ
+    unbounded = f < fmin
 
     set_status!(
       stats,
       get_status(
         nlp,
         elapsed_time = stats.elapsed_time,
         optimal = optimal,
+        unbounded = unbounded,
         max_eval = max_eval,
         iter = stats.iter,
         max_iter = max_iter,

diff --git a/src/trunk.jl b/src/trunk.jl
@@ -196,6 +196,8 @@ function SolverCore.solve!(
   set_objective!(stats, f)
   set_dual_residual!(stats, ∇fNorm2)
   optimal = ∇fNorm2 ≤ ϵ
+  fmin = min(-one(T), f) / eps(T)
+  unbounded = f < fmin
 
   verbose > 0 && @info log_header(
     [:iter, :f, :dual, :radius, :ratio, :inner, :bk, :cgstatus],
@@ -210,6 +212,7 @@ function SolverCore.solve!(
       nlp,
       elapsed_time = stats.elapsed_time,
       optimal = optimal,
+      unbounded = unbounded,
       max_eval = max_eval,
       iter = stats.iter,
       max_iter = max_iter,
@@ -381,13 +384,15 @@ function SolverCore.solve!(
     update!(tr, sNorm)
 
     optimal = ∇fNorm2 ≤ ϵ
+    unbounded = f < fmin
 
     set_status!(
       stats,
       get_status(
         nlp,
         elapsed_time = stats.elapsed_time,
         optimal = optimal,
+        unbounded = unbounded,
         max_eval = max_eval,
         iter = stats.iter,
         max_iter = max_iter,

diff --git a/test/allocs.jl b/test/allocs.jl
@@ -35,7 +35,11 @@ if Sys.isunix()
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
-          solver = eval(symsolver)(nlp)
+          if (symsolver == :FoSolver || symsolver == :FomoSolver)
+            solver = eval(symsolver)(nlp; M = 2) # nonmonotone configuration allocates extra memory
+          else
+            solver = eval(symsolver)(nlp)
+          end
           if symsolver == :FomoSolver
             T = eltype(nlp.meta.x0)
             stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0)))

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -35,6 +35,19 @@ end
   end
 end
 
+@testset "Test unbounded below" begin
+  @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk)
+    T = Float64
+    x0 = [T(0)]
+    f(x) = -exp(x[1])
+    nlp = ADNLPModel(f, x0)
+
+    stats = eval(fun)(nlp)
+    @test stats.status == :unbounded
+    @test stats.objective < -one(T) / eps(T)
+  end
+end
+
 include("restart.jl")
 include("callback.jl")
 include("consistency.jl")

diff --git a/test/test_solvers.jl b/test/test_solvers.jl
@@ -14,6 +14,14 @@ function tests()
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)
       end
+      @testset "$name : nonmonotone configuration" for (name, solver) in [
+        ("R2", (nlp; kwargs...) -> R2(nlp, M = 2; kwargs...)),
+        ("fomo_r2", (nlp; kwargs...) -> fomo(nlp, M = 2; kwargs...)),
+        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp, M = 2, step_backend = JSOSolvers.tr_step(); kwargs...)),
+      ]
+        unconstrained_nlp(solver)
+        multiprecision_nlp(solver, :unc)
+      end
     end
     @testset "Bound-constrained solvers" begin
       @testset "$solver" for solver in [tron]