From 544c28839cfdaef3238690b92571f7f9b7e9734c Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 21 Dec 2023 12:04:12 -0500
Subject: [PATCH 001/171] add fomo solver

---
 docs/src/solvers.md  |   4 +-
 src/JSOSolvers.jl    |   1 +
 src/fomo.jl          | 288 +++++++++++++++++++++++++++++++++++++++++++
 test/allocs.jl       |   2 +-
 test/callback.jl     |   5 +
 test/consistency.jl  |   5 +-
 test/restart.jl      |   2 +
 test/runtests.jl     |   2 +-
 test/test_solvers.jl |   1 +
 9 files changed, 305 insertions(+), 5 deletions(-)
 create mode 100644 src/fomo.jl

diff --git a/docs/src/solvers.md b/docs/src/solvers.md
index 06fe0eed..322f7c2e 100644
--- a/docs/src/solvers.md
+++ b/docs/src/solvers.md
@@ -6,10 +6,11 @@
 - [`tron`](@ref)
 - [`trunk`](@ref)
 - [`R2`](@ref)
+- [`fomo`](@ref)
 
 | Problem type          | Solvers  |
 | --------------------- | -------- |
-| Unconstrained NLP     | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref)|
+| Unconstrained NLP     | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref)|
 | Unconstrained NLS     | [`trunk`](@ref), [`tron`](@ref) |
 | Bound-constrained NLP | [`tron`](@ref) |
 | Bound-constrained NLS | [`tron`](@ref) |
@@ -21,4 +22,5 @@ lbfgs
 tron
 trunk
 R2
+fomo
 ```
diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl
index cd65c9b2..79abace3 100644
--- a/src/JSOSolvers.jl
+++ b/src/JSOSolvers.jl
@@ -14,6 +14,7 @@ export solve!
 include("lbfgs.jl")
 include("trunk.jl")
 include("R2.jl")
+include("fomo.jl")
 
 # Unconstrained solvers for NLS
 include("trunkls.jl")
diff --git a/src/fomo.jl b/src/fomo.jl
new file mode 100644
index 00000000..711e7fa3
--- /dev/null
+++ b/src/fomo.jl
@@ -0,0 +1,288 @@
+export fomo, FomoSolver, tr, qr
+
+abstract type AbstractFomoMethod end
+
+struct tr <: AbstractFomoMethod end
+struct qr <: AbstractFomoMethod end
+
+"""
+    fomo(nlp; kwargs...)
+
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
+
+For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = FomoSolver(nlp)
+    solve!(solver, nlp; kwargs...)
+
+# Arguments
+- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
+
+# Keyword arguments 
+- `x::V = nlp.meta.x0`: the initial guess.
+- `atol::T = √eps(T)`: absolute tolerance.
+- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
+- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2.
+- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters.
+- `αmax = 1/eps(T)`: step parameter for fomo algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_time::Float64 = 30.0`: maximum time limit in seconds.
+- `max_iter::Int = typemax(Int)`: maximum number of iterations.
+- `β = T(0) ∈ [0,1)` : constant in the momentum term.
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
+- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect the subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.x`: current iterate;
+- `solver.gx`: current gradient;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+  - `stats.dual_feas`: norm of current gradient;
+  - `stats.iter`: current iteration counter;
+  - `stats.objective`: current objective function value;
+  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+  - `stats.elapsed_time`: elapsed time in seconds.
+
+# Examples
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = fomo(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = FomoSolver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+"""
+mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
+  x::V
+  g::V
+  c::V
+  m::V
+end
+
+function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
+  x = similar(nlp.meta.x0)
+  g = similar(nlp.meta.x0)
+  c = similar(nlp.meta.x0)
+  m = fill!(similar(nlp.meta.x0), 0)
+  return FomoSolver{T, V}(x, g, c, m)
+end
+
+@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  solver = FomoSolver(nlp)
+  return solve!(solver, nlp; kwargs...)
+end
+
+function SolverCore.reset!(solver::FomoSolver{T}) where {T}
+  fill!(solver.m,0)
+  solver
+end
+SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
+
+function SolverCore.solve!(
+  solver::FomoSolver{T, V},
+  nlp::AbstractNLPModel{T, V},
+  stats::GenericExecutionStats{T, V};
+  callback = (args...) -> nothing,
+  x::V = nlp.meta.x0,
+  atol::T = √eps(T),
+  rtol::T = √eps(T),
+  η1 = eps(T)^(1 / 4),
+  η2 = T(0.2),
+  κg = T(0.8),
+  γ1 = T(0.5),
+  γ2 = T(2),
+  αmax = 1/eps(T),
+  max_time::Float64 = 30.0,
+  max_eval::Int = -1,
+  max_iter::Int = typemax(Int),
+  β::T = T(0.9),
+  verbose::Int = 0,
+  backend = qr()
+) where {T, V}
+  unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
+
+  reset!(stats)
+  start_time = time()
+  set_time!(stats, 0.0)
+
+  x = solver.x .= x
+  ∇fk = solver.g
+  c = solver.c
+  m = solver.m
+
+  set_iter!(stats, 0)
+  set_objective!(stats, obj(nlp, x))
+
+  grad!(nlp, x, ∇fk)
+  norm_∇fk = norm(∇fk)
+  set_dual_residual!(stats, norm_∇fk)
+
+  αk = init_alpha(norm_∇fk,backend)
+  
+  # Stopping criterion: 
+  ϵ = atol + rtol * norm_∇fk
+  optimal = norm_∇fk ≤ ϵ
+  if optimal
+    @info("Optimal point found at initial point")
+    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
+    @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+  end
+  if verbose > 0 && mod(stats.iter, verbose) == 0
+    @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN
+  end
+
+  set_status!(
+    stats,
+    get_status(
+      nlp,
+      elapsed_time = stats.elapsed_time,
+      optimal = optimal,
+      max_eval = max_eval,
+      iter = stats.iter,
+      max_iter = max_iter,
+      max_time = max_time,
+    ),
+  )
+
+  callback(nlp, solver, stats)
+
+  done = stats.status != :unknown
+
+  while !done
+    λk = step_mult(αk,norm_∇fk,backend)
+    if β == 0
+      c .= x .- λk .* (∇fk)
+    else
+      satβ = find_beta(β, κg, m, ∇fk)
+      c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ)
+      m .= ∇fk .* (T(1) - β) .+ m .* β
+    end
+    ΔTk = norm_∇fk^2 * λk
+    fck = obj(nlp, c)
+    if fck == -Inf
+      set_status!(stats, :unbounded)
+      break
+    end
+
+    ρk = (stats.objective - fck) / ΔTk
+
+    # Update regularization parameters
+    if ρk >= η2
+      αk = min(αmax, γ2 * αk)
+    elseif ρk < η1
+      αk = αk * γ1
+    end
+
+    # Acceptance of the new candidate
+    if ρk >= η1
+      x .= c
+      set_objective!(stats, fck)
+      grad!(nlp, x, ∇fk)
+      norm_∇fk = norm(∇fk)
+    end
+
+    set_iter!(stats, stats.iter + 1)
+    set_time!(stats, time() - start_time)
+    set_dual_residual!(stats, norm_∇fk)
+    optimal = norm_∇fk ≤ ϵ
+
+    if verbose > 0 && mod(stats.iter, verbose) == 0
+      @info infoline
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+    end
+
+    set_status!(
+      stats,
+      get_status(
+        nlp,
+        elapsed_time = stats.elapsed_time,
+        optimal = optimal,
+        max_eval = max_eval,
+        iter = stats.iter,
+        max_iter = max_iter,
+        max_time = max_time,
+      ),
+    )
+
+    callback(nlp, solver, stats)
+
+    done = stats.status != :unknown
+  end
+
+  set_solution!(stats, x)
+  return stats
+end
+
+"""
+  find_beta(β,κg,d,∇f;tol=0.01)
+
+Compute satβ which saturates the contibution of the momentum term to the gradient.
+Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term.
+""" 
+function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V}
+  if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0.
+    return β
+  end
+  a = T(0)
+  b = β 
+  while b-a > tol
+    β = (b+a) / 2
+    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0     
+      a = β
+    else
+      b = β
+    end
+  end
+  return β
+end
+
+"""
+  init_alpha(norm_∇fk::T, ::qr)
+  init_alpha(norm_∇fk::T, ::tr)
+
+Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
+"""
+function init_alpha(norm_∇fk::T, ::qr) where{T}
+  1/2^round(log2(norm_∇fk + 1))
+end
+
+function init_alpha(norm_∇fk::T, ::tr) where{T}
+  norm_∇fk/2^round(log2(norm_∇fk + 1))
+end
+
+"""
+  step_mult(αk::T, norm_∇fk::T, ::qr)
+  step_mult(αk::T, norm_∇fk::T, ::tr)
+
+Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`).
+"""
+function step_mult(αk::T, norm_∇fk::T, ::qr) where{T}
+  αk
+end
+
+function step_mult(αk::T, norm_∇fk::T, ::tr) where{T}
+  αk/norm_∇fk
+end
\ No newline at end of file
diff --git a/test/allocs.jl b/test/allocs.jl
index 88a70f5f..5906ef84 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
diff --git a/test/callback.jl b/test/callback.jl
index db6177cc..187e0be1 100644
--- a/test/callback.jl
+++ b/test/callback.jl
@@ -31,6 +31,11 @@ using ADNLPModels, JSOSolvers, LinearAlgebra, Logging #, Plots
     tron(nlp, callback = cb)
   end
   @test stats.iter == 8
+
+  stats = with_logger(NullLogger()) do
+    fomo(nlp, callback = cb)
+  end
+  @test stats.iter == 8
 end
 
 @testset "Test callback for NLS" begin
diff --git a/test/consistency.jl b/test/consistency.jl
index af115661..321f798d 100644
--- a/test/consistency.jl
+++ b/test/consistency.jl
@@ -10,8 +10,9 @@ function consistency()
   @testset "Consistency" begin
     args = Pair{Symbol, Number}[:atol => 1e-6, :rtol => 1e-6, :max_eval => 20000, :max_time => 60.0]
 
-    @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2]
+    @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo]
       with_logger(NullLogger()) do
+        reset!(unlp)
         stats = mtd(unlp; args...)
         @test stats isa GenericExecutionStats
         @test stats.status == :first_order
@@ -27,7 +28,7 @@ function consistency()
       end
     end
 
-    @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2]
+    @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo]
       with_logger(NullLogger()) do
         stats = mtd(qnlp; args...)
         @test stats isa GenericExecutionStats
diff --git a/test/restart.jl b/test/restart.jl
index 02d90902..98f82103 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -1,5 +1,6 @@
 @testset "Test restart with a different initial guess: $fun" for (fun, s) in (
   (:R2, :R2Solver),
+  (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),
   (:trunk, :TrunkSolver),
@@ -44,6 +45,7 @@ end
 
 @testset "Test restart with a different problem: $fun" for (fun, s) in (
   (:R2, :R2Solver),
+  (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),
   (:trunk, :TrunkSolver),
diff --git a/test/runtests.jl b/test/runtests.jl
index de0295ed..bb41eeba 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,7 +18,7 @@ using JSOSolvers
 end
 
 @testset "Test iteration limit" begin
-  @testset "$fun" for fun in (R2, lbfgs, tron, trunk)
+  @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk)
     f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2
     nlp = ADNLPModel(f, [-1.2; 1.0])
 
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index cb41e83e..ddad51e8 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -8,6 +8,7 @@ function tests()
         ("lbfgs", lbfgs),
         ("tron", tron),
         ("R2", R2),
+        ("fomo", fomo),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From 9aeca32232477fbc9d52b6a95c0c065075438204 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 21 Dec 2023 12:32:03 -0500
Subject: [PATCH 002/171] fix consistency test

---
 test/consistency.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/consistency.jl b/test/consistency.jl
index 321f798d..fb725b5b 100644
--- a/test/consistency.jl
+++ b/test/consistency.jl
@@ -30,6 +30,7 @@ function consistency()
 
     @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo]
       with_logger(NullLogger()) do
+        reset!(qnlp)
         stats = mtd(qnlp; args...)
         @test stats isa GenericExecutionStats
         @test stats.status == :first_order

From 2a2bbbf270f9314991acc100fed67eafa7f89e73 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 28 Dec 2023 16:54:39 -0500
Subject: [PATCH 003/171] fix update rule, fix find_beta algo

---
 src/fomo.jl | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 711e7fa3..41e74145 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -151,7 +151,7 @@ function SolverCore.solve!(
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk 0
   end
 
   set_status!(
@@ -171,14 +171,13 @@ function SolverCore.solve!(
 
   done = stats.status != :unknown
 
+  satβ = T(0)
   while !done
     λk = step_mult(αk,norm_∇fk,backend)
     if β == 0
       c .= x .- λk .* (∇fk)
     else
-      satβ = find_beta(β, κg, m, ∇fk)
       c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ)
-      m .= ∇fk .* (T(1) - β) .+ m .* β
     end
     ΔTk = norm_∇fk^2 * λk
     fck = obj(nlp, c)
@@ -186,9 +185,9 @@ function SolverCore.solve!(
       set_status!(stats, :unbounded)
       break
     end
-
+    
     ρk = (stats.objective - fck) / ΔTk
-
+    
     # Update regularization parameters
     if ρk >= η2
       αk = min(αmax, γ2 * αk)
@@ -199,9 +198,15 @@ function SolverCore.solve!(
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
+      if β!=0
+        m .= ∇fk .* (T(1) - β) .+ m .* β
+      end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
+      if β!=0
+        satβ = find_beta(β, κg, m, ∇fk)
+      end
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -250,13 +255,13 @@ function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V}
   b = β 
   while b-a > tol
     β = (b+a) / 2
-    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0     
+    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0     
       a = β
     else
       b = β
     end
   end
-  return β
+  return a
 end
 
 """

From 52a4e16f06d3005a6801e0d0208b8c207d480037 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Fri, 5 Jan 2024 13:37:59 -0500
Subject: [PATCH 004/171] modify with gradient related strategy

---
 src/fomo.jl | 69 +++++++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 31 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 41e74145..db77937e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -23,8 +23,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
-- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2.
-- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
@@ -79,6 +78,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
   g::V
   c::V
   m::V
+  d::V
 end
 
 function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
@@ -86,7 +86,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   g = similar(nlp.meta.x0)
   c = similar(nlp.meta.x0)
   m = fill!(similar(nlp.meta.x0), 0)
-  return FomoSolver{T, V}(x, g, c, m)
+  d = fill!(similar(nlp.meta.x0), 0)
+  return FomoSolver{T, V}(x, g, c, m, d)
 end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -109,8 +110,7 @@ function SolverCore.solve!(
   atol::T = √eps(T),
   rtol::T = √eps(T),
   η1 = eps(T)^(1 / 4),
-  η2 = T(0.2),
-  κg = T(0.8),
+  η2 = T(0.95),
   γ1 = T(0.5),
   γ2 = T(2),
   αmax = 1/eps(T),
@@ -131,7 +131,7 @@ function SolverCore.solve!(
   ∇fk = solver.g
   c = solver.c
   m = solver.m
-
+  d = solver.d
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
@@ -171,15 +171,23 @@ function SolverCore.solve!(
 
   done = stats.status != :unknown
 
+  d .= ∇fk
+  norm_d = norm_∇fk
   satβ = T(0)
+  ρk = T(0)
   while !done
-    λk = step_mult(αk,norm_∇fk,backend)
-    if β == 0
-      c .= x .- λk .* (∇fk)
-    else
-      c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ)
-    end
-    ΔTk = norm_∇fk^2 * λk
+    # if β!=0
+    #   satβ = find_beta(β, m, ∇fk, norm_∇fk)
+    #   d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+    #   m .= ∇fk .* (T(1) - β) .+ m .* β
+    #   norm_d = norm(d)
+    # else
+    #   d .= ∇fk
+    #   norm_d = norm_∇fk
+    # end
+    λk = step_mult(αk,norm_d,backend)
+    c .= x .- λk .* d
+    ΔTk = norm_∇fk^2 *λk
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -187,6 +195,7 @@ function SolverCore.solve!(
     end
     
     ρk = (stats.objective - fck) / ΔTk
+    # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk
     
     # Update regularization parameters
     if ρk >= η2
@@ -204,9 +213,15 @@ function SolverCore.solve!(
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
-      if β!=0
-        satβ = find_beta(β, κg, m, ∇fk)
+      if β!= 0
+        satβ = find_beta(β, m, ∇fk, norm_∇fk)
+        d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+        norm_d = norm(d)
+      else
+        d .= ∇fk
+        norm_d = norm_∇fk
       end
+      
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -216,7 +231,7 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ
     end
 
     set_status!(
@@ -242,26 +257,18 @@ function SolverCore.solve!(
 end
 
 """
-  find_beta(β,κg,d,∇f;tol=0.01)
+  find_beta(β,m,∇f,norm_∇f,θ)
 
 Compute satβ which saturates the contibution of the momentum term to the gradient.
-Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term.
+satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
-function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V}
-  if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0.
+function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V}
+  dotprod = dot(m,∇f)
+  if dotprod > θ * norm_∇f^2
     return β
+  else
+    return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
   end
-  a = T(0)
-  b = β 
-  while b-a > tol
-    β = (b+a) / 2
-    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0     
-      a = β
-    else
-      b = β
-    end
-  end
-  return a
 end
 
 """

From 1423d51e00502ead41adb720be9df759c6bc7bbd Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Fri, 5 Jan 2024 15:11:20 -0500
Subject: [PATCH 005/171] fix model decrease computation

---
 src/fomo.jl | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index db77937e..e2b582e8 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -176,18 +176,9 @@ function SolverCore.solve!(
   satβ = T(0)
   ρk = T(0)
   while !done
-    # if β!=0
-    #   satβ = find_beta(β, m, ∇fk, norm_∇fk)
-    #   d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
-    #   m .= ∇fk .* (T(1) - β) .+ m .* β
-    #   norm_d = norm(d)
-    # else
-    #   d .= ∇fk
-    #   norm_d = norm_∇fk
-    # end
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    ΔTk = norm_∇fk^2 *λk
+    ΔTk = dot(∇fk , d) * λk
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)

From 7f6727beb399c7abccee4ff682cf192f6b23fb5a Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 15 Jan 2024 15:48:56 -0500
Subject: [PATCH 006/171] fix find_beta function

---
 src/fomo.jl | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index e2b582e8..ed3a1340 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -175,6 +175,7 @@ function SolverCore.solve!(
   norm_d = norm_∇fk
   satβ = T(0)
   ρk = T(0)
+  #μ = αk
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
@@ -184,10 +185,7 @@ function SolverCore.solve!(
       set_status!(stats, :unbounded)
       break
     end
-    
     ρk = (stats.objective - fck) / ΔTk
-    # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk
-    
     # Update regularization parameters
     if ρk >= η2
       αk = min(αmax, γ2 * αk)
@@ -199,8 +197,11 @@ function SolverCore.solve!(
     if ρk >= η1
       x .= c
       if β!=0
+        #μ = αk * (T(1) - β) + αk * β
+        #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β
         m .= ∇fk .* (T(1) - β) .+ m .* β
       end
+      #αk = μ
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
@@ -222,7 +223,7 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
     end
 
     set_status!(
@@ -255,10 +256,11 @@ satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
 function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V}
   dotprod = dot(m,∇f)
-  if dotprod > θ * norm_∇f^2
+  if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2
     return β
   else
-    return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
+    return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod)
+    #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
   end
 end
 

From 880759d4fd295d380029a2a5653ecd558d33e578 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 25 Jan 2024 11:53:34 -0500
Subject: [PATCH 007/171] fix null step size issue

---
 src/fomo.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ed3a1340..a0a543ee 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -238,7 +238,8 @@ function SolverCore.solve!(
         max_time = max_time,
       ),
     )
-
+    
+    αk == 0 && set_status!(stats,:exception)
     callback(nlp, solver, stats)
 
     done = stats.status != :unknown

From bd2b8f598ca04e86975ed8898953489f7a2086d5 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 25 Jan 2024 12:33:40 -0500
Subject: [PATCH 008/171] fix test

---
 test/test_solvers.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index ddad51e8..ba182731 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -8,7 +8,8 @@ function tests()
         ("lbfgs", lbfgs),
         ("tron", tron),
         ("R2", R2),
-        ("fomo", fomo),
+        ("fomo_r2", fomo),
+        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From 3ce7b96a2a3fac2191b5f767473043af698a9c55 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 25 Jan 2024 12:49:29 -0500
Subject: [PATCH 009/171] update docstring

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index a0a543ee..215d2d94 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -28,7 +28,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1)` : constant in the momentum term.
+- `β = T(0) ∈ [0,1)` : decay rate for the momentum.
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
 

From 91890772c105762b75626998d23d0ad1cf753901 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 29 Jan 2024 15:14:16 -0500
Subject: [PATCH 010/171] add average sat beta to genericexecutionstat

---
 src/fomo.jl | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 215d2d94..af4f521f 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -92,7 +92,9 @@ end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
-  return solve!(solver, nlp; kwargs...)
+  solver_specific = Dict(:avgsatβ => T(0.))
+  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
+  return solve!(solver, nlp, stats; kwargs...)
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
@@ -175,6 +177,8 @@ function SolverCore.solve!(
   norm_d = norm_∇fk
   satβ = T(0)
   ρk = T(0)
+  avgsatβ = T(0.)
+  siter = 0
   #μ = αk
   while !done
     λk = step_mult(αk,norm_d,backend)
@@ -213,7 +217,8 @@ function SolverCore.solve!(
         d .= ∇fk
         norm_d = norm_∇fk
       end
-      
+      avgsatβ += satβ
+      siter += 1
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -245,6 +250,8 @@ function SolverCore.solve!(
     done = stats.status != :unknown
   end
 
+  avgsatβ /= siter
+  stats.solver_specific[:avgsatβ] = avgsatβ
   set_solution!(stats, x)
   return stats
 end

From 2b8e3498e3a2ca07268ecd7c59b681d4402cc51c Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 30 Jan 2024 15:14:18 -0500
Subject: [PATCH 011/171] add theta param as key arg

---
 src/fomo.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index af4f521f..a746483e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -29,6 +29,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0) ∈ [0,1)` : decay rate for the momentum.
+- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
 
@@ -120,6 +121,7 @@ function SolverCore.solve!(
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
+  θ::T = T(0.1),
   verbose::Int = 0,
   backend = qr()
 ) where {T, V}
@@ -210,7 +212,7 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if β!= 0
-        satβ = find_beta(β, m, ∇fk, norm_∇fk)
+        satβ = find_beta(β, m, ∇fk, norm_∇fk, θ)
         d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
         norm_d = norm(d)
       else
@@ -262,13 +264,12 @@ end
 Compute satβ which saturates the contibution of the momentum term to the gradient.
 satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
-function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V}
+function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V}
   dotprod = dot(m,∇f)
   if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2
     return β
   else
     return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod)
-    #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
   end
 end
 

From 2ca4813b1a3a9d523213e3b747c8a971444abd24 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 1 Feb 2024 12:25:34 -0500
Subject: [PATCH 012/171] update convergence conditions in find_beta. add
 satbeta decrease strategy if iteration is unsuccessful.

---
 src/fomo.jl | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index a746483e..557b1a29 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -24,12 +24,14 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
+- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1)` : decay rate for the momentum.
-- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. 
+- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
+- `θ1 = T(1e-5)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk]⋅∇f(xk) ≥ θ1 ||∇f(xk)||², with mk memory of past gradients and satβ ∈ [0,β].
+- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 ||(1-satβ)∇f(xk) + satβ mk||, with mk memory of past gradients and satβ ∈ [0,β].
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
 
@@ -116,12 +118,14 @@ function SolverCore.solve!(
   η2 = T(0.95),
   γ1 = T(0.5),
   γ2 = T(2),
+  γ3 = T(1/2),
   αmax = 1/eps(T),
   max_time::Float64 = 30.0,
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
-  θ::T = T(0.1),
+  θ1::T = T(1e-5),
+  θ2::T = T(1e-5),
   verbose::Int = 0,
   backend = qr()
 ) where {T, V}
@@ -181,6 +185,7 @@ function SolverCore.solve!(
   ρk = T(0)
   avgsatβ = T(0.)
   siter = 0
+
   #μ = αk
   while !done
     λk = step_mult(αk,norm_d,backend)
@@ -197,6 +202,8 @@ function SolverCore.solve!(
       αk = min(αmax, γ2 * αk)
     elseif ρk < η1
       αk = αk * γ1
+      satβ *= γ3
+      d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
     end
 
     # Acceptance of the new candidate
@@ -212,7 +219,7 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if β!= 0
-        satβ = find_beta(β, m, ∇fk, norm_∇fk, θ)
+        satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
         norm_d = norm(d)
       else
@@ -259,18 +266,16 @@ function SolverCore.solve!(
 end
 
 """
-  find_beta(β,m,∇f,norm_∇f,θ)
+find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
 
 Compute satβ which saturates the contibution of the momentum term to the gradient.
 satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
-function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V}
+function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
-  if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2
-    return β
-  else
-    return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod)
-  end
+  β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
+  β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))
+  return min(β,min(β1,β2)) 
 end
 
 """

From 9d7bac2e2c9037614e757eaaffb7e321052d91ac Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 1 Feb 2024 12:57:44 -0500
Subject: [PATCH 013/171] fix possible 0 division in find_beta

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 557b1a29..ff24afb9 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -274,7 +274,7 @@ satβ is computed such that m.∇f > θ * norm_∇f^2
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
   β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
-  β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))
+  β2 = m != ∇f             ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))      : β
   return min(β,min(β1,β2)) 
 end
 

From b9a3aef83905d484febfdf8ba39514dda52ecc9a Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 3 Feb 2024 00:16:09 -0500
Subject: [PATCH 014/171] Misc improvements: - update docstrings - rename qr ->
 r2 - remove dead code

---
 src/fomo.jl | 62 ++++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ff24afb9..6a7628a3 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,9 +1,9 @@
-export fomo, FomoSolver, tr, qr
+export fomo, FomoSolver, tr, r2
 
 abstract type AbstractFomoMethod end
 
 struct tr <: AbstractFomoMethod end
-struct qr <: AbstractFomoMethod end
+struct r2 <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -22,18 +22,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
-- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1)` : target decay rate for the momentum.
+- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
-- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
+- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
+- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -114,20 +114,20 @@ function SolverCore.solve!(
   x::V = nlp.meta.x0,
   atol::T = √eps(T),
   rtol::T = √eps(T),
-  η1 = eps(T)^(1 / 4),
-  η2 = T(0.95),
-  γ1 = T(0.5),
-  γ2 = T(2),
-  γ3 = T(1/2),
-  αmax = 1/eps(T),
+  η1::T = T(eps(T)^(1 / 4)),
+  η2::T = T(0.95),
+  γ1::T = T(1/2),
+  γ2::T = T(2),
+  γ3::T = T(1/2),
+  αmax::T = 1/eps(T),
   max_time::Float64 = 30.0,
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
-  θ1::T = T(1e-5),
-  θ2::T = T(1e-5),
+  θ1::T = T(0.1),
+  θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
-  backend = qr()
+  backend = r2()
 ) where {T, V}
   unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
 
@@ -183,10 +183,9 @@ function SolverCore.solve!(
   norm_d = norm_∇fk
   satβ = T(0)
   ρk = T(0)
-  avgsatβ = T(0.)
+  avgsatβ = T(0)
   siter = 0
-
-  #μ = αk
+  oneT = T(1)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
@@ -203,24 +202,21 @@ function SolverCore.solve!(
     elseif ρk < η1
       αk = αk * γ1
       satβ *= γ3
-      d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+      d .= ∇fk .* (oneT - satβ) .+ m .* satβ
     end
 
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
       if β!=0
-        #μ = αk * (T(1) - β) + αk * β
-        #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β
-        m .= ∇fk .* (T(1) - β) .+ m .* β
+        m .= ∇fk .* (oneT - β) .+ m .* β
       end
-      #αk = μ
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if β!= 0
         satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
-        d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
         norm_d = norm(d)
       else
         d .= ∇fk
@@ -269,22 +265,26 @@ end
 find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
 
 Compute satβ which saturates the contibution of the momentum term to the gradient.
-satβ is computed such that m.∇f > θ * norm_∇f^2
+`satβ` is computed such that the two gradient-related conditions are ensured: 
+1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||²
+2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||
+with `m` memory of past gradient/
 """ 
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
+  diffnorm = norm(m .- ∇f)
   β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
-  β2 = m != ∇f             ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))      : β
+  β2 = diffnorm != 0       ? (1-θ2)*norm_∇f/(θ2*diffnorm)           : β
   return min(β,min(β1,β2)) 
 end
 
 """
-  init_alpha(norm_∇fk::T, ::qr)
+  init_alpha(norm_∇fk::T, ::r2)
   init_alpha(norm_∇fk::T, ::tr)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::qr) where{T}
+function init_alpha(norm_∇fk::T, ::r2) where{T}
   1/2^round(log2(norm_∇fk + 1))
 end
 
@@ -293,12 +293,12 @@ function init_alpha(norm_∇fk::T, ::tr) where{T}
 end
 
 """
-  step_mult(αk::T, norm_∇fk::T, ::qr)
+  step_mult(αk::T, norm_∇fk::T, ::r2)
   step_mult(αk::T, norm_∇fk::T, ::tr)
 
-Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`).
+Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(αk::T, norm_∇fk::T, ::qr) where{T}
+function step_mult(αk::T, norm_∇fk::T, ::r2) where{T}
   αk
 end
 

From 9acd09bba9f343f39c3e9388dff13081547e7087 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 3 Feb 2024 14:54:29 -0500
Subject: [PATCH 015/171] fix null denominator in find_beta

---
 src/fomo.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6a7628a3..c983205b 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -272,9 +272,10 @@ with `m` memory of past gradient/
 """ 
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
-  diffnorm = norm(m .- ∇f)
-  β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
-  β2 = diffnorm != 0       ? (1-θ2)*norm_∇f/(θ2*diffnorm)           : β
+  n1 = norm_∇f^2 - dotprod
+  n2 = norm(m .- ∇f)
+  β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
+  β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
   return min(β,min(β1,β2)) 
 end
 

From ef258a8e8382d93b245d6ded4942198fe1b3b24a Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 02:41:30 -0500
Subject: [PATCH 016/171] Make R2 and R2Solver interfaces to fomo solver.
 Delete obsolete R2.jl file.

---
 src/JSOSolvers.jl |   1 -
 src/R2.jl         | 231 ----------------------------------------------
 src/fomo.jl       | 150 ++++++++++++++++++++++++++----
 test/allocs.jl    |   2 +-
 test/callback.jl  |  26 +++---
 test/restart.jl   |   4 +-
 6 files changed, 147 insertions(+), 267 deletions(-)
 delete mode 100644 src/R2.jl

diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl
index 79abace3..85afc4fe 100644
--- a/src/JSOSolvers.jl
+++ b/src/JSOSolvers.jl
@@ -13,7 +13,6 @@ export solve!
 # Unconstrained solvers
 include("lbfgs.jl")
 include("trunk.jl")
-include("R2.jl")
 include("fomo.jl")
 
 # Unconstrained solvers for NLS
diff --git a/src/R2.jl b/src/R2.jl
deleted file mode 100644
index 79b7d7c0..00000000
--- a/src/R2.jl
+++ /dev/null
@@ -1,231 +0,0 @@
-export R2, R2Solver
-
-"""
-    R2(nlp; kwargs...)
-
-A first-order quadratic regularization method for unconstrained optimization.
-
-For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`:
-
-    solver = R2Solver(nlp)
-    solve!(solver, nlp; kwargs...)
-
-# Arguments
-- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
-
-# Keyword arguments 
-- `x::V = nlp.meta.x0`: the initial guess.
-- `atol::T = √eps(T)`: absolute tolerance.
-- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
-- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters.
-- `σmin = eps(T)`: step parameter for R2 algorithm.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
-- `max_time::Float64 = 30.0`: maximum time limit in seconds.
-- `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1]` is the constant in the momentum term. If `β == 0`, R2 does not use momentum.
-- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-
-# Output
-The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
-
-# Callback
-The callback is called at each iteration.
-The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
-Changing any of the input arguments will affect the subsequent iterations.
-In particular, setting `stats.status = :user` will stop the algorithm.
-All relevant information should be available in `nlp` and `solver`.
-Notably, you can access, and modify, the following:
-- `solver.x`: current iterate;
-- `solver.gx`: current gradient;
-- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
-  - `stats.dual_feas`: norm of current gradient;
-  - `stats.iter`: current iteration counter;
-  - `stats.objective`: current objective function value;
-  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
-  - `stats.elapsed_time`: elapsed time in seconds.
-
-# Examples
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-stats = R2(nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-solver = R2Solver(nlp);
-stats = solve!(solver, nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-"""
-mutable struct R2Solver{T, V} <: AbstractOptimizationSolver
-  x::V
-  gx::V
-  cx::V
-  d::V   # used for momentum term
-  σ::T
-end
-
-function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
-  x = similar(nlp.meta.x0)
-  gx = similar(nlp.meta.x0)
-  cx = similar(nlp.meta.x0)
-  d = fill!(similar(nlp.meta.x0), 0)
-  σ = zero(T) # init it to zero for now 
-  return R2Solver{T, V}(x, gx, cx, d, σ)
-end
-
-@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
-  solver = R2Solver(nlp)
-  return solve!(solver, nlp; kwargs...)
-end
-
-function SolverCore.reset!(solver::R2Solver{T}) where {T}
-  solver.d .= zero(T)
-  solver
-end
-SolverCore.reset!(solver::R2Solver, ::AbstractNLPModel) = reset!(solver)
-
-function SolverCore.solve!(
-  solver::R2Solver{T, V},
-  nlp::AbstractNLPModel{T, V},
-  stats::GenericExecutionStats{T, V};
-  callback = (args...) -> nothing,
-  x::V = nlp.meta.x0,
-  atol::T = √eps(T),
-  rtol::T = √eps(T),
-  η1 = eps(T)^(1 / 4),
-  η2 = T(0.95),
-  γ1 = T(1 / 2),
-  γ2 = 1 / γ1,
-  σmin = zero(T),
-  max_time::Float64 = 30.0,
-  max_eval::Int = -1,
-  max_iter::Int = typemax(Int),
-  β::T = T(0),
-  verbose::Int = 0,
-) where {T, V}
-  unconstrained(nlp) || error("R2 should only be called on unconstrained problems.")
-
-  reset!(stats)
-  start_time = time()
-  set_time!(stats, 0.0)
-
-  x = solver.x .= x
-  ∇fk = solver.gx
-  ck = solver.cx
-  d = solver.d
-  σk = solver.σ
-
-  set_iter!(stats, 0)
-  set_objective!(stats, obj(nlp, x))
-
-  grad!(nlp, x, ∇fk)
-  norm_∇fk = norm(∇fk)
-  set_dual_residual!(stats, norm_∇fk)
-
-  σk = 2^round(log2(norm_∇fk + 1))
-  # Stopping criterion: 
-  ϵ = atol + rtol * norm_∇fk
-  optimal = norm_∇fk ≤ ϵ
-  if optimal
-    @info("Optimal point found at initial point")
-    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
-    @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk σk
-  end
-  if verbose > 0 && mod(stats.iter, verbose) == 0
-    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk σk
-  end
-
-  set_status!(
-    stats,
-    get_status(
-      nlp,
-      elapsed_time = stats.elapsed_time,
-      optimal = optimal,
-      max_eval = max_eval,
-      iter = stats.iter,
-      max_iter = max_iter,
-      max_time = max_time,
-    ),
-  )
-
-  solver.σ = σk
-  callback(nlp, solver, stats)
-  σk = solver.σ
-
-  done = stats.status != :unknown
-
-  while !done
-    if β == 0
-      ck .= x .- (∇fk ./ σk)
-    else
-      d .= ∇fk .* (T(1) - β) .+ d .* β
-      ck .= x .- (d ./ σk)
-    end
-    ΔTk = norm_∇fk^2 / σk
-    fck = obj(nlp, ck)
-    if fck == -Inf
-      set_status!(stats, :unbounded)
-      break
-    end
-
-    ρk = (stats.objective - fck) / ΔTk
-
-    # Update regularization parameters
-    if ρk >= η2
-      σk = max(σmin, γ1 * σk)
-    elseif ρk < η1
-      σk = σk * γ2
-    end
-
-    # Acceptance of the new candidate
-    if ρk >= η1
-      x .= ck
-      set_objective!(stats, fck)
-      grad!(nlp, x, ∇fk)
-      norm_∇fk = norm(∇fk)
-    end
-
-    set_iter!(stats, stats.iter + 1)
-    set_time!(stats, time() - start_time)
-    set_dual_residual!(stats, norm_∇fk)
-    optimal = norm_∇fk ≤ ϵ
-
-    if verbose > 0 && mod(stats.iter, verbose) == 0
-      @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk σk
-    end
-
-    set_status!(
-      stats,
-      get_status(
-        nlp,
-        elapsed_time = stats.elapsed_time,
-        optimal = optimal,
-        max_eval = max_eval,
-        iter = stats.iter,
-        max_iter = max_iter,
-        max_time = max_time,
-      ),
-    )
-    solver.σ = σk
-    callback(nlp, solver, stats)
-    σk = solver.σ
-
-    done = stats.status != :unknown
-  end
-
-  set_solution!(stats, x)
-  return stats
-end
diff --git a/src/fomo.jl b/src/fomo.jl
index c983205b..78e151d5 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,9 +1,10 @@
-export fomo, FomoSolver, tr, r2
+export fomo, FomoSolver, tr, r2, R2
 
 abstract type AbstractFomoMethod end
 
-struct tr <: AbstractFomoMethod end
-struct r2 <: AbstractFomoMethod end
+struct tr   <: AbstractFomoMethod end
+struct r2   <: AbstractFomoMethod end
+struct R2og <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -100,10 +101,97 @@ end
   return solve!(solver, nlp, stats; kwargs...)
 end
 
+"""
+    R2(nlp; kwargs...)
+
+A first-order quadratic regularization method for unconstrained optimization.
+
+For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = R2Solver(nlp)
+    solve!(solver, nlp; kwargs...)
+Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`.
+
+# Arguments
+- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
+
+# Keyword arguments 
+- `x::V = nlp.meta.x0`: the initial guess.
+- `atol::T = √eps(T)`: absolute tolerance.
+- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
+- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters.
+- `σmin = eps(T)`: step parameter for R2 algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_time::Float64 = 30.0`: maximum time limit in seconds.
+- `max_iter::Int = typemax(Int)`: maximum number of iterations.
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect the subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.x`: current iterate;
+- `solver.gx`: current gradient;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+  - `stats.dual_feas`: norm of current gradient;
+  - `stats.iter`: current iteration counter;
+  - `stats.objective`: current objective function value;
+  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+  - `stats.elapsed_time`: elapsed time in seconds.
+
+# Examples
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = R2(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = R2Solver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+"""
+function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
+  x = similar(nlp.meta.x0)
+  g = similar(nlp.meta.x0)
+  c = similar(nlp.meta.x0)
+  m = Vector{T}()
+  d = g # similar without momentum
+  return FomoSolver{T, V}(x, g, c, m, d)
+end
+
+@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  solver = R2Solver(nlp)
+  stats = GenericExecutionStats(nlp)
+  if haskey(kwargs,:σmax)
+    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
+  else
+    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...)
+  end
+end
+
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
   fill!(solver.m,0)
   solver
 end
+
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
@@ -127,10 +215,13 @@ function SolverCore.solve!(
   θ1::T = T(0.1),
   θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
-  backend = r2()
+  backend = r2(),
+  σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2`
 ) where {T, V}
-  unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
-
+  r2mode = (backend == R2og())
+  mthname = r2mode ? "R2" : "fomo"
+  unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.")
+  
   reset!(stats)
   start_time = time()
   set_time!(stats, 0.0)
@@ -154,12 +245,24 @@ function SolverCore.solve!(
   optimal = norm_∇fk ≤ ϵ
   if optimal
     @info("Optimal point found at initial point")
-    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
-    @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+    if r2mode
+      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+    else
+      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+    end
+    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
-    @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
+    if r2mode
+      @info @sprintf "%5s  %9s  %7s  %7s" "iter" "f" "‖∇f‖" "σ"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+    else
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
     infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk 0
+    end
+    
   end
 
   set_status!(
@@ -201,8 +304,10 @@ function SolverCore.solve!(
       αk = min(αmax, γ2 * αk)
     elseif ρk < η1
       αk = αk * γ1
-      satβ *= γ3
-      d .= ∇fk .* (oneT - satβ) .+ m .* satβ
+      if !r2mode
+        satβ *= γ3
+        (d .= ∇fk .* (oneT - satβ) .+ m .* satβ)
+      end
     end
 
     # Acceptance of the new candidate
@@ -222,8 +327,10 @@ function SolverCore.solve!(
         d .= ∇fk
         norm_d = norm_∇fk
       end
-      avgsatβ += satβ
-      siter += 1
+      if !r2mode
+        (avgsatβ += satβ)
+        (siter += 1)
+      end
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -233,7 +340,11 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+      if r2mode
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+      else
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+      end
     end
 
     set_status!(
@@ -254,9 +365,10 @@ function SolverCore.solve!(
 
     done = stats.status != :unknown
   end
-
-  avgsatβ /= siter
-  stats.solver_specific[:avgsatβ] = avgsatβ
+  if !r2mode
+    avgsatβ /= siter
+    stats.solver_specific[:avgsatβ] = avgsatβ
+  end
   set_solution!(stats, x)
   return stats
 end
@@ -285,7 +397,7 @@ end
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::r2) where{T}
+function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T}
   1/2^round(log2(norm_∇fk + 1))
 end
 
@@ -299,7 +411,7 @@ end
 
 Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(αk::T, norm_∇fk::T, ::r2) where{T}
+function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
   αk
 end
 
diff --git a/test/allocs.jl b/test/allocs.jl
index 5906ef84..42d266ab 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
diff --git a/test/callback.jl b/test/callback.jl
index 187e0be1..2511ee6b 100644
--- a/test/callback.jl
+++ b/test/callback.jl
@@ -58,16 +58,16 @@ end
   @test stats.iter == 8
 end
 
-@testset "Testing Solver Values" begin
-  f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2
-  nlp = ADNLPModel(f, [-1.2; 1.0])
-  function cb(nlp, solver, stats)
-    if stats.iter == 4
-      @test solver.σ > 0.0
-      stats.status = :user
-    end
-  end
-  stats = with_logger(NullLogger()) do
-    R2(nlp, callback = cb)
-  end
-end
+# @testset "Testing Solver Values" begin
+#   f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2
+#   nlp = ADNLPModel(f, [-1.2; 1.0])
+#   function cb(nlp, solver, stats)
+#     if stats.iter == 4
+#       @test solver.σ > 0.0
+#       stats.status = :user
+#     end
+#   end
+#   stats = with_logger(NullLogger()) do
+#     R2(nlp, callback = cb)
+#   end
+# end
diff --git a/test/restart.jl b/test/restart.jl
index 98f82103..eb770739 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -1,5 +1,5 @@
 @testset "Test restart with a different initial guess: $fun" for (fun, s) in (
-  (:R2, :R2Solver),
+  (:R2, :FomoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),
@@ -44,7 +44,7 @@ end
 end
 
 @testset "Test restart with a different problem: $fun" for (fun, s) in (
-  (:R2, :R2Solver),
+  (:R2, :FomoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),

From 85022e16a22b83c98abead21b23cc99b2e7fb959 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 10:40:13 -0500
Subject: [PATCH 017/171] Export R2Solver (fix doc build issue) and backend for
 R2 classic

---
 src/fomo.jl | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 78e151d5..fcd60978 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,4 +1,4 @@
-export fomo, FomoSolver, tr, r2, R2
+export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
 
 abstract type AbstractFomoMethod end
 
@@ -34,7 +34,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
 - `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region
+- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0).
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -161,7 +161,7 @@ stats = R2(nlp)
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
 solver = R2Solver(nlp);
-stats = solve!(solver, nlp)
+stats = solve!(solver, nlp, backend = R2og())
 
 # output
 
@@ -181,9 +181,9 @@ end
   solver = R2Solver(nlp)
   stats = GenericExecutionStats(nlp)
   if haskey(kwargs,:σmax)
-    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
+    return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
   else
-    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...)
+    return solve!(solver, nlp, stats; backend = R2og(), kwargs...)
   end
 end
 
@@ -292,7 +292,11 @@ function SolverCore.solve!(
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    ΔTk = dot(∇fk , d) * λk
+    if r2mode
+      ΔTk = norm_∇fk^2 * λk
+    else  
+      ΔTk = dot(∇fk , d) * λk
+    end
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -306,20 +310,20 @@ function SolverCore.solve!(
       αk = αk * γ1
       if !r2mode
         satβ *= γ3
-        (d .= ∇fk .* (oneT - satβ) .+ m .* satβ)
+        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
       end
     end
 
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
-      if β!=0
+      if !r2mode
         m .= ∇fk .* (oneT - β) .+ m .* β
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
-      if β!= 0
+      if !r2mode
         satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - satβ) .+ m .* satβ
         norm_d = norm(d)
@@ -328,8 +332,8 @@ function SolverCore.solve!(
         norm_d = norm_∇fk
       end
       if !r2mode
-        (avgsatβ += satβ)
-        (siter += 1)
+        avgsatβ += satβ
+        siter += 1
       end
     end
 

From 07e79b16ffa625991cd95bacfe0ed3563792bca7 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 11:09:20 -0500
Subject: [PATCH 018/171] add :smallstep exception (step addition underflow)

---
 src/fomo.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index fcd60978..49b535c6 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -292,6 +292,7 @@ function SolverCore.solve!(
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
+    x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -364,7 +365,8 @@ function SolverCore.solve!(
       ),
     )
     
-    αk == 0 && set_status!(stats,:exception)
+    αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before
+
     callback(nlp, solver, stats)
 
     done = stats.status != :unknown

From f2e8e678993e992a7f14d87702ef2c902a836e8c Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 11:17:28 -0500
Subject: [PATCH 019/171] fix small_step exception

---
 src/fomo.jl | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 49b535c6..95655ea4 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -292,7 +292,8 @@ function SolverCore.solve!(
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0
+    uf = x == c # step addition underfow on every dimensions, should happen before αk == 0
+    @show stats.status
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -365,11 +366,13 @@ function SolverCore.solve!(
       ),
     )
     
-    αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before
-
     callback(nlp, solver, stats)
 
+    uf      && set_status!(stats,:small_step)
+    αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before
+
     done = stats.status != :unknown
+    @show stats.status αk
   end
   if !r2mode
     avgsatβ /= siter

From ad68ceb6b5edc7b6dde7e73e3b8b27a258be5620 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 5 Feb 2024 11:20:03 -0500
Subject: [PATCH 020/171] remove terminal output

---
 src/fomo.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 95655ea4..497bdfb3 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -293,7 +293,6 @@ function SolverCore.solve!(
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
     uf = x == c # step addition underfow on every dimensions, should happen before αk == 0
-    @show stats.status
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -372,7 +371,6 @@ function SolverCore.solve!(
     αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
-    @show stats.status αk
   end
   if !r2mode
     avgsatβ /= siter

From 1875d2c831ffc92b6b218145b83c55f8aa75df22 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Mon, 5 Feb 2024 15:27:13 -0500
Subject: [PATCH 021/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 497bdfb3..96c92b4b 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -34,7 +34,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
 - `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0).
+- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.

From 39d4e4fdda5445bd778147136b76b63d62c43da2 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Mon, 5 Feb 2024 15:35:49 -0500
Subject: [PATCH 022/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 96c92b4b..02f53395 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -389,7 +389,7 @@ Compute satβ which saturates the contibution of the momentum term to the gradie
 2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||
 with `m` memory of past gradient/
 """ 
-function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
   n1 = norm_∇f^2 - dotprod
   n2 = norm(m .- ∇f)

From 2f72dad0a45494d4c78aaf05ea2b017ba238fdff Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 5 Feb 2024 16:24:01 -0500
Subject: [PATCH 023/171] update docstring, add rhok to the output

---
 src/fomo.jl | 134 +++++++++++++++-------------------------------------
 1 file changed, 37 insertions(+), 97 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 497bdfb3..30fc236f 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -8,6 +8,7 @@ struct R2og <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
+    R2(nlp; kwargs...)
 
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
 
@@ -16,6 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
+*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method.
+For advanced usage:
+
+    solver = R2Solver(nlp)
+    solve!(solver, nlp; kwargs...)
+
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
@@ -25,16 +32,16 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
-- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration.
+- `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
-- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β].
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0).
+- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -96,78 +103,11 @@ end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
-  solver_specific = Dict(:avgsatβ => T(0.))
+  solver_specific = Dict(:avgβmax => T(0.))
   stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 
-"""
-    R2(nlp; kwargs...)
-
-A first-order quadratic regularization method for unconstrained optimization.
-
-For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`:
-
-    solver = R2Solver(nlp)
-    solve!(solver, nlp; kwargs...)
-Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`.
-
-# Arguments
-- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
-
-# Keyword arguments 
-- `x::V = nlp.meta.x0`: the initial guess.
-- `atol::T = √eps(T)`: absolute tolerance.
-- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
-- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters.
-- `σmin = eps(T)`: step parameter for R2 algorithm.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
-- `max_time::Float64 = 30.0`: maximum time limit in seconds.
-- `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-
-# Output
-The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
-
-# Callback
-The callback is called at each iteration.
-The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
-Changing any of the input arguments will affect the subsequent iterations.
-In particular, setting `stats.status = :user` will stop the algorithm.
-All relevant information should be available in `nlp` and `solver`.
-Notably, you can access, and modify, the following:
-- `solver.x`: current iterate;
-- `solver.gx`: current gradient;
-- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
-  - `stats.dual_feas`: norm of current gradient;
-  - `stats.iter`: current iteration counter;
-  - `stats.objective`: current objective function value;
-  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
-  - `stats.elapsed_time`: elapsed time in seconds.
-
-# Examples
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-stats = R2(nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-solver = R2Solver(nlp);
-stats = solve!(solver, nlp, backend = R2og())
-
-# output
-
-"Execution stats: first-order stationary"
-```
-"""
 function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   x = similar(nlp.meta.x0)
   g = similar(nlp.meta.x0)
@@ -177,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FomoSolver{T, V}(x, g, c, m, d)
 end
 
-@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = R2Solver(nlp)
   stats = GenericExecutionStats(nlp)
   if haskey(kwargs,:σmax)
@@ -256,11 +196,11 @@ function SolverCore.solve!(
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if r2mode
-      @info @sprintf "%5s  %9s  %7s  %7s" "iter" "f" "‖∇f‖" "σ"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
     else
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk 0
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
     end
     
   end
@@ -284,15 +224,15 @@ function SolverCore.solve!(
 
   d .= ∇fk
   norm_d = norm_∇fk
-  satβ = T(0)
+  βmax = T(0)
   ρk = T(0)
-  avgsatβ = T(0)
+  avgβmax = T(0)
   siter = 0
   oneT = T(1)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    uf = x == c # step addition underfow on every dimensions, should happen before αk == 0
+    step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -310,8 +250,8 @@ function SolverCore.solve!(
     elseif ρk < η1
       αk = αk * γ1
       if !r2mode
-        satβ *= γ3
-        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
+        βmax *= γ3
+        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
       end
     end
 
@@ -325,15 +265,15 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
-        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
+        βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
+        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
         norm_d = norm(d)
       else
         d .= ∇fk
         norm_d = norm_∇fk
       end
       if !r2mode
-        avgsatβ += satβ
+        avgβmax += βmax
         siter += 1
       end
     end
@@ -346,9 +286,9 @@ function SolverCore.solve!(
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
       if r2mode
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk
       else
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax
       end
     end
 
@@ -367,14 +307,14 @@ function SolverCore.solve!(
     
     callback(nlp, solver, stats)
 
-    uf      && set_status!(stats,:small_step)
-    αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before
+    step_underflow  && set_status!(stats,:small_step)
+    αk == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
   if !r2mode
-    avgsatβ /= siter
-    stats.solver_specific[:avgsatβ] = avgsatβ
+    avgβmax /= siter
+    stats.solver_specific[:avgβmax] = avgβmax
   end
   set_solution!(stats, x)
   return stats
@@ -383,11 +323,11 @@ end
 """
 find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
 
-Compute satβ which saturates the contibution of the momentum term to the gradient.
-`satβ` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||²
-2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||
-with `m` memory of past gradient/
+Compute βmax which saturates the contibution of the momentum term to the gradient.
+`βmax` is computed such that the two gradient-related conditions are ensured: 
+1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
+2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))||
+with `m` memory of past gradient
 """ 
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
@@ -395,7 +335,7 @@ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n2 = norm(m .- ∇f)
   β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
   β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
-  return min(β,min(β1,β2)) 
+  return min(β,min(β1,β2))
 end
 
 """

From d22d162dbccf026336e70e1c5c572b1784c3dca4 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 6 Feb 2024 12:22:54 -0500
Subject: [PATCH 024/171] - create variable for dot(m,nabla f): avoid
 computation of dot(d, nabla f) in model decrease, is used in find_beta
 (interface updated) - update docstrings

---
 src/fomo.jl | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index cf4697ae..83fe6648 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -21,7 +21,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 For advanced usage:
 
     solver = R2Solver(nlp)
-    solve!(solver, nlp; kwargs...)
+    solve!(solver, nlp; backend = R2og(), kwargs...)
 
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
@@ -39,10 +39,12 @@ For advanced usage:
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
 
+*Warning:* `R2og()` backend should be used only for advanced usage as described above.
+
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
@@ -174,6 +176,7 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
+  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
@@ -229,15 +232,12 @@ function SolverCore.solve!(
   avgβmax = T(0)
   siter = 0
   oneT = T(1)
+  mdot∇f = T(0) # dot(m,∇fk)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
     step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0
-    if r2mode
-      ΔTk = norm_∇fk^2 * λk
-    else  
-      ΔTk = dot(∇fk , d) * λk
-    end
+    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -260,17 +260,15 @@ function SolverCore.solve!(
       x .= c
       if !r2mode
         m .= ∇fk .* (oneT - β) .+ m .* β
+        mdot∇f = dot(m,∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
+        βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ m .* βmax
         norm_d = norm(d)
-      else
-        d .= ∇fk
-        norm_d = norm_∇fk
       end
       if !r2mode
         avgβmax += βmax
@@ -321,17 +319,16 @@ function SolverCore.solve!(
 end
 
 """
-find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
+find_beta(m, md∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
-2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))||
-with `m` memory of past gradient
+2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||
+with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` 
 """ 
-function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
-  dotprod = dot(m,∇f)
-  n1 = norm_∇f^2 - dotprod
+function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+  n1 = norm_∇f^2 - mdot∇f
   n2 = norm(m .- ∇f)
   β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
   β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
@@ -354,9 +351,10 @@ end
 
 """
   step_mult(αk::T, norm_∇fk::T, ::r2)
+  step_mult(αk::T, norm_∇fk::T, ::R2og)
   step_mult(αk::T, norm_∇fk::T, ::tr)
 
-Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`).
+Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`).
 """
 function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
   αk

From ae801991589c7d483fa7b463b3572527e243cee4 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Tue, 6 Feb 2024 12:47:50 -0500
Subject: [PATCH 025/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 83fe6648..45aa9ade 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -200,7 +200,7 @@ function SolverCore.solve!(
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if r2mode
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
     infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0

From f56d1839bd578fc3c803578d093d573a52e3f5b5 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Tue, 6 Feb 2024 12:48:10 -0500
Subject: [PATCH 026/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 45aa9ade..6a12c997 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -203,7 +203,7 @@ function SolverCore.solve!(
       infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
     end
     
   end

From be56ea50545f4e12ec652f61f5be37c949e3e81a Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Tue, 6 Feb 2024 12:48:26 -0500
Subject: [PATCH 027/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6a12c997..cb3a1f58 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -205,7 +205,6 @@ function SolverCore.solve!(
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
       infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
     end
-    
   end
 
   set_status!(

From bd953325a9c95544331b7231c9f99dd640399586 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 6 Feb 2024 12:54:53 -0500
Subject: [PATCH 028/171] rename `m` as `momentum`

---
 src/fomo.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 83fe6648..b234622e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -171,7 +171,7 @@ function SolverCore.solve!(
   x = solver.x .= x
   ∇fk = solver.g
   c = solver.c
-  m = solver.m
+  momentum = solver.m
   d = solver.d
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
@@ -232,7 +232,7 @@ function SolverCore.solve!(
   avgβmax = T(0)
   siter = 0
   oneT = T(1)
-  mdot∇f = T(0) # dot(m,∇fk)
+  mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
@@ -251,7 +251,7 @@ function SolverCore.solve!(
       αk = αk * γ1
       if !r2mode
         βmax *= γ3
-        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
+        d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
       end
     end
 
@@ -259,7 +259,7 @@ function SolverCore.solve!(
     if ρk >= η1
       x .= c
       if !r2mode
-        m .= ∇fk .* (oneT - β) .+ m .* β
+        momentum .= ∇fk .* (oneT - β) .+ momentum .* β
         mdot∇f = dot(m,∇fk)
       end
       set_objective!(stats, fck)
@@ -267,7 +267,7 @@ function SolverCore.solve!(
       norm_∇fk = norm(∇fk)
       if !r2mode
         βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
-        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
+        d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
       if !r2mode
@@ -325,7 +325,7 @@ Compute βmax which saturates the contibution of the momentum term to the gradie
 `βmax` is computed such that the two gradient-related conditions are ensured: 
 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
 2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||
-with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` 
+with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
 """ 
 function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n1 = norm_∇f^2 - mdot∇f

From 1c4d3da6117a118f38ebee0cd514560255173bdb Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 6 Feb 2024 13:05:58 -0500
Subject: [PATCH 029/171] update docstring, fix `m` to `momentum` renaming.

---
 src/fomo.jl | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 0a202466..93a5c11a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -38,8 +38,8 @@ For advanced usage:
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
 
@@ -259,13 +259,13 @@ function SolverCore.solve!(
       x .= c
       if !r2mode
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
-        mdot∇f = dot(m,∇fk)
+        mdot∇f = dot(momentum,∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
+        βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
@@ -322,8 +322,8 @@ find_beta(m, md∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
-2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||
+1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖²
+2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖
 with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
 """ 
 function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
@@ -336,6 +336,7 @@ end
 
 """
   init_alpha(norm_∇fk::T, ::r2)
+  init_alpha(norm_∇fk::T, ::R2og)
   init_alpha(norm_∇fk::T, ::tr)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.

From f952eca5e43d55fdcbf9cc7dc6efd1b9121dcec8 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Fri, 16 Feb 2024 09:31:41 -0500
Subject: [PATCH 030/171] Update src/fomo.jl

Co-authored-by: tmigot <tangi.migot@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 93a5c11a..c04d5725 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -122,7 +122,7 @@ end
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = R2Solver(nlp)
   stats = GenericExecutionStats(nlp)
-  if haskey(kwargs,:σmax)
+  if haskey(kwargs,:σmin)
     return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
   else
     return solve!(solver, nlp, stats; backend = R2og(), kwargs...)

From ca78bb4a96bc162ab5488737a3a964103c8e1c22 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 17 Feb 2024 17:08:10 -0500
Subject: [PATCH 031/171] Update test/allocs.jl

Co-authored-by: tmigot <tangi.migot@gmail.com>
---
 test/allocs.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/allocs.jl b/test/allocs.jl
index 42d266ab..5906ef84 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))

From 58d7dc3b9bb1fe230b6fa423279438e30ca7bc4b Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 17 Feb 2024 17:09:55 -0500
Subject: [PATCH 032/171] Update src/fomo.jl

Co-authored-by: tmigot <tangi.migot@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index c04d5725..b84c8664 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,6 +1,6 @@
 export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
 
-abstract type AbstractFomoMethod end
+abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
 struct tr   <: AbstractFomoMethod end
 struct r2   <: AbstractFomoMethod end

From 9aac4044acc8b2ce9d8d1c63bbadc262fa7c01e6 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 17 Feb 2024 17:48:27 -0500
Subject: [PATCH 033/171] put alpha in solver structure, uncomment callback
 test

---
 src/fomo.jl      | 47 ++++++++++++++++++++++++-----------------------
 test/callback.jl | 26 +++++++++++++-------------
 2 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index b84c8664..6e2c018a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,6 +1,6 @@
 export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
 
-abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
+abstract type AbstractFomoMethod end
 
 struct tr   <: AbstractFomoMethod end
 struct r2   <: AbstractFomoMethod end
@@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
   c::V
   m::V
   d::V
+  α::T
 end
 
 function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
@@ -100,7 +101,7 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = fill!(similar(nlp.meta.x0), 0)
   d = fill!(similar(nlp.meta.x0), 0)
-  return FomoSolver{T, V}(x, g, c, m, d)
+  return FomoSolver{T, V}(x, g, c, m, d, T(0))
 end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -116,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = Vector{T}()
   d = g # similar without momentum
-  return FomoSolver{T, V}(x, g, c, m, d)
+  return FomoSolver{T, V}(x, g, c, m, d, T(0))
 end
 
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -181,7 +182,7 @@ function SolverCore.solve!(
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
-  αk = init_alpha(norm_∇fk,backend)
+  solver.α = init_alpha(norm_∇fk,backend)
   
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
@@ -190,20 +191,20 @@ function SolverCore.solve!(
     @info("Optimal point found at initial point")
     if r2mode
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
-      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α
     else
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
-      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
     
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if r2mode
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0
     end
   end
 
@@ -233,9 +234,9 @@ function SolverCore.solve!(
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
-    λk = step_mult(αk,norm_d,backend)
+    λk = step_mult(solver.α,norm_d,backend)
     c .= x .- λk .* d
-    step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0
+    step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0
     ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk
     fck = obj(nlp, c)
     if fck == -Inf
@@ -245,9 +246,9 @@ function SolverCore.solve!(
     ρk = (stats.objective - fck) / ΔTk
     # Update regularization parameters
     if ρk >= η2
-      αk = min(αmax, γ2 * αk)
+      solver.α = min(αmax, γ2 * solver.α)
     elseif ρk < η1
-      αk = αk * γ1
+      solver.α = solver.α * γ1
       if !r2mode
         βmax *= γ3
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
@@ -283,9 +284,9 @@ function SolverCore.solve!(
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
       if r2mode
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk
       else
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax
       end
     end
 
@@ -305,7 +306,7 @@ function SolverCore.solve!(
     callback(nlp, solver, stats)
 
     step_underflow  && set_status!(stats,:small_step)
-    αk == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
+    solver.α == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
@@ -350,16 +351,16 @@ function init_alpha(norm_∇fk::T, ::tr) where{T}
 end
 
 """
-  step_mult(αk::T, norm_∇fk::T, ::r2)
-  step_mult(αk::T, norm_∇fk::T, ::R2og)
-  step_mult(αk::T, norm_∇fk::T, ::tr)
+  step_mult(α::T, norm_∇fk::T, ::r2)
+  step_mult(α::T, norm_∇fk::T, ::R2og)
+  step_mult(α::T, norm_∇fk::T, ::tr)
 
-Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`).
+Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
-  αk
+function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
+  α
 end
 
-function step_mult(αk::T, norm_∇fk::T, ::tr) where{T}
-  αk/norm_∇fk
+function step_mult(α::T, norm_∇fk::T, ::tr) where{T}
+  α/norm_∇fk
 end
\ No newline at end of file
diff --git a/test/callback.jl b/test/callback.jl
index 2511ee6b..ddadc799 100644
--- a/test/callback.jl
+++ b/test/callback.jl
@@ -58,16 +58,16 @@ end
   @test stats.iter == 8
 end
 
-# @testset "Testing Solver Values" begin
-#   f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2
-#   nlp = ADNLPModel(f, [-1.2; 1.0])
-#   function cb(nlp, solver, stats)
-#     if stats.iter == 4
-#       @test solver.σ > 0.0
-#       stats.status = :user
-#     end
-#   end
-#   stats = with_logger(NullLogger()) do
-#     R2(nlp, callback = cb)
-#   end
-# end
+@testset "Testing Solver Values" begin
+  f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2
+  nlp = ADNLPModel(f, [-1.2; 1.0])
+  function cb(nlp, solver, stats)
+    if stats.iter == 4
+      @test solver.α > 0.0
+      stats.status = :user
+    end
+  end
+  stats = with_logger(NullLogger()) do
+    R2(nlp, callback = cb)
+  end
+end

From e5497c506aa461e2daf5b758dfcad2bf1e4a6a39 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 17 Feb 2024 18:14:08 -0500
Subject: [PATCH 034/171] allocate memory for norm vector in find_beta

---
 src/fomo.jl | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6e2c018a..f047169d 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
   c::V
   m::V
   d::V
+  p::V
   α::T
 end
 
@@ -101,7 +102,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = fill!(similar(nlp.meta.x0), 0)
   d = fill!(similar(nlp.meta.x0), 0)
-  return FomoSolver{T, V}(x, g, c, m, d, T(0))
+  p = similar(nlp.meta.x0)
+  return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
 end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -117,7 +119,8 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = Vector{T}()
   d = g # similar without momentum
-  return FomoSolver{T, V}(x, g, c, m, d, T(0))
+  p = Vector{T}()
+  return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
 end
 
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -174,6 +177,7 @@ function SolverCore.solve!(
   c = solver.c
   momentum = solver.m
   d = solver.d
+  p = solver.p
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
@@ -266,7 +270,8 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
+        p .= momentum .- ∇fk
+        βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
@@ -327,9 +332,9 @@ Compute βmax which saturates the contibution of the momentum term to the gradie
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖
 with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
 """ 
-function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n1 = norm_∇f^2 - mdot∇f
-  n2 = norm(m .- ∇f)
+  n2 = norm(p)
   β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
   β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
   return min(β,min(β1,β2))

From 21791241abac892fc562b4fec0b898a8b34b141c Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 17 Feb 2024 22:45:36 -0500
Subject: [PATCH 035/171] Add FoSolver structure for no-momentum case. Change
 backend name to step_backend to avoid confusion. Update docstrings and tests.

---
 src/fomo.jl          | 123 +++++++++++++++++++++++++------------------
 test/test_solvers.jl |   2 +-
 2 files changed, 74 insertions(+), 51 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index f047169d..c672b7b7 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,10 +1,10 @@
-export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
+export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step
 
-abstract type AbstractFomoMethod end
+abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
-struct tr   <: AbstractFomoMethod end
-struct r2   <: AbstractFomoMethod end
-struct R2og <: AbstractFomoMethod end
+abstract type AbstractFomoMethod end
+struct tr_step   <: AbstractFomoMethod end
+struct r2_step   <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -17,11 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method.
+**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method.
 For advanced usage:
 
-    solver = R2Solver(nlp)
-    solve!(solver, nlp; backend = R2og(), kwargs...)
+    solver = FoSolver(nlp)
+    solve!(solver, nlp; kwargs...)
+Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
@@ -33,7 +34,7 @@ For advanced usage:
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
-- `αmax = 1/eps(T)`: step parameter for fomo algorithm.
+- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
@@ -41,9 +42,7 @@ For advanced usage:
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
 - `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
-
-*Warning:* `R2og()` backend should be used only for advanced usage as described above.
+- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -65,6 +64,7 @@ Notably, you can access, and modify, the following:
   - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
+## `fomo`
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -83,10 +83,31 @@ stats = solve!(solver, nlp)
 
 # output
 
+"Execution stats: first-order stationary"
+```
+## `R2`
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = R2(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = FoSolver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
 "Execution stats: first-order stationary"
 ```
 """
-mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
+mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
   g::V
   c::V
@@ -113,23 +134,28 @@ end
   return solve!(solver, nlp, stats; kwargs...)
 end
 
-function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
+
+mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
+  x::V
+  g::V
+  c::V
+  α::T
+end
+
+function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   x = similar(nlp.meta.x0)
   g = similar(nlp.meta.x0)
   c = similar(nlp.meta.x0)
-  m = Vector{T}()
-  d = g # similar without momentum
-  p = Vector{T}()
-  return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
+  return FoSolver{T, V}(x, g, c, T(0))
 end
 
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
-  solver = R2Solver(nlp)
+  solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
   if haskey(kwargs,:σmin)
-    return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
+    return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...)
   else
-    return solve!(solver, nlp, stats; backend = R2og(), kwargs...)
+    return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
   end
 end
 
@@ -141,7 +167,7 @@ end
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
-  solver::FomoSolver{T, V},
+  solver::AbstractFirstOrderSolver,
   nlp::AbstractNLPModel{T, V},
   stats::GenericExecutionStats{T, V};
   callback = (args...) -> nothing,
@@ -161,12 +187,11 @@ function SolverCore.solve!(
   θ1::T = T(0.1),
   θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
-  backend = r2(),
+  step_backend = r2_step(),
   σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2`
 ) where {T, V}
-  r2mode = (backend == R2og())
-  mthname = r2mode ? "R2" : "fomo"
-  unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.")
+  use_momentum = typeof(solver) <: FomoSolver
+  unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
   
   reset!(stats)
   start_time = time()
@@ -175,9 +200,9 @@ function SolverCore.solve!(
   x = solver.x .= x
   ∇fk = solver.g
   c = solver.c
-  momentum = solver.m
-  d = solver.d
-  p = solver.p
+  momentum = use_momentum ? solver.m : nothing # not used if no momentum
+  d = use_momentum ? solver.d : solver.g # g = d if no momentum
+  p = use_momentum ? solver.p : nothing # not used if no momentum
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
@@ -186,14 +211,14 @@ function SolverCore.solve!(
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
-  solver.α = init_alpha(norm_∇fk,backend)
+  solver.α = init_alpha(norm_∇fk,step_backend)
   
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
   if optimal
     @info("Optimal point found at initial point")
-    if r2mode
+    if !use_momentum
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α
     else
@@ -203,7 +228,7 @@ function SolverCore.solve!(
     
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
-    if r2mode
+    if !use_momentum
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
       infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN
     else
@@ -238,10 +263,10 @@ function SolverCore.solve!(
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
-    λk = step_mult(solver.α,norm_d,backend)
+    λk = step_mult(solver.α,norm_d,step_backend)
     c .= x .- λk .* d
     step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0
-    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk
+    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -253,7 +278,7 @@ function SolverCore.solve!(
       solver.α = min(αmax, γ2 * solver.α)
     elseif ρk < η1
       solver.α = solver.α * γ1
-      if !r2mode
+      if use_momentum
         βmax *= γ3
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
       end
@@ -262,20 +287,20 @@ function SolverCore.solve!(
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
-      if !r2mode
+      if use_momentum
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
         mdot∇f = dot(momentum,∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
-      if !r2mode
+      if use_momentum
         p .= momentum .- ∇fk
         βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
-      if !r2mode
+      if use_momentum
         avgβmax += βmax
         siter += 1
       end
@@ -288,7 +313,7 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      if r2mode
+      if !use_momentum
         infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk
       else
         infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax
@@ -315,7 +340,7 @@ function SolverCore.solve!(
 
     done = stats.status != :unknown
   end
-  if !r2mode
+  if use_momentum
     avgβmax /= siter
     stats.solver_specific[:avgβmax] = avgβmax
   end
@@ -341,31 +366,29 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where
 end
 
 """
-  init_alpha(norm_∇fk::T, ::r2)
-  init_alpha(norm_∇fk::T, ::R2og)
-  init_alpha(norm_∇fk::T, ::tr)
+  init_alpha(norm_∇fk::T, ::r2_step)
+  init_alpha(norm_∇fk::T, ::tr_step)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T}
+function init_alpha(norm_∇fk::T, ::r2_step) where{T}
   1/2^round(log2(norm_∇fk + 1))
 end
 
-function init_alpha(norm_∇fk::T, ::tr) where{T}
+function init_alpha(norm_∇fk::T, ::tr_step) where{T}
   norm_∇fk/2^round(log2(norm_∇fk + 1))
 end
 
 """
-  step_mult(α::T, norm_∇fk::T, ::r2)
-  step_mult(α::T, norm_∇fk::T, ::R2og)
-  step_mult(α::T, norm_∇fk::T, ::tr)
+  step_mult(α::T, norm_∇fk::T, ::r2_step)
+  step_mult(α::T, norm_∇fk::T, ::tr_step)
 
 Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
+function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T}
   α
 end
 
-function step_mult(α::T, norm_∇fk::T, ::tr) where{T}
+function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T}
   α/norm_∇fk
 end
\ No newline at end of file
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index ba182731..d9266d29 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -9,7 +9,7 @@ function tests()
         ("tron", tron),
         ("R2", R2),
         ("fomo_r2", fomo),
-        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)),
+        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From 57bf9c2d399cf500c5fba4754caf6cc39bc26de6 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 18 Feb 2024 12:56:34 -0500
Subject: [PATCH 036/171] fix allocs tests

---
 test/allocs.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/allocs.jl b/test/allocs.jl
index 5906ef84..63c33f01 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :FoSolver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))

From a1acef6d0f8da5e83040a6d486a071125fc82369 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 18 Feb 2024 14:39:24 -0500
Subject: [PATCH 037/171] add reset! function to FoSolver

---
 src/fomo.jl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index c672b7b7..7a320173 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -166,6 +166,13 @@ end
 
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
+
+function SolverCore.reset!(solver::FoSolver{T}) where {T}
+  solver
+end
+
+SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver)
+
 function SolverCore.solve!(
   solver::AbstractFirstOrderSolver,
   nlp::AbstractNLPModel{T, V},

From 7ba442a33d5d72bf13d347a92fa595e5c23258c1 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 18 Feb 2024 15:22:43 -0500
Subject: [PATCH 038/171] remove `R2Solver`, replaced by `FoSolver`

---
 src/fomo.jl | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 7a320173..c9a95e81 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,4 +1,4 @@
-export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step
+export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
 
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
@@ -134,6 +134,12 @@ end
   return solve!(solver, nlp, stats; kwargs...)
 end
 
+function SolverCore.reset!(solver::FomoSolver{T}) where {T}
+  fill!(solver.m,0)
+  solver
+end
+
+SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
 mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
@@ -159,14 +165,6 @@ end
   end
 end
 
-function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m,0)
-  solver
-end
-
-SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
-
-
 function SolverCore.reset!(solver::FoSolver{T}) where {T}
   solver
 end

From 8278977d2c21c49bab6bb10ed9ae48e21372bb00 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 22:53:40 -0500
Subject: [PATCH 039/171] Update test/test_solvers.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 test/test_solvers.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index d9266d29..eb9029e1 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -9,7 +9,7 @@ function tests()
         ("tron", tron),
         ("R2", R2),
         ("fomo_r2", fomo),
-        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)),
+        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp, step_backend = JSOSolvers.tr_step(); kwargs...)),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From 31fd68dc950fdece036579522289c4fac6338c62 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 22:54:14 -0500
Subject: [PATCH 040/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index c9a95e81..d28ff085 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -10,7 +10,7 @@ struct r2_step   <: AbstractFomoMethod end
     fomo(nlp; kwargs...)
     R2(nlp; kwargs...)
 
-A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps.
 
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 

From 602ca50e74b425744ef526da48aecaf3185d7509 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 22:56:37 -0500
Subject: [PATCH 041/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index d28ff085..c96a7aff 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -12,7 +12,7 @@ struct r2_step   <: AbstractFomoMethod end
 
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps.
 
-For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)

From ac736a4bfce27dfce618219672c9b6890e6b712b Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:00:58 -0500
Subject: [PATCH 042/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index c96a7aff..1f2f4d4f 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -32,7 +32,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.

From f4c3481fae11bc25dcb01ba97bdc69181ebea2e8 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:06:07 -0500
Subject: [PATCH 043/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 1f2f4d4f..6c66ae4a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -34,7 +34,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
-- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
+- `αmax = 1/eps(T)`: maximum step parameter for fomo solver.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.

From 701221c3e8e3acd02d6a97432bd23c0bdd59090a Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:07:13 -0500
Subject: [PATCH 044/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6c66ae4a..0cc734ee 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -35,7 +35,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: maximum step parameter for fomo solver.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited).
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.

From 90763a248d65f839f331424f271b6cb686e910e3 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:08:03 -0500
Subject: [PATCH 045/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 0cc734ee..c59f9791 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -25,6 +25,7 @@ For advanced usage:
 Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 
 # Arguments
+
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 

From d0a99297355a2506a1dee7cdc937e7b47ee1b8e6 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:10:04 -0500
Subject: [PATCH 046/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index c59f9791..40865fa2 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -29,6 +29,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
+
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.

From f8c5a63428361ef930d63e921d98b582b43cf626 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:10:45 -0500
Subject: [PATCH 047/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 40865fa2..40bf8bec 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -47,6 +47,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
+
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback

From 998f926be4879cfde6af798204d8e2ca509782b6 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:11:28 -0500
Subject: [PATCH 048/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 40bf8bec..deac5778 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -51,6 +51,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
+
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.

From e692afe540dc11959556e2fe90612e612dbf2d6d Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:12:15 -0500
Subject: [PATCH 049/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index deac5778..645a0d68 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -68,6 +68,7 @@ Notably, you can access, and modify, the following:
   - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
+
 ## `fomo`
 ```jldoctest
 using JSOSolvers, ADNLPModels

From d588e729ac80fea092f56cb72a47ba511a75477d Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:12:59 -0500
Subject: [PATCH 050/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 645a0d68..b1574045 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -70,6 +70,7 @@ Notably, you can access, and modify, the following:
 # Examples
 
 ## `fomo`
+
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))

From a35720a490b10920ce3650e3f257f8eaa049f0f1 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:15:43 -0500
Subject: [PATCH 051/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index b1574045..c955b365 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -136,7 +136,7 @@ end
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
   solver_specific = Dict(:avgβmax => T(0.))
-  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
+  stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 

From 2665fbf6f3b3190dfb1d000c6fca447492d30ad2 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:16:23 -0500
Subject: [PATCH 052/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index c955b365..425ee41d 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -92,6 +92,7 @@ stats = solve!(solver, nlp)
 "Execution stats: first-order stationary"
 ```
 ## `R2`
+
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))

From b00fd12da96aced7203edb6fdfdb1b7c55adf5cc Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:16:50 -0500
Subject: [PATCH 053/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 425ee41d..edf16b14 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -142,7 +142,7 @@ end
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m,0)
+  fill!(solver.m, 0)
   solver
 end
 

From dda1ca78f3f3471f847afe56f867eeabbd113b0a Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:17:15 -0500
Subject: [PATCH 054/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index edf16b14..141fe4f0 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -165,7 +165,7 @@ end
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
-  if haskey(kwargs,:σmin)
+  if haskey(kwargs, :σmin)
     return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...)
   else
     return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)

From 719dc5f6192b83b2d5a7ac4d38caac77802b8d33 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:18:07 -0500
Subject: [PATCH 055/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 141fe4f0..69426ffe 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -311,8 +311,6 @@ function SolverCore.solve!(
         βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
-      end
-      if use_momentum
         avgβmax += βmax
         siter += 1
       end

From 8f99dbe13facaf7ba4ae18114b7affaa3d5f7bb9 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:19:48 -0500
Subject: [PATCH 056/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 69426ffe..414bfb70 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -218,7 +218,6 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
-  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)

From 497b99ec0a4529ba7ffccd9c0d45706c52ffe464 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:20:43 -0500
Subject: [PATCH 057/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 414bfb70..79ba88b6 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -222,7 +222,7 @@ function SolverCore.solve!(
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
-  solver.α = init_alpha(norm_∇fk,step_backend)
+  solver.α = init_alpha(norm_∇fk, step_backend)
   
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk

From a6a439f79dd14949575487ea665b35f58aed38de Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:21:29 -0500
Subject: [PATCH 058/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 79ba88b6..5c200a9e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -236,7 +236,6 @@ function SolverCore.solve!(
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
-    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if !use_momentum

From b0164773122a1bb1220b7b97a8d60991527d29e2 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 22 Feb 2024 00:04:15 -0500
Subject: [PATCH 059/171] Add TR solver (trust region with linear model), fix
 spacing, update docstrings

---
 src/fomo.jl | 92 ++++++++++++++++++++++++++---------------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 5c200a9e..fa811924 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -2,75 +2,69 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
 
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
-abstract type AbstractFomoMethod end
-struct tr_step   <: AbstractFomoMethod end
-struct r2_step   <: AbstractFomoMethod end
+abstract type AbstractFOMethod end
+struct tr_step   <: AbstractFOMethod end
+struct r2_step   <: AbstractFOMethod end
 
 """
     fomo(nlp; kwargs...)
     R2(nlp; kwargs...)
 
-A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps.
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
 
-For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`:
+For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method.
+**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
 For advanced usage:
 
     solver = FoSolver(nlp)
-    solve!(solver, nlp; kwargs...)
-Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
-
+    solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x)
+    solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ 
+    
 # Arguments
-
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
-
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
-- `αmax = 1/eps(T)`: maximum step parameter for fomo solver.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited).
+- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
-
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
-
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.
-In particular, setting `stats.status = :user` will stop the algorithm.
+In particular, setting `stats.status = :user || stats.stats = :unknown` will stop the algorithm.
 All relevant information should be available in `nlp` and `solver`.
 Notably, you can access, and modify, the following:
 - `solver.x`: current iterate;
 - `solver.gx`: current gradient;
 - `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
-  - `stats.dual_feas`: norm of current gradient;
-  - `stats.iter`: current iteration counter;
-  - `stats.objective`: current objective function value;
-  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
-  - `stats.elapsed_time`: elapsed time in seconds.
+    - `stats.dual_feas`: norm of current gradient;
+    - `stats.iter`: current iteration counter;
+    - `stats.objective`: current objective function value;
+    - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+    - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
-
 ## `fomo`
-
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -92,7 +86,6 @@ stats = solve!(solver, nlp)
 "Execution stats: first-order stationary"
 ```
 ## `R2`
-
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -137,12 +130,12 @@ end
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
   solver_specific = Dict(:avgβmax => T(0.))
-  stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
+  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m, 0)
+  fill!(solver.m,0)
   solver
 end
 
@@ -162,14 +155,18 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
-@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
-  if haskey(kwargs, :σmin)
-    return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...)
-  else
-    return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
-  end
+  return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
+end
+
+@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  fo(nlp; step_backend = r2_step(), kwargs...)
+end
+
+@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  fo(nlp; step_backend = tr_step(), kwargs...)
 end
 
 function SolverCore.reset!(solver::FoSolver{T}) where {T}
@@ -200,7 +197,6 @@ function SolverCore.solve!(
   θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
   step_backend = r2_step(),
-  σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2`
 ) where {T, V}
   use_momentum = typeof(solver) <: FomoSolver
   unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
@@ -218,6 +214,7 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
+  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
@@ -236,14 +233,15 @@ function SolverCore.solve!(
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
+    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if !use_momentum
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0
     end
   end
 
@@ -273,7 +271,7 @@ function SolverCore.solve!(
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
-    λk = step_mult(solver.α,norm_d,step_backend)
+    λk = step_mult(solver.α, norm_d, step_backend)
     c .= x .- λk .* d
     step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0
     ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum
@@ -299,7 +297,7 @@ function SolverCore.solve!(
       x .= c
       if use_momentum
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
-        mdot∇f = dot(momentum,∇fk)
+        mdot∇f = dot(momentum, ∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
@@ -309,6 +307,8 @@ function SolverCore.solve!(
         βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
+      end
+      if use_momentum
         avgβmax += βmax
         siter += 1
       end
@@ -343,8 +343,8 @@ function SolverCore.solve!(
     
     callback(nlp, solver, stats)
 
-    step_underflow  && set_status!(stats,:small_step)
-    solver.α == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
+    step_underflow  && set_status!(stats, :small_step)
+    solver.α == 0         && set_status!(stats, :exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
@@ -357,13 +357,13 @@ function SolverCore.solve!(
 end
 
 """
-find_beta(m, md∇f, norm_∇f, β, θ1, θ2)
+find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖²
-2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖
-with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
+1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
+2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
+with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` 
 """ 
 function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n1 = norm_∇f^2 - mdot∇f

From 773ac53a367ad7d71bad85737466676dbdf926e0 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Mon, 4 Mar 2024 14:33:19 -0500
Subject: [PATCH 060/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index fa811924..d1795391 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -18,6 +18,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
     solve!(solver, nlp; kwargs...)
 
 **No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
+
 For advanced usage:
 
     solver = FoSolver(nlp)

From 20c3fa9659df5fbd357e1e4a47157a82a1109056 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 4 Mar 2024 15:36:16 -0500
Subject: [PATCH 061/171] fix docstrings and verbose display

---
 src/fomo.jl | 90 +++++++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 41 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index d1795391..40b14e8c 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -3,8 +3,8 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
 abstract type AbstractFOMethod end
-struct tr_step   <: AbstractFOMethod end
-struct r2_step   <: AbstractFOMethod end
+struct tr_step <: AbstractFOMethod end
+struct r2_step <: AbstractFOMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -130,19 +130,19 @@ end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
-  solver_specific = Dict(:avgβmax => T(0.))
-  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
+  solver_specific = Dict(:avgβmax => T(0.0))
+  stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m,0)
+  fill!(solver.m, 0)
   solver
 end
 
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
-mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
+@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
   g::V
   c::V
@@ -186,22 +186,23 @@ function SolverCore.solve!(
   rtol::T = √eps(T),
   η1::T = T(eps(T)^(1 / 4)),
   η2::T = T(0.95),
-  γ1::T = T(1/2),
+  γ1::T = T(1 / 2),
   γ2::T = T(2),
-  γ3::T = T(1/2),
-  αmax::T = 1/eps(T),
+  γ3::T = T(1 / 2),
+  αmax::T = 1 / eps(T),
   max_time::Float64 = 30.0,
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
   θ1::T = T(0.1),
-  θ2::T = T(eps(T)^(1/3)),
+  θ2::T = T(eps(T)^(1 / 3)),
   verbose::Int = 0,
   step_backend = r2_step(),
 ) where {T, V}
   use_momentum = typeof(solver) <: FomoSolver
+  is_r2 = typeof(step_backend) <: r2_step
   unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
-  
+
   reset!(stats)
   start_time = time()
   set_time!(stats, 0.0)
@@ -215,34 +216,38 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
-  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
   solver.α = init_alpha(norm_∇fk, step_backend)
-  
+
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
+  header = ["iter", "f", "‖∇f‖", "α"]
   if optimal
     @info("Optimal point found at initial point")
-    if !use_momentum
-      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
-      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α
+    if is_r2
+      @info @sprintf "%5s  %9s  %7s  %7s " header...
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α
     else
-      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
+      @info @sprintf "%5s  %9s  %7s  %7s " header...
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
-    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
+    push!(header, "ρk")
+    step_param = is_r2 ? 1 / solver.α : solver.α
     if !use_momentum
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s " header...
+      infoline =
+        @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param
     else
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0
+      push!(header, "βmax")
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " header...
+      infoline =
+        @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0
     end
   end
 
@@ -305,7 +310,7 @@ function SolverCore.solve!(
       norm_∇fk = norm(∇fk)
       if use_momentum
         p .= momentum .- ∇fk
-        βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
+        βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
@@ -322,10 +327,13 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
+      step_param = is_r2 ? 1 / solver.α : solver.α
       if !use_momentum
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk
+        infoline =
+          @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ρk
       else
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax
+        infoline =
+          @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ρk βmax
       end
     end
 
@@ -341,11 +349,11 @@ function SolverCore.solve!(
         max_time = max_time,
       ),
     )
-    
+
     callback(nlp, solver, stats)
 
-    step_underflow  && set_status!(stats, :small_step)
-    solver.α == 0         && set_status!(stats, :exception) # :small_nlstep exception should happen before
+    step_underflow && set_status!(stats, :small_step)
+    solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
@@ -362,16 +370,16 @@ find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
+1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
 with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` 
-""" 
-function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+"""
+function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V}
   n1 = norm_∇f^2 - mdot∇f
   n2 = norm(p)
-  β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
-  β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
-  return min(β,min(β1,β2))
+  β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β
+  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β
+  return min(β, min(β1, β2))
 end
 
 """
@@ -380,12 +388,12 @@ end
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::r2_step) where{T}
-  1/2^round(log2(norm_∇fk + 1))
+function init_alpha(norm_∇fk::T, ::r2_step) where {T}
+  1 / 2^round(log2(norm_∇fk + 1))
 end
 
-function init_alpha(norm_∇fk::T, ::tr_step) where{T}
-  norm_∇fk/2^round(log2(norm_∇fk + 1))
+function init_alpha(norm_∇fk::T, ::tr_step) where {T}
+  norm_∇fk / 2^round(log2(norm_∇fk + 1))
 end
 
 """
@@ -394,10 +402,10 @@ end
 
 Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T}
+function step_mult(α::T, norm_∇fk::T, ::r2_step) where {T}
   α
 end
 
-function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T}
-  α/norm_∇fk
+function step_mult(α::T, norm_∇fk::T, ::tr_step) where {T}
+  α / norm_∇fk
 end
\ No newline at end of file

From 7950f6bfc0f9bb269fa06753660a0ab48e0d6d24 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 5 Mar 2024 14:18:18 -0500
Subject: [PATCH 062/171] update docstring, update info display, fix solver arg
 type, export TR

---
 src/fomo.jl | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 40b14e8c..e51e5b8a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,4 +1,4 @@
-export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
+export fomo, FomoSolver, FoSolver, R2, TR, tr_step, r2_step
 
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
@@ -10,14 +10,21 @@ struct r2_step <: AbstractFOMethod end
     fomo(nlp; kwargs...)
     R2(nlp; kwargs...)
 
-A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
+The step is perform along d with
+d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1)
+with mk the memory of past gradients updated at each successful iteration as
+mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2)
+and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
+(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3)
+‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (4)
 
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
+**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
 
 For advanced usage:
 
@@ -40,8 +47,8 @@ For advanced usage:
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3).
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -177,7 +184,7 @@ end
 SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
-  solver::AbstractFirstOrderSolver,
+  solver::Union{FoSolver,FomoSolver},
   nlp::AbstractNLPModel{T, V},
   stats::GenericExecutionStats{T, V};
   callback = (args...) -> nothing,
@@ -225,7 +232,8 @@ function SolverCore.solve!(
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
-  header = ["iter", "f", "‖∇f‖", "α"]
+  header = ["iter", "f", "‖∇f‖"]
+  is_r2 ? push!(header,"σ") : push!(header,"Δ")
   if optimal
     @info("Optimal point found at initial point")
     if is_r2

From 02caa7b206194d67196af214c71ebaab766d4d39 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 5 Mar 2024 15:14:32 -0500
Subject: [PATCH 063/171] update docstring

---
 src/fomo.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index e51e5b8a..ed07d668 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -13,8 +13,8 @@ struct r2_step <: AbstractFOMethod end
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
 The step is perform along d with
 d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1)
-with mk the memory of past gradients updated at each successful iteration as
-mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2)
+with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
+mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2)
 and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
 (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3)
 ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (4)

From d0a02b9d5e40e390b87c971bc1eec0435d85438e Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Wed, 6 Mar 2024 12:02:58 -0500
Subject: [PATCH 064/171] fix grad and momentum dot product

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ed07d668..d31c6b0e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -311,12 +311,12 @@ function SolverCore.solve!(
       x .= c
       if use_momentum
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
-        mdot∇f = dot(momentum, ∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if use_momentum
+        mdot∇f = dot(momentum, ∇fk)
         p .= momentum .- ∇fk
         βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax

From efcba5030ead91c5fb438f681e5d6371eabb2bb3 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Wed, 6 Mar 2024 12:19:05 -0500
Subject: [PATCH 065/171] cosmetics

---
 src/fomo.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index d31c6b0e..0d15d1cd 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -48,7 +48,7 @@ For advanced usage:
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3).
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
+- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -385,8 +385,8 @@ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm`
 function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V}
   n1 = norm_∇f^2 - mdot∇f
   n2 = norm(p)
-  β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β
-  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β
+  β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β
+  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β
   return min(β, min(β1, β2))
 end
 

From c89db514d67abeb8547f592c1dbe6ac1e320cc97 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Thu, 7 Mar 2024 10:37:12 -0500
Subject: [PATCH 066/171] Update test/restart.jl

Co-authored-by: Tangi Migot <tangi.migot@gmail.com>
---
 test/restart.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/restart.jl b/test/restart.jl
index eb770739..e6b75cc1 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -44,7 +44,7 @@ end
 end
 
 @testset "Test restart with a different problem: $fun" for (fun, s) in (
-  (:R2, :FomoSolver),
+  (:R2, :FoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),

From 8a655c78d6a93cf3eedf4ccd936d8dd69d099d61 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Thu, 7 Mar 2024 10:37:22 -0500
Subject: [PATCH 067/171] Update test/restart.jl

Co-authored-by: Tangi Migot <tangi.migot@gmail.com>
---
 test/restart.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/restart.jl b/test/restart.jl
index e6b75cc1..38765465 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -1,5 +1,5 @@
 @testset "Test restart with a different initial guess: $fun" for (fun, s) in (
-  (:R2, :FomoSolver),
+  (:R2, :FoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),

From 1f67091b49288952fbd8e02c540a4ec1652bf98e Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 7 Mar 2024 11:18:24 -0500
Subject: [PATCH 068/171] Create FoSolver docstring

---
 src/fomo.jl | 130 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 89 insertions(+), 41 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 0d15d1cd..2e2527e1 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -8,29 +8,26 @@ struct r2_step <: AbstractFOMethod end
 
 """
     fomo(nlp; kwargs...)
-    R2(nlp; kwargs...)
 
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
+
+# Algorithm description
+
 The step is perform along d with
-d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1)
+d = - (1-βmax) .* ∇f(xk) - βmax .* mk
 with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
-mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2)
+mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax
 and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
-(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3)
-‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (4)
+(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1)
+‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (2)
 
+# Advanced usage
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
-
-For advanced usage:
-
-    solver = FoSolver(nlp)
-    solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x)
-    solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ 
+**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method.
     
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
@@ -47,8 +44,8 @@ For advanced usage:
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3).
-- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1).
+- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -91,27 +88,6 @@ stats = solve!(solver, nlp)
 
 # output
 
-"Execution stats: first-order stationary"
-```
-## `R2`
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-stats = R2(nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-solver = FoSolver(nlp);
-stats = solve!(solver, nlp)
-
-# output
-
 "Execution stats: first-order stationary"
 ```
 """
@@ -149,7 +125,79 @@ end
 
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
-@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
+"""
+    fo(nlp; kwargs...)
+    R2(nlp; kwargs...)
+    TR(nlp; kwargs...)
+
+A First-Order (FO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
+
+For advanced usage, first define a `FoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = FoSolver(nlp)
+    solve!(solver, nlp; kwargs...)
+
+`R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument.
+
+# Arguments
+- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
+
+# Keyword arguments 
+- `x::V = nlp.meta.x0`: the initial guess.
+- `atol::T = √eps(T)`: absolute tolerance.
+- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
+- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_time::Float64 = 30.0`: maximum time limit in seconds.
+- `max_iter::Int = typemax(Int)`: maximum number of iterations.
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
+- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect the subsequent iterations.
+In particular, setting `stats.status` to anything other than `:unknown` (preferably `:user`) will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.x`: current iterate;
+- `solver.gx`: current gradient;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+    - `stats.dual_feas`: norm of current gradient;
+    - `stats.iter`: current iteration counter;
+    - `stats.objective`: current objective function value;
+    - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+    - `stats.elapsed_time`: elapsed time in seconds.
+
+# Examples
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = fo(nlp) # run with step_backend = r2_step(), equivalent to R2(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = FoSolver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+"""
+mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
   g::V
   c::V
@@ -163,17 +211,17 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
-@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
   return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
 end
 
-@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   fo(nlp; step_backend = r2_step(), kwargs...)
 end
 
-@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   fo(nlp; step_backend = tr_step(), kwargs...)
 end
 
@@ -184,7 +232,7 @@ end
 SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
-  solver::Union{FoSolver,FomoSolver},
+  solver::Union{FoSolver, FomoSolver},
   nlp::AbstractNLPModel{T, V},
   stats::GenericExecutionStats{T, V};
   callback = (args...) -> nothing,
@@ -233,7 +281,7 @@ function SolverCore.solve!(
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
   header = ["iter", "f", "‖∇f‖"]
-  is_r2 ? push!(header,"σ") : push!(header,"Δ")
+  is_r2 ? push!(header, "σ") : push!(header, "Δ")
   if optimal
     @info("Optimal point found at initial point")
     if is_r2

From 0e2fdff0b05797b8d2598a9ca916c422c765fb02 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Fri, 8 Mar 2024 11:31:07 -0500
Subject: [PATCH 069/171] deprecate R2Solver

---
 src/fomo.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 2e2527e1..002ba712 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -211,6 +211,8 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
+Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...)
+
 @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)

From 9e58f9011d829f1d047c607a2c129321e4d166e0 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:13:41 -0500
Subject: [PATCH 070/171] Update src/fomo.jl

Co-authored-by: Tangi Migot <tangi.migot@gmail.com>
---
 src/fomo.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 002ba712..9009c36a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -211,6 +211,11 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
+"""
+    `R2Solver` is deprecated, please check the documentation of `R2`.
+"""
+mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end
+
 Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...)
 
 @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}

From b02a20f73fdfdcbacea9231053fa55785a11b3d6 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:14:27 -0500
Subject: [PATCH 071/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 9009c36a..ba22964b 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -13,7 +13,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza
 
 # Algorithm description
 
-The step is perform along d with
+The step is computed along
 d = - (1-βmax) .* ∇f(xk) - βmax .* mk
 with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
 mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax

From 670e815761736defa4a2c6a4754258886d4782bb Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:14:45 -0500
Subject: [PATCH 072/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ba22964b..20b5cce1 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -15,7 +15,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza
 
 The step is computed along
 d = - (1-βmax) .* ∇f(xk) - βmax .* mk
-with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
+with mk the memory of past gradients (initialized at 0), and updated at each successful iteration as
 mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax
 and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
 (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1)

From ee02796e626743bad740f9a92a7f66228d2dd2c0 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:15:14 -0500
Subject: [PATCH 073/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 20b5cce1..35d497a4 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -22,6 +22,7 @@ and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the follow
 ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (2)
 
 # Advanced usage
+
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)

From 4d6e0e335fbf99c9776a5c1c57a9194809a57445 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:15:36 -0500
Subject: [PATCH 074/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 35d497a4..ac78ecbc 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -31,6 +31,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 **No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method.
     
 # Arguments
+
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 

From 1eafb0d36dcf9ecccdc75676ee970ab1becff227 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:15:57 -0500
Subject: [PATCH 075/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index ac78ecbc..88f42570 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -35,6 +35,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
+
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.

From ae75016ec86efa8f031b7797d3194c7bb729690f Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:16:14 -0500
Subject: [PATCH 076/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 88f42570..edc5608a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -432,7 +432,7 @@ function SolverCore.solve!(
 end
 
 """
-find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
+    find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 

From 4fe1a1403b8de444f936a176bec40f602540e0b5 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:16:30 -0500
Subject: [PATCH 077/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index edc5608a..6cf1e221 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -434,7 +434,7 @@ end
 """
     find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
-Compute βmax which saturates the contibution of the momentum term to the gradient.
+Compute value `βmax` that saturates the contribution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
 1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖

From df12e042e6e474c9ea8cf1c1dbb79fb8ffa983ab Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:16:48 -0500
Subject: [PATCH 078/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6cf1e221..2043fa07 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -449,8 +449,8 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where
 end
 
 """
-  init_alpha(norm_∇fk::T, ::r2_step)
-  init_alpha(norm_∇fk::T, ::tr_step)
+    init_alpha(norm_∇fk::T, ::r2_step)
+    init_alpha(norm_∇fk::T, ::tr_step)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """

From 22c1de5a7c048a18a701386b6f4952e2b8d6dbfb Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:17:11 -0500
Subject: [PATCH 079/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 2043fa07..b5ebcdfc 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -452,7 +452,8 @@ end
     init_alpha(norm_∇fk::T, ::r2_step)
     init_alpha(norm_∇fk::T, ::tr_step)
 
-Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
+Initialize `α` step size parameter.
+Ensure first step is the same for quadratic regularization and trust region methods.
 """
 function init_alpha(norm_∇fk::T, ::r2_step) where {T}
   1 / 2^round(log2(norm_∇fk + 1))

From 6f63df08b0198beca21001a3792c2b66961a6899 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:17:25 -0500
Subject: [PATCH 080/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index b5ebcdfc..96c994c3 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -464,8 +464,8 @@ function init_alpha(norm_∇fk::T, ::tr_step) where {T}
 end
 
 """
-  step_mult(α::T, norm_∇fk::T, ::r2_step)
-  step_mult(α::T, norm_∇fk::T, ::tr_step)
+    step_mult(α::T, norm_∇fk::T, ::r2_step)
+    step_mult(α::T, norm_∇fk::T, ::tr_step)
 
 Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """

From d350cdbd1b3fb894aead9df71ef2c608fc439258 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:17:48 -0500
Subject: [PATCH 081/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 96c994c3..917f1fff 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -43,7 +43,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_eval::Int = -1`: maximum number of objective evaluations.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.

From 9ec41f8de9d77a0f75b3337d836c3d891be3c9e3 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:19:42 -0500
Subject: [PATCH 082/171] Apply suggestions from code review

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 917f1fff..560d7505 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -46,16 +46,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `max_eval::Int = -1`: maximum number of objective evaluations.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1).
-- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). 
+- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum.
+- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1).
+- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
+
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
+
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.
@@ -72,7 +74,9 @@ Notably, you can access, and modify, the following:
     - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
+
 ## `fomo`
+
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -143,9 +147,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 `R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument.
 
 # Arguments
+
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
+
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
@@ -159,9 +165,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
+
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
+
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.

From eacf70b81ab2b314a97019cb8b72a5c95e273e7e Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 21 Dec 2023 12:04:12 -0500
Subject: [PATCH 083/171] add fomo solver

---
 docs/src/solvers.md  |   4 +-
 src/JSOSolvers.jl    |   1 +
 src/fomo.jl          | 288 +++++++++++++++++++++++++++++++++++++++++++
 test/allocs.jl       |   2 +-
 test/callback.jl     |   5 +
 test/consistency.jl  |   5 +-
 test/restart.jl      |   2 +
 test/runtests.jl     |   2 +-
 test/test_solvers.jl |   1 +
 9 files changed, 305 insertions(+), 5 deletions(-)
 create mode 100644 src/fomo.jl

diff --git a/docs/src/solvers.md b/docs/src/solvers.md
index 06fe0eed..322f7c2e 100644
--- a/docs/src/solvers.md
+++ b/docs/src/solvers.md
@@ -6,10 +6,11 @@
 - [`tron`](@ref)
 - [`trunk`](@ref)
 - [`R2`](@ref)
+- [`fomo`](@ref)
 
 | Problem type          | Solvers  |
 | --------------------- | -------- |
-| Unconstrained NLP     | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref)|
+| Unconstrained NLP     | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref)|
 | Unconstrained NLS     | [`trunk`](@ref), [`tron`](@ref) |
 | Bound-constrained NLP | [`tron`](@ref) |
 | Bound-constrained NLS | [`tron`](@ref) |
@@ -21,4 +22,5 @@ lbfgs
 tron
 trunk
 R2
+fomo
 ```
diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl
index cd65c9b2..79abace3 100644
--- a/src/JSOSolvers.jl
+++ b/src/JSOSolvers.jl
@@ -14,6 +14,7 @@ export solve!
 include("lbfgs.jl")
 include("trunk.jl")
 include("R2.jl")
+include("fomo.jl")
 
 # Unconstrained solvers for NLS
 include("trunkls.jl")
diff --git a/src/fomo.jl b/src/fomo.jl
new file mode 100644
index 00000000..711e7fa3
--- /dev/null
+++ b/src/fomo.jl
@@ -0,0 +1,288 @@
+export fomo, FomoSolver, tr, qr
+
+abstract type AbstractFomoMethod end
+
+struct tr <: AbstractFomoMethod end
+struct qr <: AbstractFomoMethod end
+
+"""
+    fomo(nlp; kwargs...)
+
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
+
+For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = FomoSolver(nlp)
+    solve!(solver, nlp; kwargs...)
+
+# Arguments
+- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
+
+# Keyword arguments 
+- `x::V = nlp.meta.x0`: the initial guess.
+- `atol::T = √eps(T)`: absolute tolerance.
+- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
+- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2.
+- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters.
+- `αmax = 1/eps(T)`: step parameter for fomo algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_time::Float64 = 30.0`: maximum time limit in seconds.
+- `max_iter::Int = typemax(Int)`: maximum number of iterations.
+- `β = T(0) ∈ [0,1)` : constant in the momentum term.
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
+- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect the subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.x`: current iterate;
+- `solver.gx`: current gradient;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+  - `stats.dual_feas`: norm of current gradient;
+  - `stats.iter`: current iteration counter;
+  - `stats.objective`: current objective function value;
+  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+  - `stats.elapsed_time`: elapsed time in seconds.
+
+# Examples
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = fomo(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = FomoSolver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+"""
+mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
+  x::V
+  g::V
+  c::V
+  m::V
+end
+
+function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
+  x = similar(nlp.meta.x0)
+  g = similar(nlp.meta.x0)
+  c = similar(nlp.meta.x0)
+  m = fill!(similar(nlp.meta.x0), 0)
+  return FomoSolver{T, V}(x, g, c, m)
+end
+
+@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  solver = FomoSolver(nlp)
+  return solve!(solver, nlp; kwargs...)
+end
+
+function SolverCore.reset!(solver::FomoSolver{T}) where {T}
+  fill!(solver.m,0)
+  solver
+end
+SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
+
+function SolverCore.solve!(
+  solver::FomoSolver{T, V},
+  nlp::AbstractNLPModel{T, V},
+  stats::GenericExecutionStats{T, V};
+  callback = (args...) -> nothing,
+  x::V = nlp.meta.x0,
+  atol::T = √eps(T),
+  rtol::T = √eps(T),
+  η1 = eps(T)^(1 / 4),
+  η2 = T(0.2),
+  κg = T(0.8),
+  γ1 = T(0.5),
+  γ2 = T(2),
+  αmax = 1/eps(T),
+  max_time::Float64 = 30.0,
+  max_eval::Int = -1,
+  max_iter::Int = typemax(Int),
+  β::T = T(0.9),
+  verbose::Int = 0,
+  backend = qr()
+) where {T, V}
+  unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
+
+  reset!(stats)
+  start_time = time()
+  set_time!(stats, 0.0)
+
+  x = solver.x .= x
+  ∇fk = solver.g
+  c = solver.c
+  m = solver.m
+
+  set_iter!(stats, 0)
+  set_objective!(stats, obj(nlp, x))
+
+  grad!(nlp, x, ∇fk)
+  norm_∇fk = norm(∇fk)
+  set_dual_residual!(stats, norm_∇fk)
+
+  αk = init_alpha(norm_∇fk,backend)
+  
+  # Stopping criterion: 
+  ϵ = atol + rtol * norm_∇fk
+  optimal = norm_∇fk ≤ ϵ
+  if optimal
+    @info("Optimal point found at initial point")
+    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
+    @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+  end
+  if verbose > 0 && mod(stats.iter, verbose) == 0
+    @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN
+  end
+
+  set_status!(
+    stats,
+    get_status(
+      nlp,
+      elapsed_time = stats.elapsed_time,
+      optimal = optimal,
+      max_eval = max_eval,
+      iter = stats.iter,
+      max_iter = max_iter,
+      max_time = max_time,
+    ),
+  )
+
+  callback(nlp, solver, stats)
+
+  done = stats.status != :unknown
+
+  while !done
+    λk = step_mult(αk,norm_∇fk,backend)
+    if β == 0
+      c .= x .- λk .* (∇fk)
+    else
+      satβ = find_beta(β, κg, m, ∇fk)
+      c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ)
+      m .= ∇fk .* (T(1) - β) .+ m .* β
+    end
+    ΔTk = norm_∇fk^2 * λk
+    fck = obj(nlp, c)
+    if fck == -Inf
+      set_status!(stats, :unbounded)
+      break
+    end
+
+    ρk = (stats.objective - fck) / ΔTk
+
+    # Update regularization parameters
+    if ρk >= η2
+      αk = min(αmax, γ2 * αk)
+    elseif ρk < η1
+      αk = αk * γ1
+    end
+
+    # Acceptance of the new candidate
+    if ρk >= η1
+      x .= c
+      set_objective!(stats, fck)
+      grad!(nlp, x, ∇fk)
+      norm_∇fk = norm(∇fk)
+    end
+
+    set_iter!(stats, stats.iter + 1)
+    set_time!(stats, time() - start_time)
+    set_dual_residual!(stats, norm_∇fk)
+    optimal = norm_∇fk ≤ ϵ
+
+    if verbose > 0 && mod(stats.iter, verbose) == 0
+      @info infoline
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+    end
+
+    set_status!(
+      stats,
+      get_status(
+        nlp,
+        elapsed_time = stats.elapsed_time,
+        optimal = optimal,
+        max_eval = max_eval,
+        iter = stats.iter,
+        max_iter = max_iter,
+        max_time = max_time,
+      ),
+    )
+
+    callback(nlp, solver, stats)
+
+    done = stats.status != :unknown
+  end
+
+  set_solution!(stats, x)
+  return stats
+end
+
+"""
+  find_beta(β,κg,d,∇f;tol=0.01)
+
+Compute satβ which saturates the contibution of the momentum term to the gradient.
+Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term.
+""" 
+function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V}
+  if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0.
+    return β
+  end
+  a = T(0)
+  b = β 
+  while b-a > tol
+    β = (b+a) / 2
+    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0     
+      a = β
+    else
+      b = β
+    end
+  end
+  return β
+end
+
+"""
+  init_alpha(norm_∇fk::T, ::qr)
+  init_alpha(norm_∇fk::T, ::tr)
+
+Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
+"""
+function init_alpha(norm_∇fk::T, ::qr) where{T}
+  1/2^round(log2(norm_∇fk + 1))
+end
+
+function init_alpha(norm_∇fk::T, ::tr) where{T}
+  norm_∇fk/2^round(log2(norm_∇fk + 1))
+end
+
+"""
+  step_mult(αk::T, norm_∇fk::T, ::qr)
+  step_mult(αk::T, norm_∇fk::T, ::tr)
+
+Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`).
+"""
+function step_mult(αk::T, norm_∇fk::T, ::qr) where{T}
+  αk
+end
+
+function step_mult(αk::T, norm_∇fk::T, ::tr) where{T}
+  αk/norm_∇fk
+end
\ No newline at end of file
diff --git a/test/allocs.jl b/test/allocs.jl
index b02b6621..f029c5f1 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
diff --git a/test/callback.jl b/test/callback.jl
index f43796fd..ddadc799 100644
--- a/test/callback.jl
+++ b/test/callback.jl
@@ -31,6 +31,11 @@ using ADNLPModels, JSOSolvers, LinearAlgebra, Logging #, Plots
     tron(nlp, callback = cb)
   end
   @test stats.iter == 8
+
+  stats = with_logger(NullLogger()) do
+    fomo(nlp, callback = cb)
+  end
+  @test stats.iter == 8
 end
 
 @testset "Test callback for NLS" begin
diff --git a/test/consistency.jl b/test/consistency.jl
index 94569dca..fb725b5b 100644
--- a/test/consistency.jl
+++ b/test/consistency.jl
@@ -10,8 +10,9 @@ function consistency()
   @testset "Consistency" begin
     args = Pair{Symbol, Number}[:atol => 1e-6, :rtol => 1e-6, :max_eval => 20000, :max_time => 60.0]
 
-    @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2]
+    @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo]
       with_logger(NullLogger()) do
+        reset!(unlp)
         stats = mtd(unlp; args...)
         @test stats isa GenericExecutionStats
         @test stats.status == :first_order
@@ -27,7 +28,7 @@ function consistency()
       end
     end
 
-    @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2]
+    @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo]
       with_logger(NullLogger()) do
         reset!(qnlp)
         stats = mtd(qnlp; args...)
diff --git a/test/restart.jl b/test/restart.jl
index 02d90902..98f82103 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -1,5 +1,6 @@
 @testset "Test restart with a different initial guess: $fun" for (fun, s) in (
   (:R2, :R2Solver),
+  (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),
   (:trunk, :TrunkSolver),
@@ -44,6 +45,7 @@ end
 
 @testset "Test restart with a different problem: $fun" for (fun, s) in (
   (:R2, :R2Solver),
+  (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),
   (:trunk, :TrunkSolver),
diff --git a/test/runtests.jl b/test/runtests.jl
index de0295ed..bb41eeba 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,7 +18,7 @@ using JSOSolvers
 end
 
 @testset "Test iteration limit" begin
-  @testset "$fun" for fun in (R2, lbfgs, tron, trunk)
+  @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk)
     f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2
     nlp = ADNLPModel(f, [-1.2; 1.0])
 
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index cb41e83e..ddad51e8 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -8,6 +8,7 @@ function tests()
         ("lbfgs", lbfgs),
         ("tron", tron),
         ("R2", R2),
+        ("fomo", fomo),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From 48ca6f65df3d16b038ab1e76c23c1f190d98fb16 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 28 Dec 2023 16:54:39 -0500
Subject: [PATCH 084/171] fix update rule, fix find_beta algo

---
 src/fomo.jl | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 711e7fa3..41e74145 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -151,7 +151,7 @@ function SolverCore.solve!(
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk 0
   end
 
   set_status!(
@@ -171,14 +171,13 @@ function SolverCore.solve!(
 
   done = stats.status != :unknown
 
+  satβ = T(0)
   while !done
     λk = step_mult(αk,norm_∇fk,backend)
     if β == 0
       c .= x .- λk .* (∇fk)
     else
-      satβ = find_beta(β, κg, m, ∇fk)
       c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ)
-      m .= ∇fk .* (T(1) - β) .+ m .* β
     end
     ΔTk = norm_∇fk^2 * λk
     fck = obj(nlp, c)
@@ -186,9 +185,9 @@ function SolverCore.solve!(
       set_status!(stats, :unbounded)
       break
     end
-
+    
     ρk = (stats.objective - fck) / ΔTk
-
+    
     # Update regularization parameters
     if ρk >= η2
       αk = min(αmax, γ2 * αk)
@@ -199,9 +198,15 @@ function SolverCore.solve!(
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
+      if β!=0
+        m .= ∇fk .* (T(1) - β) .+ m .* β
+      end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
+      if β!=0
+        satβ = find_beta(β, κg, m, ∇fk)
+      end
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -250,13 +255,13 @@ function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V}
   b = β 
   while b-a > tol
     β = (b+a) / 2
-    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0     
+    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0     
       a = β
     else
       b = β
     end
   end
-  return β
+  return a
 end
 
 """

From 454a8e1238e2ae4c56949fb4f77724485574548e Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Fri, 5 Jan 2024 13:37:59 -0500
Subject: [PATCH 085/171] modify with gradient related strategy

---
 src/fomo.jl | 69 +++++++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 31 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 41e74145..db77937e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -23,8 +23,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
-- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2.
-- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
@@ -79,6 +78,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
   g::V
   c::V
   m::V
+  d::V
 end
 
 function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
@@ -86,7 +86,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   g = similar(nlp.meta.x0)
   c = similar(nlp.meta.x0)
   m = fill!(similar(nlp.meta.x0), 0)
-  return FomoSolver{T, V}(x, g, c, m)
+  d = fill!(similar(nlp.meta.x0), 0)
+  return FomoSolver{T, V}(x, g, c, m, d)
 end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -109,8 +110,7 @@ function SolverCore.solve!(
   atol::T = √eps(T),
   rtol::T = √eps(T),
   η1 = eps(T)^(1 / 4),
-  η2 = T(0.2),
-  κg = T(0.8),
+  η2 = T(0.95),
   γ1 = T(0.5),
   γ2 = T(2),
   αmax = 1/eps(T),
@@ -131,7 +131,7 @@ function SolverCore.solve!(
   ∇fk = solver.g
   c = solver.c
   m = solver.m
-
+  d = solver.d
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
@@ -171,15 +171,23 @@ function SolverCore.solve!(
 
   done = stats.status != :unknown
 
+  d .= ∇fk
+  norm_d = norm_∇fk
   satβ = T(0)
+  ρk = T(0)
   while !done
-    λk = step_mult(αk,norm_∇fk,backend)
-    if β == 0
-      c .= x .- λk .* (∇fk)
-    else
-      c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ)
-    end
-    ΔTk = norm_∇fk^2 * λk
+    # if β!=0
+    #   satβ = find_beta(β, m, ∇fk, norm_∇fk)
+    #   d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+    #   m .= ∇fk .* (T(1) - β) .+ m .* β
+    #   norm_d = norm(d)
+    # else
+    #   d .= ∇fk
+    #   norm_d = norm_∇fk
+    # end
+    λk = step_mult(αk,norm_d,backend)
+    c .= x .- λk .* d
+    ΔTk = norm_∇fk^2 *λk
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -187,6 +195,7 @@ function SolverCore.solve!(
     end
     
     ρk = (stats.objective - fck) / ΔTk
+    # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk
     
     # Update regularization parameters
     if ρk >= η2
@@ -204,9 +213,15 @@ function SolverCore.solve!(
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
-      if β!=0
-        satβ = find_beta(β, κg, m, ∇fk)
+      if β!= 0
+        satβ = find_beta(β, m, ∇fk, norm_∇fk)
+        d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+        norm_d = norm(d)
+      else
+        d .= ∇fk
+        norm_d = norm_∇fk
       end
+      
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -216,7 +231,7 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ
     end
 
     set_status!(
@@ -242,26 +257,18 @@ function SolverCore.solve!(
 end
 
 """
-  find_beta(β,κg,d,∇f;tol=0.01)
+  find_beta(β,m,∇f,norm_∇f,θ)
 
 Compute satβ which saturates the contibution of the momentum term to the gradient.
-Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term.
+satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
-function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V}
-  if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0.
+function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V}
+  dotprod = dot(m,∇f)
+  if dotprod > θ * norm_∇f^2
     return β
+  else
+    return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
   end
-  a = T(0)
-  b = β 
-  while b-a > tol
-    β = (b+a) / 2
-    if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0     
-      a = β
-    else
-      b = β
-    end
-  end
-  return a
 end
 
 """

From 0a75bb257d6cab537e58fb919aae9338ac560d28 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Fri, 5 Jan 2024 15:11:20 -0500
Subject: [PATCH 086/171] fix model decrease computation

---
 src/fomo.jl | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index db77937e..e2b582e8 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -176,18 +176,9 @@ function SolverCore.solve!(
   satβ = T(0)
   ρk = T(0)
   while !done
-    # if β!=0
-    #   satβ = find_beta(β, m, ∇fk, norm_∇fk)
-    #   d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
-    #   m .= ∇fk .* (T(1) - β) .+ m .* β
-    #   norm_d = norm(d)
-    # else
-    #   d .= ∇fk
-    #   norm_d = norm_∇fk
-    # end
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    ΔTk = norm_∇fk^2 *λk
+    ΔTk = dot(∇fk , d) * λk
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)

From f981b2feb1351fd0b3ba34de74a517bf52353a20 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 15 Jan 2024 15:48:56 -0500
Subject: [PATCH 087/171] fix find_beta function

---
 src/fomo.jl | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index e2b582e8..ed3a1340 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -175,6 +175,7 @@ function SolverCore.solve!(
   norm_d = norm_∇fk
   satβ = T(0)
   ρk = T(0)
+  #μ = αk
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
@@ -184,10 +185,7 @@ function SolverCore.solve!(
       set_status!(stats, :unbounded)
       break
     end
-    
     ρk = (stats.objective - fck) / ΔTk
-    # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk
-    
     # Update regularization parameters
     if ρk >= η2
       αk = min(αmax, γ2 * αk)
@@ -199,8 +197,11 @@ function SolverCore.solve!(
     if ρk >= η1
       x .= c
       if β!=0
+        #μ = αk * (T(1) - β) + αk * β
+        #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β
         m .= ∇fk .* (T(1) - β) .+ m .* β
       end
+      #αk = μ
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
@@ -222,7 +223,7 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
     end
 
     set_status!(
@@ -255,10 +256,11 @@ satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
 function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V}
   dotprod = dot(m,∇f)
-  if dotprod > θ * norm_∇f^2
+  if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2
     return β
   else
-    return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
+    return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod)
+    #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
   end
 end
 

From 7007e521fb33db94588e69e5b73c0586418cc94d Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 25 Jan 2024 11:53:34 -0500
Subject: [PATCH 088/171] fix null step size issue

---
 src/fomo.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ed3a1340..a0a543ee 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -238,7 +238,8 @@ function SolverCore.solve!(
         max_time = max_time,
       ),
     )
-
+    
+    αk == 0 && set_status!(stats,:exception)
     callback(nlp, solver, stats)
 
     done = stats.status != :unknown

From 943d788a969d74bcc62eb02d7ffb2230cd0f4b71 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 25 Jan 2024 12:33:40 -0500
Subject: [PATCH 089/171] fix test

---
 test/test_solvers.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index ddad51e8..ba182731 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -8,7 +8,8 @@ function tests()
         ("lbfgs", lbfgs),
         ("tron", tron),
         ("R2", R2),
-        ("fomo", fomo),
+        ("fomo_r2", fomo),
+        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From 3555443b73785acb416465917addd8505b82dbb2 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 25 Jan 2024 12:49:29 -0500
Subject: [PATCH 090/171] update docstring

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index a0a543ee..215d2d94 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -28,7 +28,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1)` : constant in the momentum term.
+- `β = T(0) ∈ [0,1)` : decay rate for the momentum.
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
 

From 5e637393738f7974634c35861c874956c43f7d0a Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 29 Jan 2024 15:14:16 -0500
Subject: [PATCH 091/171] add average sat beta to genericexecutionstat

---
 src/fomo.jl | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 215d2d94..af4f521f 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -92,7 +92,9 @@ end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
-  return solve!(solver, nlp; kwargs...)
+  solver_specific = Dict(:avgsatβ => T(0.))
+  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
+  return solve!(solver, nlp, stats; kwargs...)
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
@@ -175,6 +177,8 @@ function SolverCore.solve!(
   norm_d = norm_∇fk
   satβ = T(0)
   ρk = T(0)
+  avgsatβ = T(0.)
+  siter = 0
   #μ = αk
   while !done
     λk = step_mult(αk,norm_d,backend)
@@ -213,7 +217,8 @@ function SolverCore.solve!(
         d .= ∇fk
         norm_d = norm_∇fk
       end
-      
+      avgsatβ += satβ
+      siter += 1
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -245,6 +250,8 @@ function SolverCore.solve!(
     done = stats.status != :unknown
   end
 
+  avgsatβ /= siter
+  stats.solver_specific[:avgsatβ] = avgsatβ
   set_solution!(stats, x)
   return stats
 end

From 23bc42216e0b284d5abebbc5b6a607acf55e52b7 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 30 Jan 2024 15:14:18 -0500
Subject: [PATCH 092/171] add theta param as key arg

---
 src/fomo.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index af4f521f..a746483e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -29,6 +29,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0) ∈ [0,1)` : decay rate for the momentum.
+- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
 
@@ -120,6 +121,7 @@ function SolverCore.solve!(
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
+  θ::T = T(0.1),
   verbose::Int = 0,
   backend = qr()
 ) where {T, V}
@@ -210,7 +212,7 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if β!= 0
-        satβ = find_beta(β, m, ∇fk, norm_∇fk)
+        satβ = find_beta(β, m, ∇fk, norm_∇fk, θ)
         d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
         norm_d = norm(d)
       else
@@ -262,13 +264,12 @@ end
 Compute satβ which saturates the contibution of the momentum term to the gradient.
 satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
-function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V}
+function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V}
   dotprod = dot(m,∇f)
   if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2
     return β
   else
     return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod)
-    #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β)
   end
 end
 

From bee01c3b80fc80ac740f85d5fa287b4f8ca454b6 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 1 Feb 2024 12:25:34 -0500
Subject: [PATCH 093/171] update convergence conditions in find_beta. add
 satbeta decrease strategy if iteration is unsuccessful.

---
 src/fomo.jl | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index a746483e..557b1a29 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -24,12 +24,14 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
+- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1)` : decay rate for the momentum.
-- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. 
+- `β = T(0) ∈ [0,1)` : target decay rate for the momentum.
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
+- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
 
@@ -116,12 +118,14 @@ function SolverCore.solve!(
   η2 = T(0.95),
   γ1 = T(0.5),
   γ2 = T(2),
+  γ3 = T(1/2),
   αmax = 1/eps(T),
   max_time::Float64 = 30.0,
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
-  θ::T = T(0.1),
+  θ1::T = T(1e-5),
+  θ2::T = T(1e-5),
   verbose::Int = 0,
   backend = qr()
 ) where {T, V}
@@ -181,6 +185,7 @@ function SolverCore.solve!(
   ρk = T(0)
   avgsatβ = T(0.)
   siter = 0
+
   #μ = αk
   while !done
     λk = step_mult(αk,norm_d,backend)
@@ -197,6 +202,8 @@ function SolverCore.solve!(
       αk = min(αmax, γ2 * αk)
     elseif ρk < η1
       αk = αk * γ1
+      satβ *= γ3
+      d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
     end
 
     # Acceptance of the new candidate
@@ -212,7 +219,7 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if β!= 0
-        satβ = find_beta(β, m, ∇fk, norm_∇fk, θ)
+        satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
         norm_d = norm(d)
       else
@@ -259,18 +266,16 @@ function SolverCore.solve!(
 end
 
 """
-  find_beta(β,m,∇f,norm_∇f,θ)
+find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
 
 Compute satβ which saturates the contibution of the momentum term to the gradient.
 satβ is computed such that m.∇f > θ * norm_∇f^2
 """ 
-function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V}
+function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
-  if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2
-    return β
-  else
-    return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod)
-  end
+  β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
+  β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))
+  return min(β,min(β1,β2)) 
 end
 
 """

From 6ae60cddc5200c2e479f9212a9a3d9d1f2997597 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 1 Feb 2024 12:57:44 -0500
Subject: [PATCH 094/171] fix possible 0 division in find_beta

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 557b1a29..ff24afb9 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -274,7 +274,7 @@ satβ is computed such that m.∇f > θ * norm_∇f^2
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
   β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
-  β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))
+  β2 = m != ∇f             ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))      : β
   return min(β,min(β1,β2)) 
 end
 

From 6e4acfdee309510d48a69decc1d6575834491d44 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 3 Feb 2024 00:16:09 -0500
Subject: [PATCH 095/171] Misc improvements: - update docstrings - rename qr ->
 r2 - remove dead code

---
 src/fomo.jl | 62 ++++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ff24afb9..6a7628a3 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,9 +1,9 @@
-export fomo, FomoSolver, tr, qr
+export fomo, FomoSolver, tr, r2
 
 abstract type AbstractFomoMethod end
 
 struct tr <: AbstractFomoMethod end
-struct qr <: AbstractFomoMethod end
+struct r2 <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -22,18 +22,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
-- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1)` : target decay rate for the momentum.
+- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
-- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
+- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region
+- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -114,20 +114,20 @@ function SolverCore.solve!(
   x::V = nlp.meta.x0,
   atol::T = √eps(T),
   rtol::T = √eps(T),
-  η1 = eps(T)^(1 / 4),
-  η2 = T(0.95),
-  γ1 = T(0.5),
-  γ2 = T(2),
-  γ3 = T(1/2),
-  αmax = 1/eps(T),
+  η1::T = T(eps(T)^(1 / 4)),
+  η2::T = T(0.95),
+  γ1::T = T(1/2),
+  γ2::T = T(2),
+  γ3::T = T(1/2),
+  αmax::T = 1/eps(T),
   max_time::Float64 = 30.0,
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
-  θ1::T = T(1e-5),
-  θ2::T = T(1e-5),
+  θ1::T = T(0.1),
+  θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
-  backend = qr()
+  backend = r2()
 ) where {T, V}
   unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
 
@@ -183,10 +183,9 @@ function SolverCore.solve!(
   norm_d = norm_∇fk
   satβ = T(0)
   ρk = T(0)
-  avgsatβ = T(0.)
+  avgsatβ = T(0)
   siter = 0
-
-  #μ = αk
+  oneT = T(1)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
@@ -203,24 +202,21 @@ function SolverCore.solve!(
     elseif ρk < η1
       αk = αk * γ1
       satβ *= γ3
-      d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+      d .= ∇fk .* (oneT - satβ) .+ m .* satβ
     end
 
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
       if β!=0
-        #μ = αk * (T(1) - β) + αk * β
-        #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β
-        m .= ∇fk .* (T(1) - β) .+ m .* β
+        m .= ∇fk .* (oneT - β) .+ m .* β
       end
-      #αk = μ
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if β!= 0
         satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
-        d .= ∇fk .* (T(1) - satβ) .+ m .* satβ
+        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
         norm_d = norm(d)
       else
         d .= ∇fk
@@ -269,22 +265,26 @@ end
 find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
 
 Compute satβ which saturates the contibution of the momentum term to the gradient.
-satβ is computed such that m.∇f > θ * norm_∇f^2
+`satβ` is computed such that the two gradient-related conditions are ensured: 
+1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||²
+2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||
+with `m` memory of past gradient/
 """ 
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
+  diffnorm = norm(m .- ∇f)
   β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
-  β2 = m != ∇f             ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f))      : β
+  β2 = diffnorm != 0       ? (1-θ2)*norm_∇f/(θ2*diffnorm)           : β
   return min(β,min(β1,β2)) 
 end
 
 """
-  init_alpha(norm_∇fk::T, ::qr)
+  init_alpha(norm_∇fk::T, ::r2)
   init_alpha(norm_∇fk::T, ::tr)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::qr) where{T}
+function init_alpha(norm_∇fk::T, ::r2) where{T}
   1/2^round(log2(norm_∇fk + 1))
 end
 
@@ -293,12 +293,12 @@ function init_alpha(norm_∇fk::T, ::tr) where{T}
 end
 
 """
-  step_mult(αk::T, norm_∇fk::T, ::qr)
+  step_mult(αk::T, norm_∇fk::T, ::r2)
   step_mult(αk::T, norm_∇fk::T, ::tr)
 
-Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`).
+Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(αk::T, norm_∇fk::T, ::qr) where{T}
+function step_mult(αk::T, norm_∇fk::T, ::r2) where{T}
   αk
 end
 

From 69d1b4a040036b199eb418d9c87c5bfdf684b7ca Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 3 Feb 2024 14:54:29 -0500
Subject: [PATCH 096/171] fix null denominator in find_beta

---
 src/fomo.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6a7628a3..c983205b 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -272,9 +272,10 @@ with `m` memory of past gradient/
 """ 
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
-  diffnorm = norm(m .- ∇f)
-  β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β
-  β2 = diffnorm != 0       ? (1-θ2)*norm_∇f/(θ2*diffnorm)           : β
+  n1 = norm_∇f^2 - dotprod
+  n2 = norm(m .- ∇f)
+  β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
+  β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
   return min(β,min(β1,β2)) 
 end
 

From 9d1997501b7106a218fd57b41bdf4bba3ad57308 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 02:41:30 -0500
Subject: [PATCH 097/171] Make R2 and R2Solver interfaces to fomo solver.
 Delete obsolete R2.jl file.

---
 src/JSOSolvers.jl |   1 -
 src/R2.jl         | 231 ----------------------------------------------
 src/fomo.jl       | 150 ++++++++++++++++++++++++++----
 test/allocs.jl    |   2 +-
 test/restart.jl   |   4 +-
 5 files changed, 134 insertions(+), 254 deletions(-)
 delete mode 100644 src/R2.jl

diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl
index 79abace3..85afc4fe 100644
--- a/src/JSOSolvers.jl
+++ b/src/JSOSolvers.jl
@@ -13,7 +13,6 @@ export solve!
 # Unconstrained solvers
 include("lbfgs.jl")
 include("trunk.jl")
-include("R2.jl")
 include("fomo.jl")
 
 # Unconstrained solvers for NLS
diff --git a/src/R2.jl b/src/R2.jl
deleted file mode 100644
index b7304dc3..00000000
--- a/src/R2.jl
+++ /dev/null
@@ -1,231 +0,0 @@
-export R2, R2Solver
-
-"""
-    R2(nlp; kwargs...)
-
-A first-order quadratic regularization method for unconstrained optimization.
-
-For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`:
-
-    solver = R2Solver(nlp)
-    solve!(solver, nlp; kwargs...)
-
-# Arguments
-- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
-
-# Keyword arguments 
-- `x::V = nlp.meta.x0`: the initial guess.
-- `atol::T = √eps(T)`: absolute tolerance.
-- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
-- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters.
-- `αmax = 1/eps(T)`: maximum value for step size parameter for R2 algorithm.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
-- `max_time::Float64 = 30.0`: maximum time limit in seconds.
-- `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0) ∈ [0,1]` is the constant in the momentum term. If `β == 0`, R2 does not use momentum.
-- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-
-# Output
-The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
-
-# Callback
-The callback is called at each iteration.
-The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
-Changing any of the input arguments will affect the subsequent iterations.
-In particular, setting `stats.status = :user` will stop the algorithm.
-All relevant information should be available in `nlp` and `solver`.
-Notably, you can access, and modify, the following:
-- `solver.x`: current iterate;
-- `solver.gx`: current gradient;
-- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
-  - `stats.dual_feas`: norm of current gradient;
-  - `stats.iter`: current iteration counter;
-  - `stats.objective`: current objective function value;
-  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
-  - `stats.elapsed_time`: elapsed time in seconds.
-
-# Examples
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-stats = R2(nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-solver = R2Solver(nlp);
-stats = solve!(solver, nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-"""
-mutable struct R2Solver{T, V} <: AbstractOptimizationSolver
-  x::V
-  gx::V
-  cx::V
-  d::V   # used for momentum term
-  α::T
-end
-
-function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
-  x = similar(nlp.meta.x0)
-  gx = similar(nlp.meta.x0)
-  cx = similar(nlp.meta.x0)
-  d = fill!(similar(nlp.meta.x0), 0)
-  α = zero(T) # init it to zero for now 
-  return R2Solver{T, V}(x, gx, cx, d, α)
-end
-
-@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
-  solver = R2Solver(nlp)
-  return solve!(solver, nlp; kwargs...)
-end
-
-function SolverCore.reset!(solver::R2Solver{T}) where {T}
-  solver.d .= zero(T)
-  solver
-end
-SolverCore.reset!(solver::R2Solver, ::AbstractNLPModel) = reset!(solver)
-
-function SolverCore.solve!(
-  solver::R2Solver{T, V},
-  nlp::AbstractNLPModel{T, V},
-  stats::GenericExecutionStats{T, V};
-  callback = (args...) -> nothing,
-  x::V = nlp.meta.x0,
-  atol::T = √eps(T),
-  rtol::T = √eps(T),
-  η1 = eps(T)^(1 / 4),
-  η2 = T(0.95),
-  γ1 = T(1 / 2),
-  γ2 = 1 / γ1,
-  αmax = T(Inf),
-  max_time::Float64 = 30.0,
-  max_eval::Int = -1,
-  max_iter::Int = typemax(Int),
-  β::T = T(0),
-  verbose::Int = 0,
-) where {T, V}
-  unconstrained(nlp) || error("R2 should only be called on unconstrained problems.")
-
-  reset!(stats)
-  start_time = time()
-  set_time!(stats, 0.0)
-
-  x = solver.x .= x
-  ∇fk = solver.gx
-  ck = solver.cx
-  d = solver.d
-  αk = solver.α
-
-  set_iter!(stats, 0)
-  set_objective!(stats, obj(nlp, x))
-
-  grad!(nlp, x, ∇fk)
-  norm_∇fk = norm(∇fk)
-  set_dual_residual!(stats, norm_∇fk)
-
-  αk = 1 / 2^round(log2(norm_∇fk + 1))
-  # Stopping criterion: 
-  ϵ = atol + rtol * norm_∇fk
-  optimal = norm_∇fk ≤ ϵ
-  if optimal
-    @info("Optimal point found at initial point")
-    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
-    @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
-  end
-  if verbose > 0 && mod(stats.iter, verbose) == 0
-    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
-  end
-
-  set_status!(
-    stats,
-    get_status(
-      nlp,
-      elapsed_time = stats.elapsed_time,
-      optimal = optimal,
-      max_eval = max_eval,
-      iter = stats.iter,
-      max_iter = max_iter,
-      max_time = max_time,
-    ),
-  )
-
-  solver.α = αk
-  callback(nlp, solver, stats)
-  αk = solver.α
-
-  done = stats.status != :unknown
-
-  while !done
-    if β == 0
-      ck .= x .- (∇fk .* αk)
-    else
-      d .= ∇fk .* (T(1) - β) .+ d .* β
-      ck .= x .- (d .* αk)
-    end
-    ΔTk = norm_∇fk^2 * αk
-    fck = obj(nlp, ck)
-    if fck == -Inf
-      set_status!(stats, :unbounded)
-      break
-    end
-
-    ρk = (stats.objective - fck) / ΔTk
-
-    # Update regularization parameters
-    if ρk >= η2
-      αk = min(αmax, γ2 * αk)
-    elseif ρk < η1
-      αk = αk * γ1
-    end
-
-    # Acceptance of the new candidate
-    if ρk >= η1
-      x .= ck
-      set_objective!(stats, fck)
-      grad!(nlp, x, ∇fk)
-      norm_∇fk = norm(∇fk)
-    end
-
-    set_iter!(stats, stats.iter + 1)
-    set_time!(stats, time() - start_time)
-    set_dual_residual!(stats, norm_∇fk)
-    optimal = norm_∇fk ≤ ϵ
-
-    if verbose > 0 && mod(stats.iter, verbose) == 0
-      @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
-    end
-
-    set_status!(
-      stats,
-      get_status(
-        nlp,
-        elapsed_time = stats.elapsed_time,
-        optimal = optimal,
-        max_eval = max_eval,
-        iter = stats.iter,
-        max_iter = max_iter,
-        max_time = max_time,
-      ),
-    )
-    solver.α = αk
-    callback(nlp, solver, stats)
-    αk = solver.α
-
-    done = stats.status != :unknown
-  end
-
-  set_solution!(stats, x)
-  return stats
-end
diff --git a/src/fomo.jl b/src/fomo.jl
index c983205b..78e151d5 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,9 +1,10 @@
-export fomo, FomoSolver, tr, r2
+export fomo, FomoSolver, tr, r2, R2
 
 abstract type AbstractFomoMethod end
 
-struct tr <: AbstractFomoMethod end
-struct r2 <: AbstractFomoMethod end
+struct tr   <: AbstractFomoMethod end
+struct r2   <: AbstractFomoMethod end
+struct R2og <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -100,10 +101,97 @@ end
   return solve!(solver, nlp, stats; kwargs...)
 end
 
+"""
+    R2(nlp; kwargs...)
+
+A first-order quadratic regularization method for unconstrained optimization.
+
+For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = R2Solver(nlp)
+    solve!(solver, nlp; kwargs...)
+Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`.
+
+# Arguments
+- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
+
+# Keyword arguments 
+- `x::V = nlp.meta.x0`: the initial guess.
+- `atol::T = √eps(T)`: absolute tolerance.
+- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
+- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters.
+- `σmin = eps(T)`: step parameter for R2 algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_time::Float64 = 30.0`: maximum time limit in seconds.
+- `max_iter::Int = typemax(Int)`: maximum number of iterations.
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect the subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.x`: current iterate;
+- `solver.gx`: current gradient;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+  - `stats.dual_feas`: norm of current gradient;
+  - `stats.iter`: current iteration counter;
+  - `stats.objective`: current objective function value;
+  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+  - `stats.elapsed_time`: elapsed time in seconds.
+
+# Examples
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = R2(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = R2Solver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+"""
+function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
+  x = similar(nlp.meta.x0)
+  g = similar(nlp.meta.x0)
+  c = similar(nlp.meta.x0)
+  m = Vector{T}()
+  d = g # similar without momentum
+  return FomoSolver{T, V}(x, g, c, m, d)
+end
+
+@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  solver = R2Solver(nlp)
+  stats = GenericExecutionStats(nlp)
+  if haskey(kwargs,:σmax)
+    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
+  else
+    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...)
+  end
+end
+
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
   fill!(solver.m,0)
   solver
 end
+
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
@@ -127,10 +215,13 @@ function SolverCore.solve!(
   θ1::T = T(0.1),
   θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
-  backend = r2()
+  backend = r2(),
+  σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2`
 ) where {T, V}
-  unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
-
+  r2mode = (backend == R2og())
+  mthname = r2mode ? "R2" : "fomo"
+  unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.")
+  
   reset!(stats)
   start_time = time()
   set_time!(stats, 0.0)
@@ -154,12 +245,24 @@ function SolverCore.solve!(
   optimal = norm_∇fk ≤ ϵ
   if optimal
     @info("Optimal point found at initial point")
-    @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
-    @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+    if r2mode
+      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+    else
+      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+    end
+    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
-    @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
+    if r2mode
+      @info @sprintf "%5s  %9s  %7s  %7s" "iter" "f" "‖∇f‖" "σ"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+    else
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
     infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk 0
+    end
+    
   end
 
   set_status!(
@@ -201,8 +304,10 @@ function SolverCore.solve!(
       αk = min(αmax, γ2 * αk)
     elseif ρk < η1
       αk = αk * γ1
-      satβ *= γ3
-      d .= ∇fk .* (oneT - satβ) .+ m .* satβ
+      if !r2mode
+        satβ *= γ3
+        (d .= ∇fk .* (oneT - satβ) .+ m .* satβ)
+      end
     end
 
     # Acceptance of the new candidate
@@ -222,8 +327,10 @@ function SolverCore.solve!(
         d .= ∇fk
         norm_d = norm_∇fk
       end
-      avgsatβ += satβ
-      siter += 1
+      if !r2mode
+        (avgsatβ += satβ)
+        (siter += 1)
+      end
     end
 
     set_iter!(stats, stats.iter + 1)
@@ -233,7 +340,11 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+      if r2mode
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+      else
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+      end
     end
 
     set_status!(
@@ -254,9 +365,10 @@ function SolverCore.solve!(
 
     done = stats.status != :unknown
   end
-
-  avgsatβ /= siter
-  stats.solver_specific[:avgsatβ] = avgsatβ
+  if !r2mode
+    avgsatβ /= siter
+    stats.solver_specific[:avgsatβ] = avgsatβ
+  end
   set_solution!(stats, x)
   return stats
 end
@@ -285,7 +397,7 @@ end
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::r2) where{T}
+function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T}
   1/2^round(log2(norm_∇fk + 1))
 end
 
@@ -299,7 +411,7 @@ end
 
 Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(αk::T, norm_∇fk::T, ::r2) where{T}
+function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
   αk
 end
 
diff --git a/test/allocs.jl b/test/allocs.jl
index f029c5f1..ea17a8a2 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
diff --git a/test/restart.jl b/test/restart.jl
index 98f82103..eb770739 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -1,5 +1,5 @@
 @testset "Test restart with a different initial guess: $fun" for (fun, s) in (
-  (:R2, :R2Solver),
+  (:R2, :FomoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),
@@ -44,7 +44,7 @@ end
 end
 
 @testset "Test restart with a different problem: $fun" for (fun, s) in (
-  (:R2, :R2Solver),
+  (:R2, :FomoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),

From 6f2f01bfd2aa151db900c0a28e15b7c7f269f543 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 10:40:13 -0500
Subject: [PATCH 098/171] Export R2Solver (fix doc build issue) and backend for
 R2 classic

---
 src/fomo.jl | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 78e151d5..fcd60978 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,4 +1,4 @@
-export fomo, FomoSolver, tr, r2, R2
+export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
 
 abstract type AbstractFomoMethod end
 
@@ -34,7 +34,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
 - `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region
+- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0).
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -161,7 +161,7 @@ stats = R2(nlp)
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
 solver = R2Solver(nlp);
-stats = solve!(solver, nlp)
+stats = solve!(solver, nlp, backend = R2og())
 
 # output
 
@@ -181,9 +181,9 @@ end
   solver = R2Solver(nlp)
   stats = GenericExecutionStats(nlp)
   if haskey(kwargs,:σmax)
-    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
+    return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
   else
-    return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...)
+    return solve!(solver, nlp, stats; backend = R2og(), kwargs...)
   end
 end
 
@@ -292,7 +292,11 @@ function SolverCore.solve!(
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    ΔTk = dot(∇fk , d) * λk
+    if r2mode
+      ΔTk = norm_∇fk^2 * λk
+    else  
+      ΔTk = dot(∇fk , d) * λk
+    end
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -306,20 +310,20 @@ function SolverCore.solve!(
       αk = αk * γ1
       if !r2mode
         satβ *= γ3
-        (d .= ∇fk .* (oneT - satβ) .+ m .* satβ)
+        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
       end
     end
 
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
-      if β!=0
+      if !r2mode
         m .= ∇fk .* (oneT - β) .+ m .* β
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
-      if β!= 0
+      if !r2mode
         satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - satβ) .+ m .* satβ
         norm_d = norm(d)
@@ -328,8 +332,8 @@ function SolverCore.solve!(
         norm_d = norm_∇fk
       end
       if !r2mode
-        (avgsatβ += satβ)
-        (siter += 1)
+        avgsatβ += satβ
+        siter += 1
       end
     end
 

From 772adabfa1a3cc18c3d9701d974ce53d924c0509 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 11:09:20 -0500
Subject: [PATCH 099/171] add :smallstep exception (step addition underflow)

---
 src/fomo.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index fcd60978..49b535c6 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -292,6 +292,7 @@ function SolverCore.solve!(
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
+    x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -364,7 +365,8 @@ function SolverCore.solve!(
       ),
     )
     
-    αk == 0 && set_status!(stats,:exception)
+    αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before
+
     callback(nlp, solver, stats)
 
     done = stats.status != :unknown

From 03e18bfdc8200dbcee3587a333b5d7b86668f2bb Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 4 Feb 2024 11:17:28 -0500
Subject: [PATCH 100/171] fix small_step exception

---
 src/fomo.jl | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 49b535c6..95655ea4 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -292,7 +292,8 @@ function SolverCore.solve!(
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0
+    uf = x == c # step addition underfow on every dimensions, should happen before αk == 0
+    @show stats.status
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -365,11 +366,13 @@ function SolverCore.solve!(
       ),
     )
     
-    αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before
-
     callback(nlp, solver, stats)
 
+    uf      && set_status!(stats,:small_step)
+    αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before
+
     done = stats.status != :unknown
+    @show stats.status αk
   end
   if !r2mode
     avgsatβ /= siter

From 81cb4432bb848ce3183cd9ab4a0c08b4bf05a3cd Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 5 Feb 2024 11:20:03 -0500
Subject: [PATCH 101/171] remove terminal output

---
 src/fomo.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 95655ea4..497bdfb3 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -293,7 +293,6 @@ function SolverCore.solve!(
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
     uf = x == c # step addition underfow on every dimensions, should happen before αk == 0
-    @show stats.status
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -372,7 +371,6 @@ function SolverCore.solve!(
     αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
-    @show stats.status αk
   end
   if !r2mode
     avgsatβ /= siter

From 2048735fea1d42cc865ce8befb1a695d7311ff9d Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 5 Feb 2024 16:24:01 -0500
Subject: [PATCH 102/171] update docstring, add rhok to the output

---
 src/fomo.jl | 134 +++++++++++++++-------------------------------------
 1 file changed, 37 insertions(+), 97 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 497bdfb3..30fc236f 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -8,6 +8,7 @@ struct R2og <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
+    R2(nlp; kwargs...)
 
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
 
@@ -16,6 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
+*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method.
+For advanced usage:
+
+    solver = R2Solver(nlp)
+    solve!(solver, nlp; kwargs...)
+
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
@@ -25,16 +32,16 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
-- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration.
+- `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β].
-- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β].
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0).
+- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -96,78 +103,11 @@ end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
-  solver_specific = Dict(:avgsatβ => T(0.))
+  solver_specific = Dict(:avgβmax => T(0.))
   stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 
-"""
-    R2(nlp; kwargs...)
-
-A first-order quadratic regularization method for unconstrained optimization.
-
-For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`:
-
-    solver = R2Solver(nlp)
-    solve!(solver, nlp; kwargs...)
-Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`.
-
-# Arguments
-- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
-
-# Keyword arguments 
-- `x::V = nlp.meta.x0`: the initial guess.
-- `atol::T = √eps(T)`: absolute tolerance.
-- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
-- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters.
-- `σmin = eps(T)`: step parameter for R2 algorithm.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
-- `max_time::Float64 = 30.0`: maximum time limit in seconds.
-- `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-
-# Output
-The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
-
-# Callback
-The callback is called at each iteration.
-The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
-Changing any of the input arguments will affect the subsequent iterations.
-In particular, setting `stats.status = :user` will stop the algorithm.
-All relevant information should be available in `nlp` and `solver`.
-Notably, you can access, and modify, the following:
-- `solver.x`: current iterate;
-- `solver.gx`: current gradient;
-- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
-  - `stats.dual_feas`: norm of current gradient;
-  - `stats.iter`: current iteration counter;
-  - `stats.objective`: current objective function value;
-  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
-  - `stats.elapsed_time`: elapsed time in seconds.
-
-# Examples
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-stats = R2(nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-solver = R2Solver(nlp);
-stats = solve!(solver, nlp, backend = R2og())
-
-# output
-
-"Execution stats: first-order stationary"
-```
-"""
 function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   x = similar(nlp.meta.x0)
   g = similar(nlp.meta.x0)
@@ -177,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FomoSolver{T, V}(x, g, c, m, d)
 end
 
-@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = R2Solver(nlp)
   stats = GenericExecutionStats(nlp)
   if haskey(kwargs,:σmax)
@@ -256,11 +196,11 @@ function SolverCore.solve!(
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if r2mode
-      @info @sprintf "%5s  %9s  %7s  %7s" "iter" "f" "‖∇f‖" "σ"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
     else
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s" "iter" "f" "‖∇f‖" "α" "staβ"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk 0
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
+    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
     end
     
   end
@@ -284,15 +224,15 @@ function SolverCore.solve!(
 
   d .= ∇fk
   norm_d = norm_∇fk
-  satβ = T(0)
+  βmax = T(0)
   ρk = T(0)
-  avgsatβ = T(0)
+  avgβmax = T(0)
   siter = 0
   oneT = T(1)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
-    uf = x == c # step addition underfow on every dimensions, should happen before αk == 0
+    step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0
     if r2mode
       ΔTk = norm_∇fk^2 * λk
     else  
@@ -310,8 +250,8 @@ function SolverCore.solve!(
     elseif ρk < η1
       αk = αk * γ1
       if !r2mode
-        satβ *= γ3
-        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
+        βmax *= γ3
+        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
       end
     end
 
@@ -325,15 +265,15 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
-        d .= ∇fk .* (oneT - satβ) .+ m .* satβ
+        βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
+        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
         norm_d = norm(d)
       else
         d .= ∇fk
         norm_d = norm_∇fk
       end
       if !r2mode
-        avgsatβ += satβ
+        avgβmax += βmax
         siter += 1
       end
     end
@@ -346,9 +286,9 @@ function SolverCore.solve!(
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
       if r2mode
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk
       else
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk satβ
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax
       end
     end
 
@@ -367,14 +307,14 @@ function SolverCore.solve!(
     
     callback(nlp, solver, stats)
 
-    uf      && set_status!(stats,:small_step)
-    αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before
+    step_underflow  && set_status!(stats,:small_step)
+    αk == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
   if !r2mode
-    avgsatβ /= siter
-    stats.solver_specific[:avgsatβ] = avgsatβ
+    avgβmax /= siter
+    stats.solver_specific[:avgβmax] = avgβmax
   end
   set_solution!(stats, x)
   return stats
@@ -383,11 +323,11 @@ end
 """
 find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
 
-Compute satβ which saturates the contibution of the momentum term to the gradient.
-`satβ` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||²
-2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||
-with `m` memory of past gradient/
+Compute βmax which saturates the contibution of the momentum term to the gradient.
+`βmax` is computed such that the two gradient-related conditions are ensured: 
+1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
+2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))||
+with `m` memory of past gradient
 """ 
 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
@@ -395,7 +335,7 @@ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n2 = norm(m .- ∇f)
   β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
   β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
-  return min(β,min(β1,β2)) 
+  return min(β,min(β1,β2))
 end
 
 """

From 72b2456bcaa09a87f28a17eb95c92998bf8c17b3 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Mon, 5 Feb 2024 15:35:49 -0500
Subject: [PATCH 103/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 30fc236f..cf4697ae 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -329,7 +329,7 @@ Compute βmax which saturates the contibution of the momentum term to the gradie
 2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))||
 with `m` memory of past gradient
 """ 
-function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   dotprod = dot(m,∇f)
   n1 = norm_∇f^2 - dotprod
   n2 = norm(m .- ∇f)

From 049e31ce575677683c9c457f5abbe36274aae109 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 6 Feb 2024 12:22:54 -0500
Subject: [PATCH 104/171] - create variable for dot(m,nabla f): avoid
 computation of dot(d, nabla f) in model decrease, is used in find_beta
 (interface updated) - update docstrings

---
 src/fomo.jl | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index cf4697ae..83fe6648 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -21,7 +21,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 For advanced usage:
 
     solver = R2Solver(nlp)
-    solve!(solver, nlp; kwargs...)
+    solve!(solver, nlp; backend = R2og(), kwargs...)
 
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
@@ -39,10 +39,12 @@ For advanced usage:
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
 
+*Warning:* `R2og()` backend should be used only for advanced usage as described above.
+
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
@@ -174,6 +176,7 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
+  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
@@ -229,15 +232,12 @@ function SolverCore.solve!(
   avgβmax = T(0)
   siter = 0
   oneT = T(1)
+  mdot∇f = T(0) # dot(m,∇fk)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
     step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0
-    if r2mode
-      ΔTk = norm_∇fk^2 * λk
-    else  
-      ΔTk = dot(∇fk , d) * λk
-    end
+    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -260,17 +260,15 @@ function SolverCore.solve!(
       x .= c
       if !r2mode
         m .= ∇fk .* (oneT - β) .+ m .* β
+        mdot∇f = dot(m,∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2)
+        βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ m .* βmax
         norm_d = norm(d)
-      else
-        d .= ∇fk
-        norm_d = norm_∇fk
       end
       if !r2mode
         avgβmax += βmax
@@ -321,17 +319,16 @@ function SolverCore.solve!(
 end
 
 """
-find_beta(m, ∇f, norm_∇f, β, θ1, θ2)
+find_beta(m, md∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
-2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))||
-with `m` memory of past gradient
+2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||
+with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` 
 """ 
-function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
-  dotprod = dot(m,∇f)
-  n1 = norm_∇f^2 - dotprod
+function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+  n1 = norm_∇f^2 - mdot∇f
   n2 = norm(m .- ∇f)
   β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
   β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
@@ -354,9 +351,10 @@ end
 
 """
   step_mult(αk::T, norm_∇fk::T, ::r2)
+  step_mult(αk::T, norm_∇fk::T, ::R2og)
   step_mult(αk::T, norm_∇fk::T, ::tr)
 
-Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`).
+Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`).
 """
 function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
   αk

From 2064b6b263ca04e6c48a1fe63c3de639535b6ec0 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 6 Feb 2024 12:54:53 -0500
Subject: [PATCH 105/171] rename `m` as `momentum`

---
 src/fomo.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 83fe6648..b234622e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -171,7 +171,7 @@ function SolverCore.solve!(
   x = solver.x .= x
   ∇fk = solver.g
   c = solver.c
-  m = solver.m
+  momentum = solver.m
   d = solver.d
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
@@ -232,7 +232,7 @@ function SolverCore.solve!(
   avgβmax = T(0)
   siter = 0
   oneT = T(1)
-  mdot∇f = T(0) # dot(m,∇fk)
+  mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
     λk = step_mult(αk,norm_d,backend)
     c .= x .- λk .* d
@@ -251,7 +251,7 @@ function SolverCore.solve!(
       αk = αk * γ1
       if !r2mode
         βmax *= γ3
-        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
+        d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
       end
     end
 
@@ -259,7 +259,7 @@ function SolverCore.solve!(
     if ρk >= η1
       x .= c
       if !r2mode
-        m .= ∇fk .* (oneT - β) .+ m .* β
+        momentum .= ∇fk .* (oneT - β) .+ momentum .* β
         mdot∇f = dot(m,∇fk)
       end
       set_objective!(stats, fck)
@@ -267,7 +267,7 @@ function SolverCore.solve!(
       norm_∇fk = norm(∇fk)
       if !r2mode
         βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
-        d .= ∇fk .* (oneT - βmax) .+ m .* βmax
+        d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
       if !r2mode
@@ -325,7 +325,7 @@ Compute βmax which saturates the contibution of the momentum term to the gradie
 `βmax` is computed such that the two gradient-related conditions are ensured: 
 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
 2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||
-with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` 
+with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
 """ 
 function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n1 = norm_∇f^2 - mdot∇f

From 16a164ab9cb449e0b202d036761ff97b7b81ad89 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Tue, 6 Feb 2024 12:47:50 -0500
Subject: [PATCH 106/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index b234622e..1f6a45cb 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -200,7 +200,7 @@ function SolverCore.solve!(
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if r2mode
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
     infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0

From d521169e0f9039986f53fb6531dc06b840bcc8d6 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Tue, 6 Feb 2024 12:48:10 -0500
Subject: [PATCH 107/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 1f6a45cb..c1ef4d1f 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -203,7 +203,7 @@ function SolverCore.solve!(
       infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-    infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
     end
     
   end

From 56dac0dde8cd42d0b1cab27a7d4008c4db56a26b Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Tue, 6 Feb 2024 12:48:26 -0500
Subject: [PATCH 108/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index c1ef4d1f..0a202466 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -205,7 +205,6 @@ function SolverCore.solve!(
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
       infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
     end
-    
   end
 
   set_status!(

From 45bbd4fb9d46c9094e7c85b1e5c5dd3a96098287 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 6 Feb 2024 13:05:58 -0500
Subject: [PATCH 109/171] update docstring, fix `m` to `momentum` renaming.

---
 src/fomo.jl | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 0a202466..93a5c11a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -38,8 +38,8 @@ For advanced usage:
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
 
@@ -259,13 +259,13 @@ function SolverCore.solve!(
       x .= c
       if !r2mode
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
-        mdot∇f = dot(m,∇fk)
+        mdot∇f = dot(momentum,∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
+        βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
@@ -322,8 +322,8 @@ find_beta(m, md∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||²
-2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||
+1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖²
+2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖
 with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
 """ 
 function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
@@ -336,6 +336,7 @@ end
 
 """
   init_alpha(norm_∇fk::T, ::r2)
+  init_alpha(norm_∇fk::T, ::R2og)
   init_alpha(norm_∇fk::T, ::tr)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.

From cb364960b75cd10a0997aede1fb5974f1fab0144 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Fri, 16 Feb 2024 09:31:41 -0500
Subject: [PATCH 110/171] Update src/fomo.jl

Co-authored-by: tmigot <tangi.migot@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 93a5c11a..c04d5725 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -122,7 +122,7 @@ end
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = R2Solver(nlp)
   stats = GenericExecutionStats(nlp)
-  if haskey(kwargs,:σmax)
+  if haskey(kwargs,:σmin)
     return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
   else
     return solve!(solver, nlp, stats; backend = R2og(), kwargs...)

From 98543b8a8da6b1fab7d5e359fba904c6c6257dab Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 17 Feb 2024 17:08:10 -0500
Subject: [PATCH 111/171] Update test/allocs.jl

Co-authored-by: tmigot <tangi.migot@gmail.com>
---
 test/allocs.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/allocs.jl b/test/allocs.jl
index ea17a8a2..f029c5f1 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))

From 6ff2cc1d87f0cba96d5443723612e2e26cbfd0b5 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 17 Feb 2024 17:09:55 -0500
Subject: [PATCH 112/171] Update src/fomo.jl

Co-authored-by: tmigot <tangi.migot@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index c04d5725..b84c8664 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,6 +1,6 @@
 export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
 
-abstract type AbstractFomoMethod end
+abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
 struct tr   <: AbstractFomoMethod end
 struct r2   <: AbstractFomoMethod end

From 6b93fa5e8296e6b63658cb04eb9375f17bedaca0 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 17 Feb 2024 17:48:27 -0500
Subject: [PATCH 113/171] put alpha in solver structure, uncomment callback
 test

---
 src/fomo.jl | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index b84c8664..6e2c018a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,6 +1,6 @@
 export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
 
-abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
+abstract type AbstractFomoMethod end
 
 struct tr   <: AbstractFomoMethod end
 struct r2   <: AbstractFomoMethod end
@@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
   c::V
   m::V
   d::V
+  α::T
 end
 
 function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
@@ -100,7 +101,7 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = fill!(similar(nlp.meta.x0), 0)
   d = fill!(similar(nlp.meta.x0), 0)
-  return FomoSolver{T, V}(x, g, c, m, d)
+  return FomoSolver{T, V}(x, g, c, m, d, T(0))
 end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -116,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = Vector{T}()
   d = g # similar without momentum
-  return FomoSolver{T, V}(x, g, c, m, d)
+  return FomoSolver{T, V}(x, g, c, m, d, T(0))
 end
 
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -181,7 +182,7 @@ function SolverCore.solve!(
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
-  αk = init_alpha(norm_∇fk,backend)
+  solver.α = init_alpha(norm_∇fk,backend)
   
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
@@ -190,20 +191,20 @@ function SolverCore.solve!(
     @info("Optimal point found at initial point")
     if r2mode
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
-      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α
     else
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
-      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
     
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if r2mode
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0
     end
   end
 
@@ -233,9 +234,9 @@ function SolverCore.solve!(
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
-    λk = step_mult(αk,norm_d,backend)
+    λk = step_mult(solver.α,norm_d,backend)
     c .= x .- λk .* d
-    step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0
+    step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0
     ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk
     fck = obj(nlp, c)
     if fck == -Inf
@@ -245,9 +246,9 @@ function SolverCore.solve!(
     ρk = (stats.objective - fck) / ΔTk
     # Update regularization parameters
     if ρk >= η2
-      αk = min(αmax, γ2 * αk)
+      solver.α = min(αmax, γ2 * solver.α)
     elseif ρk < η1
-      αk = αk * γ1
+      solver.α = solver.α * γ1
       if !r2mode
         βmax *= γ3
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
@@ -283,9 +284,9 @@ function SolverCore.solve!(
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
       if r2mode
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk
       else
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax
+        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax
       end
     end
 
@@ -305,7 +306,7 @@ function SolverCore.solve!(
     callback(nlp, solver, stats)
 
     step_underflow  && set_status!(stats,:small_step)
-    αk == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
+    solver.α == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
@@ -350,16 +351,16 @@ function init_alpha(norm_∇fk::T, ::tr) where{T}
 end
 
 """
-  step_mult(αk::T, norm_∇fk::T, ::r2)
-  step_mult(αk::T, norm_∇fk::T, ::R2og)
-  step_mult(αk::T, norm_∇fk::T, ::tr)
+  step_mult(α::T, norm_∇fk::T, ::r2)
+  step_mult(α::T, norm_∇fk::T, ::R2og)
+  step_mult(α::T, norm_∇fk::T, ::tr)
 
-Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`).
+Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
-  αk
+function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
+  α
 end
 
-function step_mult(αk::T, norm_∇fk::T, ::tr) where{T}
-  αk/norm_∇fk
+function step_mult(α::T, norm_∇fk::T, ::tr) where{T}
+  α/norm_∇fk
 end
\ No newline at end of file

From 65959d1dfab6483fb663b1bcb1f458e4b5bd111d Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 17 Feb 2024 18:14:08 -0500
Subject: [PATCH 114/171] allocate memory for norm vector in find_beta

---
 src/fomo.jl | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6e2c018a..f047169d 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
   c::V
   m::V
   d::V
+  p::V
   α::T
 end
 
@@ -101,7 +102,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = fill!(similar(nlp.meta.x0), 0)
   d = fill!(similar(nlp.meta.x0), 0)
-  return FomoSolver{T, V}(x, g, c, m, d, T(0))
+  p = similar(nlp.meta.x0)
+  return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
 end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -117,7 +119,8 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
   c = similar(nlp.meta.x0)
   m = Vector{T}()
   d = g # similar without momentum
-  return FomoSolver{T, V}(x, g, c, m, d, T(0))
+  p = Vector{T}()
+  return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
 end
 
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -174,6 +177,7 @@ function SolverCore.solve!(
   c = solver.c
   momentum = solver.m
   d = solver.d
+  p = solver.p
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
@@ -266,7 +270,8 @@ function SolverCore.solve!(
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if !r2mode
-        βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2)
+        p .= momentum .- ∇fk
+        βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
@@ -327,9 +332,9 @@ Compute βmax which saturates the contibution of the momentum term to the gradie
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖
 with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
 """ 
-function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n1 = norm_∇f^2 - mdot∇f
-  n2 = norm(m .- ∇f)
+  n2 = norm(p)
   β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
   β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
   return min(β,min(β1,β2))

From 937288838934af02ecb90fe309913e9ef3e022c8 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 17 Feb 2024 22:45:36 -0500
Subject: [PATCH 115/171] Add FoSolver structure for no-momentum case. Change
 backend name to step_backend to avoid confusion. Update docstrings and tests.

---
 src/fomo.jl          | 123 +++++++++++++++++++++++++------------------
 test/test_solvers.jl |   2 +-
 2 files changed, 74 insertions(+), 51 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index f047169d..c672b7b7 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,10 +1,10 @@
-export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og
+export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step
 
-abstract type AbstractFomoMethod end
+abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
-struct tr   <: AbstractFomoMethod end
-struct r2   <: AbstractFomoMethod end
-struct R2og <: AbstractFomoMethod end
+abstract type AbstractFomoMethod end
+struct tr_step   <: AbstractFomoMethod end
+struct r2_step   <: AbstractFomoMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -17,11 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method.
+**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method.
 For advanced usage:
 
-    solver = R2Solver(nlp)
-    solve!(solver, nlp; backend = R2og(), kwargs...)
+    solver = FoSolver(nlp)
+    solve!(solver, nlp; kwargs...)
+Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
@@ -33,7 +34,7 @@ For advanced usage:
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
-- `αmax = 1/eps(T)`: step parameter for fomo algorithm.
+- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
@@ -41,9 +42,7 @@ For advanced usage:
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
 - `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
-- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0).
-
-*Warning:* `R2og()` backend should be used only for advanced usage as described above.
+- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -65,6 +64,7 @@ Notably, you can access, and modify, the following:
   - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
+## `fomo`
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -83,10 +83,31 @@ stats = solve!(solver, nlp)
 
 # output
 
+"Execution stats: first-order stationary"
+```
+## `R2`
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = R2(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = FoSolver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
 "Execution stats: first-order stationary"
 ```
 """
-mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver
+mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
   g::V
   c::V
@@ -113,23 +134,28 @@ end
   return solve!(solver, nlp, stats; kwargs...)
 end
 
-function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V}
+
+mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
+  x::V
+  g::V
+  c::V
+  α::T
+end
+
+function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   x = similar(nlp.meta.x0)
   g = similar(nlp.meta.x0)
   c = similar(nlp.meta.x0)
-  m = Vector{T}()
-  d = g # similar without momentum
-  p = Vector{T}()
-  return FomoSolver{T, V}(x, g, c, m, d, p, T(0))
+  return FoSolver{T, V}(x, g, c, T(0))
 end
 
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
-  solver = R2Solver(nlp)
+  solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
   if haskey(kwargs,:σmin)
-    return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...)
+    return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...)
   else
-    return solve!(solver, nlp, stats; backend = R2og(), kwargs...)
+    return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
   end
 end
 
@@ -141,7 +167,7 @@ end
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
-  solver::FomoSolver{T, V},
+  solver::AbstractFirstOrderSolver,
   nlp::AbstractNLPModel{T, V},
   stats::GenericExecutionStats{T, V};
   callback = (args...) -> nothing,
@@ -161,12 +187,11 @@ function SolverCore.solve!(
   θ1::T = T(0.1),
   θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
-  backend = r2(),
+  step_backend = r2_step(),
   σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2`
 ) where {T, V}
-  r2mode = (backend == R2og())
-  mthname = r2mode ? "R2" : "fomo"
-  unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.")
+  use_momentum = typeof(solver) <: FomoSolver
+  unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
   
   reset!(stats)
   start_time = time()
@@ -175,9 +200,9 @@ function SolverCore.solve!(
   x = solver.x .= x
   ∇fk = solver.g
   c = solver.c
-  momentum = solver.m
-  d = solver.d
-  p = solver.p
+  momentum = use_momentum ? solver.m : nothing # not used if no momentum
+  d = use_momentum ? solver.d : solver.g # g = d if no momentum
+  p = use_momentum ? solver.p : nothing # not used if no momentum
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
@@ -186,14 +211,14 @@ function SolverCore.solve!(
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
-  solver.α = init_alpha(norm_∇fk,backend)
+  solver.α = init_alpha(norm_∇fk,step_backend)
   
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
   if optimal
     @info("Optimal point found at initial point")
-    if r2mode
+    if !use_momentum
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α
     else
@@ -203,7 +228,7 @@ function SolverCore.solve!(
     
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
-    if r2mode
+    if !use_momentum
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
       infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN
     else
@@ -238,10 +263,10 @@ function SolverCore.solve!(
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
-    λk = step_mult(solver.α,norm_d,backend)
+    λk = step_mult(solver.α,norm_d,step_backend)
     c .= x .- λk .* d
     step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0
-    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk
+    ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum
     fck = obj(nlp, c)
     if fck == -Inf
       set_status!(stats, :unbounded)
@@ -253,7 +278,7 @@ function SolverCore.solve!(
       solver.α = min(αmax, γ2 * solver.α)
     elseif ρk < η1
       solver.α = solver.α * γ1
-      if !r2mode
+      if use_momentum
         βmax *= γ3
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
       end
@@ -262,20 +287,20 @@ function SolverCore.solve!(
     # Acceptance of the new candidate
     if ρk >= η1
       x .= c
-      if !r2mode
+      if use_momentum
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
         mdot∇f = dot(momentum,∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
-      if !r2mode
+      if use_momentum
         p .= momentum .- ∇fk
         βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
-      if !r2mode
+      if use_momentum
         avgβmax += βmax
         siter += 1
       end
@@ -288,7 +313,7 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
-      if r2mode
+      if !use_momentum
         infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk
       else
         infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax
@@ -315,7 +340,7 @@ function SolverCore.solve!(
 
     done = stats.status != :unknown
   end
-  if !r2mode
+  if use_momentum
     avgβmax /= siter
     stats.solver_specific[:avgβmax] = avgβmax
   end
@@ -341,31 +366,29 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where
 end
 
 """
-  init_alpha(norm_∇fk::T, ::r2)
-  init_alpha(norm_∇fk::T, ::R2og)
-  init_alpha(norm_∇fk::T, ::tr)
+  init_alpha(norm_∇fk::T, ::r2_step)
+  init_alpha(norm_∇fk::T, ::tr_step)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T}
+function init_alpha(norm_∇fk::T, ::r2_step) where{T}
   1/2^round(log2(norm_∇fk + 1))
 end
 
-function init_alpha(norm_∇fk::T, ::tr) where{T}
+function init_alpha(norm_∇fk::T, ::tr_step) where{T}
   norm_∇fk/2^round(log2(norm_∇fk + 1))
 end
 
 """
-  step_mult(α::T, norm_∇fk::T, ::r2)
-  step_mult(α::T, norm_∇fk::T, ::R2og)
-  step_mult(α::T, norm_∇fk::T, ::tr)
+  step_mult(α::T, norm_∇fk::T, ::r2_step)
+  step_mult(α::T, norm_∇fk::T, ::tr_step)
 
 Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T}
+function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T}
   α
 end
 
-function step_mult(α::T, norm_∇fk::T, ::tr) where{T}
+function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T}
   α/norm_∇fk
 end
\ No newline at end of file
diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index ba182731..d9266d29 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -9,7 +9,7 @@ function tests()
         ("tron", tron),
         ("R2", R2),
         ("fomo_r2", fomo),
-        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)),
+        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From dc911e36c2f5b01c20ea2f3aba9ff1a59edb4f3f Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 18 Feb 2024 12:56:34 -0500
Subject: [PATCH 116/171] fix allocs tests

---
 test/allocs.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/allocs.jl b/test/allocs.jl
index f029c5f1..1fe38f23 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -30,7 +30,7 @@ end
 
 if Sys.isunix()
   @testset "Allocation tests" begin
-    @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver)
+    @testset "$symsolver" for symsolver in (:LBFGSSolver, :FoSolver, :FomoSolver, :TrunkSolver, :TronSolver)
       for model in NLPModelsTest.nlp_problems
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))

From 51c3445c5008967166e95d589d83fe292ed9ef8f Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 18 Feb 2024 14:39:24 -0500
Subject: [PATCH 117/171] add reset! function to FoSolver

---
 src/fomo.jl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index c672b7b7..7a320173 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -166,6 +166,13 @@ end
 
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
+
+function SolverCore.reset!(solver::FoSolver{T}) where {T}
+  solver
+end
+
+SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver)
+
 function SolverCore.solve!(
   solver::AbstractFirstOrderSolver,
   nlp::AbstractNLPModel{T, V},

From ece54bc2cd1352fcbabb0a1e1f3c513fec48b1f5 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sun, 18 Feb 2024 15:22:43 -0500
Subject: [PATCH 118/171] remove  `R2Solver`, replaced by `FoSolver`

---
 src/fomo.jl | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 7a320173..c9a95e81 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,4 +1,4 @@
-export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step
+export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
 
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
@@ -134,6 +134,12 @@ end
   return solve!(solver, nlp, stats; kwargs...)
 end
 
+function SolverCore.reset!(solver::FomoSolver{T}) where {T}
+  fill!(solver.m,0)
+  solver
+end
+
+SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
 mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
@@ -159,14 +165,6 @@ end
   end
 end
 
-function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m,0)
-  solver
-end
-
-SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
-
-
 function SolverCore.reset!(solver::FoSolver{T}) where {T}
   solver
 end

From 4d13504ffea8c69451b4a41c426c17ad0482bd7c Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 22:53:40 -0500
Subject: [PATCH 119/171] Update test/test_solvers.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 test/test_solvers.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_solvers.jl b/test/test_solvers.jl
index d9266d29..eb9029e1 100644
--- a/test/test_solvers.jl
+++ b/test/test_solvers.jl
@@ -9,7 +9,7 @@ function tests()
         ("tron", tron),
         ("R2", R2),
         ("fomo_r2", fomo),
-        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)),
+        ("fomo_tr", (nlp; kwargs...) -> fomo(nlp, step_backend = JSOSolvers.tr_step(); kwargs...)),
       ]
         unconstrained_nlp(solver)
         multiprecision_nlp(solver, :unc)

From 6a8af9c3496dfadd9cbfb361264dc2e989d1ae2a Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 22:54:14 -0500
Subject: [PATCH 120/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index c9a95e81..d28ff085 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -10,7 +10,7 @@ struct r2_step   <: AbstractFomoMethod end
     fomo(nlp; kwargs...)
     R2(nlp; kwargs...)
 
-A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps.
 
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 

From d6d5dc9dbcac39998137dc82e0cc6441949417d4 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 22:56:37 -0500
Subject: [PATCH 121/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index d28ff085..c96a7aff 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -12,7 +12,7 @@ struct r2_step   <: AbstractFomoMethod end
 
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps.
 
-For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)

From e9456ae879c18358e4c81566b93d8b79e83f4bfe Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:00:58 -0500
Subject: [PATCH 122/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index c96a7aff..1f2f4d4f 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -32,7 +32,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.

From f39bba2848468c09d18218e681ca4fabd390ea08 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:06:07 -0500
Subject: [PATCH 123/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 1f2f4d4f..6c66ae4a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -34,7 +34,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
-- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
+- `αmax = 1/eps(T)`: maximum step parameter for fomo solver.
 - `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.

From 5d9d12d9fd283e88df9b37a8c8d9bf9ad1fdb925 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:07:13 -0500
Subject: [PATCH 124/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6c66ae4a..0cc734ee 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -35,7 +35,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: maximum step parameter for fomo solver.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited).
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.

From aa48abde49cb177bd98f6baf5eb0de49cef481df Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:08:03 -0500
Subject: [PATCH 125/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 0cc734ee..c59f9791 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -25,6 +25,7 @@ For advanced usage:
 Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 
 # Arguments
+
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 

From bd34530d8795d941555333913e8e3e2d6d2be7c1 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:10:04 -0500
Subject: [PATCH 126/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index c59f9791..40865fa2 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -29,6 +29,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
+
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.

From dd324ef3bcef8cb9de56eb85103a944e9987314a Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:10:45 -0500
Subject: [PATCH 127/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 40865fa2..40bf8bec 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -47,6 +47,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
+
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback

From 6f52bcf6903d825e8a70f4305aac66801304a883 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:11:28 -0500
Subject: [PATCH 128/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 40bf8bec..deac5778 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -51,6 +51,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
+
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.

From 35ced21fff1db4815c7b0e3fd71e7c1af3496e1c Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:12:15 -0500
Subject: [PATCH 129/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index deac5778..645a0d68 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -68,6 +68,7 @@ Notably, you can access, and modify, the following:
   - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
+
 ## `fomo`
 ```jldoctest
 using JSOSolvers, ADNLPModels

From ed95a20c9369a174296f554792d343bf6ea4d4c5 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:12:59 -0500
Subject: [PATCH 130/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 645a0d68..b1574045 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -70,6 +70,7 @@ Notably, you can access, and modify, the following:
 # Examples
 
 ## `fomo`
+
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))

From dfd8068b367e6baf46bb3e45aa4ae1dc715aec31 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:15:43 -0500
Subject: [PATCH 131/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index b1574045..c955b365 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -136,7 +136,7 @@ end
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
   solver_specific = Dict(:avgβmax => T(0.))
-  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
+  stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 

From 3acfac02b4b46da7bc74b34b338ce6b86280e2ac Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:16:23 -0500
Subject: [PATCH 132/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index c955b365..425ee41d 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -92,6 +92,7 @@ stats = solve!(solver, nlp)
 "Execution stats: first-order stationary"
 ```
 ## `R2`
+
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))

From b34ac990086c3227cc7babbe55b2729f9a6921f0 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:16:50 -0500
Subject: [PATCH 133/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 425ee41d..edf16b14 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -142,7 +142,7 @@ end
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m,0)
+  fill!(solver.m, 0)
   solver
 end
 

From b0a04928ae3923c820443697ea7ffc83fd5653fa Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:17:15 -0500
Subject: [PATCH 134/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index edf16b14..141fe4f0 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -165,7 +165,7 @@ end
 @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
-  if haskey(kwargs,:σmin)
+  if haskey(kwargs, :σmin)
     return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...)
   else
     return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)

From 2d6055b9689f6a705628904d0797c9d70b5fe362 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:18:07 -0500
Subject: [PATCH 135/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 141fe4f0..69426ffe 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -311,8 +311,6 @@ function SolverCore.solve!(
         βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
-      end
-      if use_momentum
         avgβmax += βmax
         siter += 1
       end

From 090be62973c2be85cdb00dfbf42a37ee6fa352bc Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:19:48 -0500
Subject: [PATCH 136/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 69426ffe..414bfb70 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -218,7 +218,6 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
-  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)

From 29989a79dcffb08a9d2feff9947df127172794a8 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:20:43 -0500
Subject: [PATCH 137/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 414bfb70..79ba88b6 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -222,7 +222,7 @@ function SolverCore.solve!(
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
-  solver.α = init_alpha(norm_∇fk,step_backend)
+  solver.α = init_alpha(norm_∇fk, step_backend)
   
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk

From 83c85b120ba4f66d5d72932ca26152a8da7d3c0f Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 21 Feb 2024 23:21:29 -0500
Subject: [PATCH 138/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 79ba88b6..5c200a9e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -236,7 +236,6 @@ function SolverCore.solve!(
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
-    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if !use_momentum

From 5de664b9e2dcdbc9e1704f5434eba1f8e74b9271 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 22 Feb 2024 00:04:15 -0500
Subject: [PATCH 139/171] Add TR solver (trust region with linear model) fix
 spacing update docstrings

---
 src/fomo.jl | 92 ++++++++++++++++++++++++++---------------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 5c200a9e..fa811924 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -2,75 +2,69 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
 
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
-abstract type AbstractFomoMethod end
-struct tr_step   <: AbstractFomoMethod end
-struct r2_step   <: AbstractFomoMethod end
+abstract type AbstractFOMethod end
+struct tr_step   <: AbstractFOMethod end
+struct r2_step   <: AbstractFOMethod end
 
 """
     fomo(nlp; kwargs...)
     R2(nlp; kwargs...)
 
-A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps.
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
 
-For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`:
+For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method.
+**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
 For advanced usage:
 
     solver = FoSolver(nlp)
-    solve!(solver, nlp; kwargs...)
-Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`).
-
+    solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x)
+    solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ 
+    
 # Arguments
-
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
-
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
 - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
-- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
-- `αmax = 1/eps(T)`: maximum step parameter for fomo solver.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited).
+- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
-
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
-
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.
-In particular, setting `stats.status = :user` will stop the algorithm.
+In particular, setting `stats.status = :user || stats.stats = :unknown` will stop the algorithm.
 All relevant information should be available in `nlp` and `solver`.
 Notably, you can access, and modify, the following:
 - `solver.x`: current iterate;
 - `solver.gx`: current gradient;
 - `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
-  - `stats.dual_feas`: norm of current gradient;
-  - `stats.iter`: current iteration counter;
-  - `stats.objective`: current objective function value;
-  - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
-  - `stats.elapsed_time`: elapsed time in seconds.
+    - `stats.dual_feas`: norm of current gradient;
+    - `stats.iter`: current iteration counter;
+    - `stats.objective`: current objective function value;
+    - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+    - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
-
 ## `fomo`
-
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -92,7 +86,6 @@ stats = solve!(solver, nlp)
 "Execution stats: first-order stationary"
 ```
 ## `R2`
-
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -137,12 +130,12 @@ end
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
   solver_specific = Dict(:avgβmax => T(0.))
-  stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
+  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m, 0)
+  fill!(solver.m,0)
   solver
 end
 
@@ -162,14 +155,18 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
-@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
-  if haskey(kwargs, :σmin)
-    return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...)
-  else
-    return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
-  end
+  return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
+end
+
+@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  fo(nlp; step_backend = r2_step(), kwargs...)
+end
+
+@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+  fo(nlp; step_backend = tr_step(), kwargs...)
 end
 
 function SolverCore.reset!(solver::FoSolver{T}) where {T}
@@ -200,7 +197,6 @@ function SolverCore.solve!(
   θ2::T = T(eps(T)^(1/3)),
   verbose::Int = 0,
   step_backend = r2_step(),
-  σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2`
 ) where {T, V}
   use_momentum = typeof(solver) <: FomoSolver
   unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
@@ -218,6 +214,7 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
+  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
@@ -236,14 +233,15 @@ function SolverCore.solve!(
       @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
+    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
     if !use_momentum
       @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0
     else
       @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0
+      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0
     end
   end
 
@@ -273,7 +271,7 @@ function SolverCore.solve!(
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
-    λk = step_mult(solver.α,norm_d,step_backend)
+    λk = step_mult(solver.α, norm_d, step_backend)
     c .= x .- λk .* d
     step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0
     ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum
@@ -299,7 +297,7 @@ function SolverCore.solve!(
       x .= c
       if use_momentum
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
-        mdot∇f = dot(momentum,∇fk)
+        mdot∇f = dot(momentum, ∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
@@ -309,6 +307,8 @@ function SolverCore.solve!(
         βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
+      end
+      if use_momentum
         avgβmax += βmax
         siter += 1
       end
@@ -343,8 +343,8 @@ function SolverCore.solve!(
     
     callback(nlp, solver, stats)
 
-    step_underflow  && set_status!(stats,:small_step)
-    solver.α == 0         && set_status!(stats,:exception) # :small_nlstep exception should happen before
+    step_underflow  && set_status!(stats, :small_step)
+    solver.α == 0         && set_status!(stats, :exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
@@ -357,13 +357,13 @@ function SolverCore.solve!(
 end
 
 """
-find_beta(m, md∇f, norm_∇f, β, θ1, θ2)
+find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖²
-2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖
-with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` 
+1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
+2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
+with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` 
 """ 
 function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
   n1 = norm_∇f^2 - mdot∇f

From 5bc9befcb112968eeaa2529f813457fe3bba8ed8 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Mon, 4 Mar 2024 14:33:19 -0500
Subject: [PATCH 140/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index fa811924..d1795391 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -18,6 +18,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
     solve!(solver, nlp; kwargs...)
 
 **No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
+
 For advanced usage:
 
     solver = FoSolver(nlp)

From 192944ec22e15cbc0ee4ab5d492a3cb4e21235a6 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Mon, 4 Mar 2024 15:36:16 -0500
Subject: [PATCH 141/171] fix docstrings and verbose display

---
 src/fomo.jl | 90 +++++++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 41 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index d1795391..40b14e8c 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -3,8 +3,8 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
 abstract type AbstractFOMethod end
-struct tr_step   <: AbstractFOMethod end
-struct r2_step   <: AbstractFOMethod end
+struct tr_step <: AbstractFOMethod end
+struct r2_step <: AbstractFOMethod end
 
 """
     fomo(nlp; kwargs...)
@@ -130,19 +130,19 @@ end
 
 @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FomoSolver(nlp)
-  solver_specific = Dict(:avgβmax => T(0.))
-  stats = GenericExecutionStats(nlp;solver_specific=solver_specific)
+  solver_specific = Dict(:avgβmax => T(0.0))
+  stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
   return solve!(solver, nlp, stats; kwargs...)
 end
 
 function SolverCore.reset!(solver::FomoSolver{T}) where {T}
-  fill!(solver.m,0)
+  fill!(solver.m, 0)
   solver
 end
 
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
-mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
+@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
   g::V
   c::V
@@ -186,22 +186,23 @@ function SolverCore.solve!(
   rtol::T = √eps(T),
   η1::T = T(eps(T)^(1 / 4)),
   η2::T = T(0.95),
-  γ1::T = T(1/2),
+  γ1::T = T(1 / 2),
   γ2::T = T(2),
-  γ3::T = T(1/2),
-  αmax::T = 1/eps(T),
+  γ3::T = T(1 / 2),
+  αmax::T = 1 / eps(T),
   max_time::Float64 = 30.0,
   max_eval::Int = -1,
   max_iter::Int = typemax(Int),
   β::T = T(0.9),
   θ1::T = T(0.1),
-  θ2::T = T(eps(T)^(1/3)),
+  θ2::T = T(eps(T)^(1 / 3)),
   verbose::Int = 0,
   step_backend = r2_step(),
 ) where {T, V}
   use_momentum = typeof(solver) <: FomoSolver
+  is_r2 = typeof(step_backend) <: r2_step
   unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")
-  
+
   reset!(stats)
   start_time = time()
   set_time!(stats, 0.0)
@@ -215,34 +216,38 @@ function SolverCore.solve!(
   set_iter!(stats, 0)
   set_objective!(stats, obj(nlp, x))
 
-  
   grad!(nlp, x, ∇fk)
   norm_∇fk = norm(∇fk)
   set_dual_residual!(stats, norm_∇fk)
 
   solver.α = init_alpha(norm_∇fk, step_backend)
-  
+
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
+  header = ["iter", "f", "‖∇f‖", "α"]
   if optimal
     @info("Optimal point found at initial point")
-    if !use_momentum
-      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "σ"
-      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α
+    if is_r2
+      @info @sprintf "%5s  %9s  %7s  %7s " header...
+      @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α
     else
-      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" "α"
+      @info @sprintf "%5s  %9s  %7s  %7s " header...
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
-    
   end
   if verbose > 0 && mod(stats.iter, verbose) == 0
+    push!(header, "ρk")
+    step_param = is_r2 ? 1 / solver.α : solver.α
     if !use_momentum
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "σ" "ρk"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s " header...
+      infoline =
+        @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param
     else
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax"
-      infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0
+      push!(header, "βmax")
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " header...
+      infoline =
+        @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0
     end
   end
 
@@ -305,7 +310,7 @@ function SolverCore.solve!(
       norm_∇fk = norm(∇fk)
       if use_momentum
         p .= momentum .- ∇fk
-        βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2)
+        βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
@@ -322,10 +327,13 @@ function SolverCore.solve!(
 
     if verbose > 0 && mod(stats.iter, verbose) == 0
       @info infoline
+      step_param = is_r2 ? 1 / solver.α : solver.α
       if !use_momentum
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk
+        infoline =
+          @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ρk
       else
-        infoline = @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax
+        infoline =
+          @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ρk βmax
       end
     end
 
@@ -341,11 +349,11 @@ function SolverCore.solve!(
         max_time = max_time,
       ),
     )
-    
+
     callback(nlp, solver, stats)
 
-    step_underflow  && set_status!(stats, :small_step)
-    solver.α == 0         && set_status!(stats, :exception) # :small_nlstep exception should happen before
+    step_underflow && set_status!(stats, :small_step)
+    solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before
 
     done = stats.status != :unknown
   end
@@ -362,16 +370,16 @@ find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
-1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
+1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
 with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` 
-""" 
-function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V}
+"""
+function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V}
   n1 = norm_∇f^2 - mdot∇f
   n2 = norm(p)
-  β1 = n1 > 0  ? (1-θ1)*norm_∇f^2/(n1)  : β
-  β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β
-  return min(β,min(β1,β2))
+  β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β
+  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β
+  return min(β, min(β1, β2))
 end
 
 """
@@ -380,12 +388,12 @@ end
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """
-function init_alpha(norm_∇fk::T, ::r2_step) where{T}
-  1/2^round(log2(norm_∇fk + 1))
+function init_alpha(norm_∇fk::T, ::r2_step) where {T}
+  1 / 2^round(log2(norm_∇fk + 1))
 end
 
-function init_alpha(norm_∇fk::T, ::tr_step) where{T}
-  norm_∇fk/2^round(log2(norm_∇fk + 1))
+function init_alpha(norm_∇fk::T, ::tr_step) where {T}
+  norm_∇fk / 2^round(log2(norm_∇fk + 1))
 end
 
 """
@@ -394,10 +402,10 @@ end
 
 Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """
-function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T}
+function step_mult(α::T, norm_∇fk::T, ::r2_step) where {T}
   α
 end
 
-function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T}
-  α/norm_∇fk
+function step_mult(α::T, norm_∇fk::T, ::tr_step) where {T}
+  α / norm_∇fk
 end
\ No newline at end of file

From ad65f30fc77035d93e23c98d81d5e1e27f6937d4 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 5 Mar 2024 14:18:18 -0500
Subject: [PATCH 142/171] update docstring, update info display, fix solver arg
 type, export TR

---
 src/fomo.jl | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 40b14e8c..e51e5b8a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,4 +1,4 @@
-export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step
+export fomo, FomoSolver, FoSolver, R2, TR, tr_step, r2_step
 
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 
@@ -10,14 +10,21 @@ struct r2_step <: AbstractFOMethod end
     fomo(nlp; kwargs...)
     R2(nlp; kwargs...)
 
-A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods.
+A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
+The step is perform along d with
+d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1)
+with mk the memory of past gradients updated at each successful iteration as
+mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2)
+and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
+(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3)
+‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (4)
 
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
+**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
 
 For advanced usage:
 
@@ -40,8 +47,8 @@ For advanced usage:
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β].
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3).
+- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -177,7 +184,7 @@ end
 SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
-  solver::AbstractFirstOrderSolver,
+  solver::Union{FoSolver,FomoSolver},
   nlp::AbstractNLPModel{T, V},
   stats::GenericExecutionStats{T, V};
   callback = (args...) -> nothing,
@@ -225,7 +232,8 @@ function SolverCore.solve!(
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
-  header = ["iter", "f", "‖∇f‖", "α"]
+  header = ["iter", "f", "‖∇f‖"]
+  is_r2 ? push!(header,"σ") : push!(header,"Δ")
   if optimal
     @info("Optimal point found at initial point")
     if is_r2

From d0de9dac9d7535174f378d219a4d8b575e5dff5f Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 5 Mar 2024 15:14:32 -0500
Subject: [PATCH 143/171] update docstring

---
 src/fomo.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index e51e5b8a..ed07d668 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -13,8 +13,8 @@ struct r2_step <: AbstractFOMethod end
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
 The step is perform along d with
 d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1)
-with mk the memory of past gradients updated at each successful iteration as
-mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2)
+with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
+mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2)
 and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
 (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3)
 ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (4)

From a7077c9f941677bd02a50adc93791504e8de0e2a Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Wed, 6 Mar 2024 12:02:58 -0500
Subject: [PATCH 144/171] fix grad and momentum dot product

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ed07d668..d31c6b0e 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -311,12 +311,12 @@ function SolverCore.solve!(
       x .= c
       if use_momentum
         momentum .= ∇fk .* (oneT - β) .+ momentum .* β
-        mdot∇f = dot(momentum, ∇fk)
       end
       set_objective!(stats, fck)
       grad!(nlp, x, ∇fk)
       norm_∇fk = norm(∇fk)
       if use_momentum
+        mdot∇f = dot(momentum, ∇fk)
         p .= momentum .- ∇fk
         βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax

From 67b6da27a21be74f7ac5a63f0950d0243734b27f Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Wed, 6 Mar 2024 12:19:05 -0500
Subject: [PATCH 145/171] cosmetics

---
 src/fomo.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index d31c6b0e..0d15d1cd 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -48,7 +48,7 @@ For advanced usage:
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
 - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3).
-- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
+- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -385,8 +385,8 @@ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm`
 function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V}
   n1 = norm_∇f^2 - mdot∇f
   n2 = norm(p)
-  β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β
-  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β
+  β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β
+  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β
   return min(β, min(β1, β2))
 end
 

From 62995a4978b1779357f0cabf3e14ef12a4652371 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Thu, 7 Mar 2024 11:18:24 -0500
Subject: [PATCH 146/171] Create FoSolver docstring

---
 src/fomo.jl | 130 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 89 insertions(+), 41 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 0d15d1cd..2e2527e1 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -8,29 +8,26 @@ struct r2_step <: AbstractFOMethod end
 
 """
     fomo(nlp; kwargs...)
-    R2(nlp; kwargs...)
 
 A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
+
+# Algorithm description
+
 The step is perform along d with
-d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1)
+d = - (1-βmax) .* ∇f(xk) - βmax .* mk
 with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
-mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2)
+mk .= ∇f(xk) .* (1 - β) .+ mk .* β
 and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
-(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3)
-‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (4)
+(1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖²  (1)
+‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) .* ∇f(xk) .+ βmax .* mk‖      (2)
 
+# Advanced usage
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)
     solve!(solver, nlp; kwargs...)
 
-**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods.
-
-For advanced usage:
-
-    solver = FoSolver(nlp)
-    solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x)
-    solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ 
+**No momentum**: if the user does not wish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method.
     
 # Arguments
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
@@ -47,8 +44,8 @@ For advanced usage:
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3).
-- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). 
+- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1).
+- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
@@ -91,27 +88,6 @@ stats = solve!(solver, nlp)
 
 # output
 
-"Execution stats: first-order stationary"
-```
-## `R2`
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-stats = R2(nlp)
-
-# output
-
-"Execution stats: first-order stationary"
-```
-
-```jldoctest
-using JSOSolvers, ADNLPModels
-nlp = ADNLPModel(x -> sum(x.^2), ones(3))
-solver = FoSolver(nlp);
-stats = solve!(solver, nlp)
-
-# output
-
 "Execution stats: first-order stationary"
 ```
 """
@@ -149,7 +125,79 @@ end
 
 SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver)
 
-@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
+"""
+    fo(nlp; kwargs...)
+    R2(nlp; kwargs...)
+    TR(nlp; kwargs...)
+
+A First-Order (FO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model.
+
+For advanced usage, first define a `FoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
+
+    solver = FoSolver(nlp)
+    solve!(solver, nlp; kwargs...)
+
+`R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument.
+
+# Arguments
+- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
+
+# Keyword arguments 
+- `x::V = nlp.meta.x0`: the initial guess.
+- `atol::T = √eps(T)`: absolute tolerance.
+- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
+- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
+- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
+- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
+- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_time::Float64 = 30.0`: maximum time limit in seconds.
+- `max_iter::Int = typemax(Int)`: maximum number of iterations.
+- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
+- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
+
+# Output
+The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
+
+# Callback
+The callback is called at each iteration.
+The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
+Changing any of the input arguments will affect the subsequent iterations.
+In particular, setting `stats.status = :user` will stop the algorithm.
+All relevant information should be available in `nlp` and `solver`.
+Notably, you can access, and modify, the following:
+- `solver.x`: current iterate;
+- `solver.gx`: current gradient;
+- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things:
+    - `stats.dual_feas`: norm of current gradient;
+    - `stats.iter`: current iteration counter;
+    - `stats.objective`: current objective function value;
+    - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention.
+    - `stats.elapsed_time`: elapsed time in seconds.
+
+# Examples
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+stats = fo(nlp) # run with step_backend = r2_step(), equivalent to R2(nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+
+```jldoctest
+using JSOSolvers, ADNLPModels
+nlp = ADNLPModel(x -> sum(x.^2), ones(3))
+solver = FoSolver(nlp);
+stats = solve!(solver, nlp)
+
+# output
+
+"Execution stats: first-order stationary"
+```
+"""
+mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
   x::V
   g::V
   c::V
@@ -163,17 +211,17 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
-@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)
   return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
 end
 
-@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   fo(nlp; step_backend = r2_step(), kwargs...)
 end
 
-@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
+@doc (@doc FoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   fo(nlp; step_backend = tr_step(), kwargs...)
 end
 
@@ -184,7 +232,7 @@ end
 SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver)
 
 function SolverCore.solve!(
-  solver::Union{FoSolver,FomoSolver},
+  solver::Union{FoSolver, FomoSolver},
   nlp::AbstractNLPModel{T, V},
   stats::GenericExecutionStats{T, V};
   callback = (args...) -> nothing,
@@ -233,7 +281,7 @@ function SolverCore.solve!(
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
   header = ["iter", "f", "‖∇f‖"]
-  is_r2 ? push!(header,"σ") : push!(header,"Δ")
+  is_r2 ? push!(header, "σ") : push!(header, "Δ")
   if optimal
     @info("Optimal point found at initial point")
     if is_r2

From e6f7a229fa5c5c9e9d75e63606615bba6beaf665 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Thu, 7 Mar 2024 10:37:12 -0500
Subject: [PATCH 147/171] Update test/restart.jl

Co-authored-by: Tangi Migot <tangi.migot@gmail.com>
---
 test/restart.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/restart.jl b/test/restart.jl
index eb770739..e6b75cc1 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -44,7 +44,7 @@ end
 end
 
 @testset "Test restart with a different problem: $fun" for (fun, s) in (
-  (:R2, :FomoSolver),
+  (:R2, :FoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),

From 9126c0847f57cdef7409a4947898f6474bf0f58a Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Thu, 7 Mar 2024 10:37:22 -0500
Subject: [PATCH 148/171] Update test/restart.jl

Co-authored-by: Tangi Migot <tangi.migot@gmail.com>
---
 test/restart.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/restart.jl b/test/restart.jl
index e6b75cc1..38765465 100644
--- a/test/restart.jl
+++ b/test/restart.jl
@@ -1,5 +1,5 @@
 @testset "Test restart with a different initial guess: $fun" for (fun, s) in (
-  (:R2, :FomoSolver),
+  (:R2, :FoSolver),
   (:fomo, :FomoSolver),
   (:lbfgs, :LBFGSSolver),
   (:tron, :TronSolver),

From 06eb8f83b925f2237e434bed60024ddb3aa4b009 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Fri, 8 Mar 2024 11:31:07 -0500
Subject: [PATCH 149/171] deprecate R2Solver

---
 src/fomo.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 2e2527e1..002ba712 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -211,6 +211,8 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
+Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...)
+
 @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
   solver = FoSolver(nlp)
   stats = GenericExecutionStats(nlp)

From d6750c354a7b457cd86ba1ed03cd298fe5a5e8fc Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:13:41 -0500
Subject: [PATCH 150/171] Update src/fomo.jl

Co-authored-by: Tangi Migot <tangi.migot@gmail.com>
---
 src/fomo.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 002ba712..9009c36a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -211,6 +211,11 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V}
   return FoSolver{T, V}(x, g, c, T(0))
 end
 
+"""
+`R2Solver` is deprecated, please check the documentation of `R2`.
+"""
+mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end
+
 Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...)
 
 @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}

From 39c30532f6dc1abd59d6f90147e1705bb45d5e79 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:14:27 -0500
Subject: [PATCH 151/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 9009c36a..ba22964b 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -13,7 +13,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza
 
 # Algorithm description
 
-The step is perform along d with
+The step is computed along
 d = - (1-βmax) .* ∇f(xk) - βmax .* mk
 with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
 mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax

From 96c82d231b443064b49649e5cfc6f4653cd37798 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:14:45 -0500
Subject: [PATCH 152/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index ba22964b..20b5cce1 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -15,7 +15,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza
 
 The step is computed along
 d = - (1-βmax) .* ∇f(xk) - βmax .* mk
-with mk the memory of past gradients (initiated with 0) updated at each successful iteration as
+with mk the memory of past gradients (initialized at 0), and updated at each successful iteration as
 mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax
 and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied:
 (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1)

From 0899b59e0b6fdfbd50d6027cccabf8ece2aa7d98 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:15:14 -0500
Subject: [PATCH 153/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 20b5cce1..35d497a4 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -22,6 +22,7 @@ and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the follow
 ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖       (2)
 
 # Advanced usage
+
 For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`:
 
     solver = FomoSolver(nlp)

From dbb1bbc2e9fab1d2a8783703e308a6e83bc8245b Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:15:36 -0500
Subject: [PATCH 154/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index 35d497a4..ac78ecbc 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -31,6 +31,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 **No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method.
     
 # Arguments
+
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 

From c00423df0d5be0a34bd4c67c65efaeb18f0de2eb Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:15:57 -0500
Subject: [PATCH 155/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/fomo.jl b/src/fomo.jl
index ac78ecbc..88f42570 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -35,6 +35,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
+
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.

From bfec5ddc668a2e1e16df2f8945fd2ec7f1fa2e31 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:16:14 -0500
Subject: [PATCH 156/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 88f42570..edc5608a 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -432,7 +432,7 @@ function SolverCore.solve!(
 end
 
 """
-find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
+    find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
 Compute βmax which saturates the contibution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 

From 825a28ab3720fab230e9dbedb963e151154e38a9 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:16:30 -0500
Subject: [PATCH 157/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index edc5608a..6cf1e221 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -434,7 +434,7 @@ end
 """
     find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2)
 
-Compute βmax which saturates the contibution of the momentum term to the gradient.
+Compute value `βmax` that saturates the contribution of the momentum term to the gradient.
 `βmax` is computed such that the two gradient-related conditions are ensured: 
 1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖²
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖

From 79e758d15640e2a96c72db8d1343099c0e5cf45a Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:16:48 -0500
Subject: [PATCH 158/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 6cf1e221..2043fa07 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -449,8 +449,8 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where
 end
 
 """
-  init_alpha(norm_∇fk::T, ::r2_step)
-  init_alpha(norm_∇fk::T, ::tr_step)
+    init_alpha(norm_∇fk::T, ::r2_step)
+    init_alpha(norm_∇fk::T, ::tr_step)
 
 Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
 """

From 3d43e3d5fe2349cbeede31bc142bc9c57aa7238d Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:17:11 -0500
Subject: [PATCH 159/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 2043fa07..b5ebcdfc 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -452,7 +452,8 @@ end
     init_alpha(norm_∇fk::T, ::r2_step)
     init_alpha(norm_∇fk::T, ::tr_step)
 
-Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods.
+Initialize `α` step size parameter.
+Ensure first step is the same for quadratic regularization and trust region methods.
 """
 function init_alpha(norm_∇fk::T, ::r2_step) where {T}
   1 / 2^round(log2(norm_∇fk + 1))

From 40de1e47e53b425c23c1514894401a196d7b3298 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:17:25 -0500
Subject: [PATCH 160/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index b5ebcdfc..96c994c3 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -464,8 +464,8 @@ function init_alpha(norm_∇fk::T, ::tr_step) where {T}
 end
 
 """
-  step_mult(α::T, norm_∇fk::T, ::r2_step)
-  step_mult(α::T, norm_∇fk::T, ::tr_step)
+    step_mult(α::T, norm_∇fk::T, ::r2_step)
+    step_mult(α::T, norm_∇fk::T, ::tr_step)
 
 Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`).
 """

From fac145eda2f74d5293b1e65d9ae22198ddabfe89 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:17:48 -0500
Subject: [PATCH 161/171] Update src/fomo.jl

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 96c994c3..917f1fff 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -43,7 +43,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
 - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
 - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
+- `max_eval::Int = -1`: maximum number of objective evaluations.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
 - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.

From bbc97a017da79181a861f5a2587493d8b47c8780 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:19:42 -0500
Subject: [PATCH 162/171] Apply suggestions from code review

Co-authored-by: Dominique <dominique.orban@gmail.com>
---
 src/fomo.jl | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 917f1fff..560d7505 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -46,16 +46,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `max_eval::Int = -1`: maximum number of objective evaluations.
 - `max_time::Float64 = 30.0`: maximum time limit in seconds.
 - `max_iter::Int = typemax(Int)`: maximum number of iterations.
-- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum.
-- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1).
-- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). 
+- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum.
+- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1).
+- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2). 
 - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
+
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
+
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.
@@ -72,7 +74,9 @@ Notably, you can access, and modify, the following:
     - `stats.elapsed_time`: elapsed time in seconds.
 
 # Examples
+
 ## `fomo`
+
 ```jldoctest
 using JSOSolvers, ADNLPModels
 nlp = ADNLPModel(x -> sum(x.^2), ones(3))
@@ -143,9 +147,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 `R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument.
 
 # Arguments
+
 - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`.
 
 # Keyword arguments 
+
 - `x::V = nlp.meta.x0`: the initial guess.
 - `atol::T = √eps(T)`: absolute tolerance.
 - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
@@ -159,9 +165,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
 - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
 
 # Output
+
 The value returned is a `GenericExecutionStats`, see `SolverCore.jl`.
 
 # Callback
+
 The callback is called at each iteration.
 The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored.
 Changing any of the input arguments will affect the subsequent iterations.

From 9035f4004c878674cd1024973dc02e11293b04cb Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 9 Mar 2024 17:34:27 -0500
Subject: [PATCH 163/171] exports fo

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 560d7505..30acd96b 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -1,4 +1,4 @@
-export fomo, FomoSolver, FoSolver, R2, TR, tr_step, r2_step
+export fomo, FomoSolver, FoSolver, fo, R2, TR, tr_step, r2_step
 
 abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end
 

From ea3fcf7313ca1215e03131ef2e9592cf92a7ad79 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 9 Mar 2024 17:58:21 -0500
Subject: [PATCH 164/171] remove header to test allocation

---
 src/fomo.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 30acd96b..1d098ba9 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -298,15 +298,15 @@ function SolverCore.solve!(
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
-  header = ["iter", "f", "‖∇f‖"]
-  is_r2 ? push!(header, "σ") : push!(header, "Δ")
+  #header = ["iter", "f", "‖∇f‖"]
+  #is_r2 ? push!(header, "σ") : push!(header, "Δ")
   if optimal
     @info("Optimal point found at initial point")
     if is_r2
-      @info @sprintf "%5s  %9s  %7s  %7s " header...
+      @info @sprintf "%5s  %9s  %7s  %7s " #header...
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α
     else
-      @info @sprintf "%5s  %9s  %7s  %7s " header...
+      @info @sprintf "%5s  %9s  %7s  %7s " #header...
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
   end
@@ -314,12 +314,12 @@ function SolverCore.solve!(
     push!(header, "ρk")
     step_param = is_r2 ? 1 / solver.α : solver.α
     if !use_momentum
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s " header...
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s " #header...
       infoline =
         @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param
     else
       push!(header, "βmax")
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " header...
+      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " #header...
       infoline =
         @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0
     end

From 3984fe15eaa9d1d2a3e75512d667e76ffed8a9f7 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 9 Mar 2024 18:00:50 -0500
Subject: [PATCH 165/171] update readme

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d7556a3e..cd21ff94 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,8 @@ This package provides an implementation of four classic algorithms for unconstra
     > high-order regularized models. *Mathematical Programming*, 163(1), 359-368.
     > DOI: [10.1007/s10107-016-1065-8](https://doi.org/10.1007/s10107-016-1065-8)
 
-  
+- `fomo`: a first-order method with momentum for unconstrained optimization;
+
 - `tron`: a pure Julia implementation of TRON, a trust-region solver for bound-constrained optimization described in
 
     >  Chih-Jen Lin and Jorge J. Moré, *Newton's Method for Large Bound-Constrained

From fb2525ed51bb20b79864c28409edf4973381757b Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 9 Mar 2024 18:21:15 -0500
Subject: [PATCH 166/171] fix header allocation

---
 src/fomo.jl | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 1d098ba9..d3f3132c 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -298,30 +298,28 @@ function SolverCore.solve!(
   # Stopping criterion: 
   ϵ = atol + rtol * norm_∇fk
   optimal = norm_∇fk ≤ ϵ
-  #header = ["iter", "f", "‖∇f‖"]
-  #is_r2 ? push!(header, "σ") : push!(header, "Δ")
+  step_param_name = is_r2 ? "σ" : "Δ"
   if optimal
     @info("Optimal point found at initial point")
     if is_r2
-      @info @sprintf "%5s  %9s  %7s  %7s " #header...
+      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" step_param_name
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α
     else
-      @info @sprintf "%5s  %9s  %7s  %7s " #header...
+      @info @sprintf "%5s  %9s  %7s  %7s " "iter" "f" "‖∇f‖" step_param_name
       @info @sprintf "%5d  %9.2e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk solver.α
     end
-  end
-  if verbose > 0 && mod(stats.iter, verbose) == 0
-    push!(header, "ρk")
-    step_param = is_r2 ? 1 / solver.α : solver.α
-    if !use_momentum
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s " #header...
-      infoline =
-        @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param
-    else
-      push!(header, "βmax")
-      @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " #header...
-      infoline =
-        @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0
+  else
+    if verbose > 0 && mod(stats.iter, verbose) == 0
+      step_param = is_r2 ? 1 / solver.α : solver.α
+      if !use_momentum
+        @info @sprintf "%5s  %9s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" step_param_name "ρk"
+        infoline =
+          @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ' '
+      else
+        @info @sprintf "%5s  %9s  %7s  %7s  %7s  %7s " "iter" "f" "‖∇f‖" step_param_name "ρk" "βmax"
+        infoline =
+          @sprintf "%5d  %9.2e  %7.1e  %7.1e  %7.1e  %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0
+      end
     end
   end
 

From 04f7709de726bb7b1cb5ef8dd7efac8bcc3694dc Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 9 Mar 2024 18:36:14 -0500
Subject: [PATCH 167/171] try to fix fomo allocation

---
 src/fomo.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index d3f3132c..9c9e39cc 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -382,7 +382,8 @@ function SolverCore.solve!(
       if use_momentum
         mdot∇f = dot(momentum, ∇fk)
         p .= momentum .- ∇fk
-        βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2)
+        diff_norm = norm(p)
+        βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
       end
@@ -446,11 +447,10 @@ Compute value `βmax` that saturates the contribution of the momentum term to th
 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖
 with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` 
 """
-function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V}
+function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V}
   n1 = norm_∇f^2 - mdot∇f
-  n2 = norm(p)
   β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β
-  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β
+  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / diff_norm : β
   return min(β, min(β1, β2))
 end
 

From a0324fdd18fde8f111ec221f8d1a839114f6b0f3 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Sat, 9 Mar 2024 18:45:46 -0500
Subject: [PATCH 168/171] fix find_beta

---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 9c9e39cc..331c7214 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -450,7 +450,7 @@ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm`
 function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V}
   n1 = norm_∇f^2 - mdot∇f
   β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β
-  β2 = n2 != 0 ? (1 - θ2) * norm_∇f / diff_norm : β
+  β2 = diff_norm != 0 ? (1 - θ2) * norm_∇f / diff_norm : β
   return min(β, min(β1, β2))
 end
 

From a5c8c7d4d6de4f84805a72d462aea9633734e0dd Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Tue, 12 Mar 2024 16:29:35 -0400
Subject: [PATCH 169/171] remove unnecessary if condition, comment out line
 causing possible allocation

---
 src/fomo.jl | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 331c7214..72f68209 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -345,7 +345,7 @@ function SolverCore.solve!(
   βmax = T(0)
   ρk = T(0)
   avgβmax = T(0)
-  siter = 0
+  siter::Int = 0
   oneT = T(1)
   mdot∇f = T(0) # dot(momentum,∇fk)
   while !done
@@ -386,8 +386,6 @@ function SolverCore.solve!(
         βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2)
         d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax
         norm_d = norm(d)
-      end
-      if use_momentum
         avgβmax += βmax
         siter += 1
       end
@@ -432,7 +430,7 @@ function SolverCore.solve!(
   end
   if use_momentum
     avgβmax /= siter
-    stats.solver_specific[:avgβmax] = avgβmax
+    # stats.solver_specific[:avgβmax] = avgβmax
   end
   set_solution!(stats, x)
   return stats

From eee3823ab9cd3ec2bfee29813a922164995d27f5 Mon Sep 17 00:00:00 2001
From: d-monnet <70266099+d-monnet@users.noreply.github.com>
Date: Wed, 13 Mar 2024 11:51:09 -0400
Subject: [PATCH 170/171] Update src/fomo.jl

Co-authored-by: Tangi Migot <tangi.migot@gmail.com>
---
 src/fomo.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fomo.jl b/src/fomo.jl
index 72f68209..d8e206bb 100644
--- a/src/fomo.jl
+++ b/src/fomo.jl
@@ -430,7 +430,7 @@ function SolverCore.solve!(
   end
   if use_momentum
     avgβmax /= siter
-    # stats.solver_specific[:avgβmax] = avgβmax
+    set_solver_specific!(stats, :avgβmax, avgβmax)
   end
   set_solution!(stats, x)
   return stats

From d5f409dc5bc34dd6f52911b7826511f8977dd767 Mon Sep 17 00:00:00 2001
From: d-monnet <monnetdo@gmail.com>
Date: Wed, 13 Mar 2024 11:57:19 -0400
Subject: [PATCH 171/171] fix allocation tests: pre-allocate solver_specific
 field in stats.

---
 test/allocs.jl | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/test/allocs.jl b/test/allocs.jl
index 1fe38f23..f5768c4a 100644
--- a/test/allocs.jl
+++ b/test/allocs.jl
@@ -35,7 +35,12 @@ if Sys.isunix()
         nlp = eval(Meta.parse(model))()
         if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver))
           solver = eval(symsolver)(nlp)
-          stats = GenericExecutionStats(nlp)
+          if symsolver == :FomoSolver
+            T = eltype(nlp.meta.x0)
+            stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0)))
+          else
+            stats = GenericExecutionStats(nlp)
+          end
           with_logger(NullLogger()) do
             SolverCore.solve!(solver, nlp, stats)
             reset!(solver)