From 544c28839cfdaef3238690b92571f7f9b7e9734c Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 21 Dec 2023 12:04:12 -0500 Subject: [PATCH 001/171] add fomo solver --- docs/src/solvers.md | 4 +- src/JSOSolvers.jl | 1 + src/fomo.jl | 288 +++++++++++++++++++++++++++++++++++++++++++ test/allocs.jl | 2 +- test/callback.jl | 5 + test/consistency.jl | 5 +- test/restart.jl | 2 + test/runtests.jl | 2 +- test/test_solvers.jl | 1 + 9 files changed, 305 insertions(+), 5 deletions(-) create mode 100644 src/fomo.jl diff --git a/docs/src/solvers.md b/docs/src/solvers.md index 06fe0eed..322f7c2e 100644 --- a/docs/src/solvers.md +++ b/docs/src/solvers.md @@ -6,10 +6,11 @@ - [`tron`](@ref) - [`trunk`](@ref) - [`R2`](@ref) +- [`fomo`](@ref) | Problem type | Solvers | | --------------------- | -------- | -| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref)| +| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref)| | Unconstrained NLS | [`trunk`](@ref), [`tron`](@ref) | | Bound-constrained NLP | [`tron`](@ref) | | Bound-constrained NLS | [`tron`](@ref) | @@ -21,4 +22,5 @@ lbfgs tron trunk R2 +fomo ``` diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index cd65c9b2..79abace3 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -14,6 +14,7 @@ export solve! include("lbfgs.jl") include("trunk.jl") include("R2.jl") +include("fomo.jl") # Unconstrained solvers for NLS include("trunkls.jl") diff --git a/src/fomo.jl b/src/fomo.jl new file mode 100644 index 00000000..711e7fa3 --- /dev/null +++ b/src/fomo.jl @@ -0,0 +1,288 @@ +export fomo, FomoSolver, tr, qr + +abstract type AbstractFomoMethod end + +struct tr <: AbstractFomoMethod end +struct qr <: AbstractFomoMethod end + +""" + fomo(nlp; kwargs...) + +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. + +For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = FomoSolver(nlp) + solve!(solver, nlp; kwargs...) + +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. +- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2. +- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters. +- `αmax = 1/eps(T)`: step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `β = T(0) ∈ [0,1)` : constant in the momentum term. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. +- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.gx`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = fomo(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FomoSolver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver + x::V + g::V + c::V + m::V +end + +function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} + x = similar(nlp.meta.x0) + g = similar(nlp.meta.x0) + c = similar(nlp.meta.x0) + m = fill!(similar(nlp.meta.x0), 0) + return FomoSolver{T, V}(x, g, c, m) +end + +@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + solver = FomoSolver(nlp) + return solve!(solver, nlp; kwargs...) +end + +function SolverCore.reset!(solver::FomoSolver{T}) where {T} + fill!(solver.m,0) + solver +end +SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) + +function SolverCore.solve!( + solver::FomoSolver{T, V}, + nlp::AbstractNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = nlp.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + η1 = eps(T)^(1 / 4), + η2 = T(0.2), + κg = T(0.8), + γ1 = T(0.5), + γ2 = T(2), + αmax = 1/eps(T), + max_time::Float64 = 30.0, + max_eval::Int = -1, + max_iter::Int = typemax(Int), + β::T = T(0.9), + verbose::Int = 0, + backend = qr() +) where {T, V} + unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") + + reset!(stats) + start_time = time() + set_time!(stats, 0.0) + + x = solver.x .= x + ∇fk = solver.g + c = solver.c + m = solver.m + + set_iter!(stats, 0) + set_objective!(stats, obj(nlp, x)) + + grad!(nlp, x, ∇fk) + norm_∇fk = norm(∇fk) + set_dual_residual!(stats, norm_∇fk) + + αk = init_alpha(norm_∇fk,backend) + + # Stopping criterion: + ϵ = atol + rtol * norm_∇fk + optimal = norm_∇fk ≤ ϵ + if optimal + @info("Optimal point found at initial point") + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + end + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + + while !done + λk = step_mult(αk,norm_∇fk,backend) + if β == 0 + c .= x .- λk .* (∇fk) + else + satβ = find_beta(β, κg, m, ∇fk) + c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ) + m .= ∇fk .* (T(1) - β) .+ m .* β + end + ΔTk = norm_∇fk^2 * λk + fck = obj(nlp, c) + if fck == -Inf + set_status!(stats, :unbounded) + break + end + + ρk = (stats.objective - fck) / ΔTk + + # Update regularization parameters + if ρk >= η2 + αk = min(αmax, γ2 * αk) + elseif ρk < η1 + αk = αk * γ1 + end + + # Acceptance of the new candidate + if ρk >= η1 + x .= c + set_objective!(stats, fck) + grad!(nlp, x, ∇fk) + norm_∇fk = norm(∇fk) + end + + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + set_dual_residual!(stats, norm_∇fk) + optimal = norm_∇fk ≤ ϵ + + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info infoline + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + end + + set_solution!(stats, x) + return stats +end + +""" + find_beta(β,κg,d,∇f;tol=0.01) + +Compute satβ which saturates the contibution of the momentum term to the gradient. +Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term. +""" +function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V} + if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0. + return β + end + a = T(0) + b = β + while b-a > tol + β = (b+a) / 2 + if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0 + a = β + else + b = β + end + end + return β +end + +""" + init_alpha(norm_∇fk::T, ::qr) + init_alpha(norm_∇fk::T, ::tr) + +Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. +""" +function init_alpha(norm_∇fk::T, ::qr) where{T} + 1/2^round(log2(norm_∇fk + 1)) +end + +function init_alpha(norm_∇fk::T, ::tr) where{T} + norm_∇fk/2^round(log2(norm_∇fk + 1)) +end + +""" + step_mult(αk::T, norm_∇fk::T, ::qr) + step_mult(αk::T, norm_∇fk::T, ::tr) + +Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`). +""" +function step_mult(αk::T, norm_∇fk::T, ::qr) where{T} + αk +end + +function step_mult(αk::T, norm_∇fk::T, ::tr) where{T} + αk/norm_∇fk +end \ No newline at end of file diff --git a/test/allocs.jl b/test/allocs.jl index 88a70f5f..5906ef84 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) diff --git a/test/callback.jl b/test/callback.jl index db6177cc..187e0be1 100644 --- a/test/callback.jl +++ b/test/callback.jl @@ -31,6 +31,11 @@ using ADNLPModels, JSOSolvers, LinearAlgebra, Logging #, Plots tron(nlp, callback = cb) end @test stats.iter == 8 + + stats = with_logger(NullLogger()) do + fomo(nlp, callback = cb) + end + @test stats.iter == 8 end @testset "Test callback for NLS" begin diff --git a/test/consistency.jl b/test/consistency.jl index af115661..321f798d 100644 --- a/test/consistency.jl +++ b/test/consistency.jl @@ -10,8 +10,9 @@ function consistency() @testset "Consistency" begin args = Pair{Symbol, Number}[:atol => 1e-6, :rtol => 1e-6, :max_eval => 20000, :max_time => 60.0] - @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2] + @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] with_logger(NullLogger()) do + reset!(unlp) stats = mtd(unlp; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order @@ -27,7 +28,7 @@ function consistency() end end - @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2] + @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] with_logger(NullLogger()) do stats = mtd(qnlp; args...) @test stats isa GenericExecutionStats diff --git a/test/restart.jl b/test/restart.jl index 02d90902..98f82103 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,6 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( (:R2, :R2Solver), + (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), (:trunk, :TrunkSolver), @@ -44,6 +45,7 @@ end @testset "Test restart with a different problem: $fun" for (fun, s) in ( (:R2, :R2Solver), + (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), (:trunk, :TrunkSolver), diff --git a/test/runtests.jl b/test/runtests.jl index de0295ed..bb41eeba 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,7 +18,7 @@ using JSOSolvers end @testset "Test iteration limit" begin - @testset "$fun" for fun in (R2, lbfgs, tron, trunk) + @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk) f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f, [-1.2; 1.0]) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index cb41e83e..ddad51e8 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -8,6 +8,7 @@ function tests() ("lbfgs", lbfgs), ("tron", tron), ("R2", R2), + ("fomo", fomo), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From 9aeca32232477fbc9d52b6a95c0c065075438204 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 21 Dec 2023 12:32:03 -0500 Subject: [PATCH 002/171] fix consistency test --- test/consistency.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/consistency.jl b/test/consistency.jl index 321f798d..fb725b5b 100644 --- a/test/consistency.jl +++ b/test/consistency.jl @@ -30,6 +30,7 @@ function consistency() @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] with_logger(NullLogger()) do + reset!(qnlp) stats = mtd(qnlp; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order From 2a2bbbf270f9314991acc100fed67eafa7f89e73 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 28 Dec 2023 16:54:39 -0500 Subject: [PATCH 003/171] fix update rule, fix find_beta algo --- src/fomo.jl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 711e7fa3..41e74145 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -151,7 +151,7 @@ function SolverCore.solve!( end if verbose > 0 && mod(stats.iter, verbose) == 0 @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk 0 end set_status!( @@ -171,14 +171,13 @@ function SolverCore.solve!( done = stats.status != :unknown + satβ = T(0) while !done λk = step_mult(αk,norm_∇fk,backend) if β == 0 c .= x .- λk .* (∇fk) else - satβ = find_beta(β, κg, m, ∇fk) c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ) - m .= ∇fk .* (T(1) - β) .+ m .* β end ΔTk = norm_∇fk^2 * λk fck = obj(nlp, c) @@ -186,9 +185,9 @@ function SolverCore.solve!( set_status!(stats, :unbounded) break end - + ρk = (stats.objective - fck) / ΔTk - + # Update regularization parameters if ρk >= η2 αk = min(αmax, γ2 * αk) @@ -199,9 +198,15 @@ function SolverCore.solve!( # Acceptance of the new candidate if ρk >= η1 x .= c + if β!=0 + m .= ∇fk .* (T(1) - β) .+ m .* β + end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) + if β!=0 + satβ = find_beta(β, κg, m, ∇fk) + end end set_iter!(stats, stats.iter + 1) @@ -250,13 +255,13 @@ function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V} b = β while b-a > tol β = (b+a) / 2 - if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0 + if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0 a = β else b = β end end - return β + return a end """ From 52a4e16f06d3005a6801e0d0208b8c207d480037 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Fri, 5 Jan 2024 13:37:59 -0500 Subject: [PATCH 004/171] modify with gradient related strategy --- src/fomo.jl | 69 +++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 41e74145..db77937e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -23,8 +23,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. -- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2. -- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. @@ -79,6 +78,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver g::V c::V m::V + d::V end function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} @@ -86,7 +86,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} g = similar(nlp.meta.x0) c = similar(nlp.meta.x0) m = fill!(similar(nlp.meta.x0), 0) - return FomoSolver{T, V}(x, g, c, m) + d = fill!(similar(nlp.meta.x0), 0) + return FomoSolver{T, V}(x, g, c, m, d) end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -109,8 +110,7 @@ function SolverCore.solve!( atol::T = √eps(T), rtol::T = √eps(T), η1 = eps(T)^(1 / 4), - η2 = T(0.2), - κg = T(0.8), + η2 = T(0.95), γ1 = T(0.5), γ2 = T(2), αmax = 1/eps(T), @@ -131,7 +131,7 @@ function SolverCore.solve!( ∇fk = solver.g c = solver.c m = solver.m - + d = solver.d set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -171,15 +171,23 @@ function SolverCore.solve!( done = stats.status != :unknown + d .= ∇fk + norm_d = norm_∇fk satβ = T(0) + ρk = T(0) while !done - λk = step_mult(αk,norm_∇fk,backend) - if β == 0 - c .= x .- λk .* (∇fk) - else - c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ) - end - ΔTk = norm_∇fk^2 * λk + # if β!=0 + # satβ = find_beta(β, m, ∇fk, norm_∇fk) + # d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + # m .= ∇fk .* (T(1) - β) .+ m .* β + # norm_d = norm(d) + # else + # d .= ∇fk + # norm_d = norm_∇fk + # end + λk = step_mult(αk,norm_d,backend) + c .= x .- λk .* d + ΔTk = norm_∇fk^2 *λk fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -187,6 +195,7 @@ function SolverCore.solve!( end ρk = (stats.objective - fck) / ΔTk + # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk # Update regularization parameters if ρk >= η2 @@ -204,9 +213,15 @@ function SolverCore.solve!( set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) - if β!=0 - satβ = find_beta(β, κg, m, ∇fk) + if β!= 0 + satβ = find_beta(β, m, ∇fk, norm_∇fk) + d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + norm_d = norm(d) + else + d .= ∇fk + norm_d = norm_∇fk end + end set_iter!(stats, stats.iter + 1) @@ -216,7 +231,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ end set_status!( @@ -242,26 +257,18 @@ function SolverCore.solve!( end """ - find_beta(β,κg,d,∇f;tol=0.01) + find_beta(β,m,∇f,norm_∇f,θ) Compute satβ which saturates the contibution of the momentum term to the gradient. -Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term. +satβ is computed such that m.∇f > θ * norm_∇f^2 """ -function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V} - if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0. +function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V} + dotprod = dot(m,∇f) + if dotprod > θ * norm_∇f^2 return β + else + return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) end - a = T(0) - b = β - while b-a > tol - β = (b+a) / 2 - if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0 - a = β - else - b = β - end - end - return a end """ From 1423d51e00502ead41adb720be9df759c6bc7bbd Mon Sep 17 00:00:00 2001 From: d-monnet Date: Fri, 5 Jan 2024 15:11:20 -0500 Subject: [PATCH 005/171] fix model decrease computation --- src/fomo.jl | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index db77937e..e2b582e8 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -176,18 +176,9 @@ function SolverCore.solve!( satβ = T(0) ρk = T(0) while !done - # if β!=0 - # satβ = find_beta(β, m, ∇fk, norm_∇fk) - # d .= ∇fk .* (T(1) - satβ) .+ m .* satβ - # m .= ∇fk .* (T(1) - β) .+ m .* β - # norm_d = norm(d) - # else - # d .= ∇fk - # norm_d = norm_∇fk - # end λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - ΔTk = norm_∇fk^2 *λk + ΔTk = dot(∇fk , d) * λk fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) From 7f6727beb399c7abccee4ff682cf192f6b23fb5a Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 15 Jan 2024 15:48:56 -0500 Subject: [PATCH 006/171] fix find_beta function --- src/fomo.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index e2b582e8..ed3a1340 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -175,6 +175,7 @@ function SolverCore.solve!( norm_d = norm_∇fk satβ = T(0) ρk = T(0) + #μ = αk while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d @@ -184,10 +185,7 @@ function SolverCore.solve!( set_status!(stats, :unbounded) break end - ρk = (stats.objective - fck) / ΔTk - # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk - # Update regularization parameters if ρk >= η2 αk = min(αmax, γ2 * αk) @@ -199,8 +197,11 @@ function SolverCore.solve!( if ρk >= η1 x .= c if β!=0 + #μ = αk * (T(1) - β) + αk * β + #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β m .= ∇fk .* (T(1) - β) .+ m .* β end + #αk = μ set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) @@ -222,7 +223,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ end set_status!( @@ -255,10 +256,11 @@ satβ is computed such that m.∇f > θ * norm_∇f^2 """ function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V} dotprod = dot(m,∇f) - if dotprod > θ * norm_∇f^2 + if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2 return β else - return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) + return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod) + #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) end end From 880759d4fd295d380029a2a5653ecd558d33e578 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 25 Jan 2024 11:53:34 -0500 Subject: [PATCH 007/171] fix null step size issue --- src/fomo.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index ed3a1340..a0a543ee 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -238,7 +238,8 @@ function SolverCore.solve!( max_time = max_time, ), ) - + + αk == 0 && set_status!(stats,:exception) callback(nlp, solver, stats) done = stats.status != :unknown From bd2b8f598ca04e86975ed8898953489f7a2086d5 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 25 Jan 2024 12:33:40 -0500 Subject: [PATCH 008/171] fix test --- test/test_solvers.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index ddad51e8..ba182731 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -8,7 +8,8 @@ function tests() ("lbfgs", lbfgs), ("tron", tron), ("R2", R2), - ("fomo", fomo), + ("fomo_r2", fomo), + ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From 3ce7b96a2a3fac2191b5f767473043af698a9c55 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 25 Jan 2024 12:49:29 -0500 Subject: [PATCH 009/171] update docstring --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index a0a543ee..215d2d94 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -28,7 +28,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1)` : constant in the momentum term. +- `β = T(0) ∈ [0,1)` : decay rate for the momentum. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region From 91890772c105762b75626998d23d0ad1cf753901 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 29 Jan 2024 15:14:16 -0500 Subject: [PATCH 010/171] add average sat beta to genericexecutionstat --- src/fomo.jl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 215d2d94..af4f521f 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -92,7 +92,9 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) - return solve!(solver, nlp; kwargs...) + solver_specific = Dict(:avgsatβ => T(0.)) + stats = GenericExecutionStats(nlp;solver_specific=solver_specific) + return solve!(solver, nlp, stats; kwargs...) end function SolverCore.reset!(solver::FomoSolver{T}) where {T} @@ -175,6 +177,8 @@ function SolverCore.solve!( norm_d = norm_∇fk satβ = T(0) ρk = T(0) + avgsatβ = T(0.) + siter = 0 #μ = αk while !done λk = step_mult(αk,norm_d,backend) @@ -213,7 +217,8 @@ function SolverCore.solve!( d .= ∇fk norm_d = norm_∇fk end - + avgsatβ += satβ + siter += 1 end set_iter!(stats, stats.iter + 1) @@ -245,6 +250,8 @@ function SolverCore.solve!( done = stats.status != :unknown end + avgsatβ /= siter + stats.solver_specific[:avgsatβ] = avgsatβ set_solution!(stats, x) return stats end From 2b8e3498e3a2ca07268ecd7c59b681d4402cc51c Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 30 Jan 2024 15:14:18 -0500 Subject: [PATCH 011/171] add theta param as key arg --- src/fomo.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index af4f521f..a746483e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -29,6 +29,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0) ∈ [0,1)` : decay rate for the momentum. +- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region @@ -120,6 +121,7 @@ function SolverCore.solve!( max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), + θ::T = T(0.1), verbose::Int = 0, backend = qr() ) where {T, V} @@ -210,7 +212,7 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if β!= 0 - satβ = find_beta(β, m, ∇fk, norm_∇fk) + satβ = find_beta(β, m, ∇fk, norm_∇fk, θ) d .= ∇fk .* (T(1) - satβ) .+ m .* satβ norm_d = norm(d) else @@ -262,13 +264,12 @@ end Compute satβ which saturates the contibution of the momentum term to the gradient. satβ is computed such that m.∇f > θ * norm_∇f^2 """ -function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V} +function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V} dotprod = dot(m,∇f) if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2 return β else return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod) - #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) end end From 2ca4813b1a3a9d523213e3b747c8a971444abd24 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 1 Feb 2024 12:25:34 -0500 Subject: [PATCH 012/171] update convergence conditions in find_beta. add satbeta decrease strategy if iteration is unsuccessful. --- src/fomo.jl | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index a746483e..557b1a29 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -24,12 +24,14 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1)` : decay rate for the momentum. -- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. +- `β = T(0) ∈ [0,1)` : target decay rate for the momentum. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. +- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region @@ -116,12 +118,14 @@ function SolverCore.solve!( η2 = T(0.95), γ1 = T(0.5), γ2 = T(2), + γ3 = T(1/2), αmax = 1/eps(T), max_time::Float64 = 30.0, max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), - θ::T = T(0.1), + θ1::T = T(1e-5), + θ2::T = T(1e-5), verbose::Int = 0, backend = qr() ) where {T, V} @@ -181,6 +185,7 @@ function SolverCore.solve!( ρk = T(0) avgsatβ = T(0.) siter = 0 + #μ = αk while !done λk = step_mult(αk,norm_d,backend) @@ -197,6 +202,8 @@ function SolverCore.solve!( αk = min(αmax, γ2 * αk) elseif ρk < η1 αk = αk * γ1 + satβ *= γ3 + d .= ∇fk .* (T(1) - satβ) .+ m .* satβ end # Acceptance of the new candidate @@ -212,7 +219,7 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if β!= 0 - satβ = find_beta(β, m, ∇fk, norm_∇fk, θ) + satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (T(1) - satβ) .+ m .* satβ norm_d = norm(d) else @@ -259,18 +266,16 @@ function SolverCore.solve!( end """ - find_beta(β,m,∇f,norm_∇f,θ) +find_beta(m, ∇f, norm_∇f, β, θ1, θ2) Compute satβ which saturates the contibution of the momentum term to the gradient. satβ is computed such that m.∇f > θ * norm_∇f^2 """ -function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V} +function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) - if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2 - return β - else - return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod) - end + β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β + β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) + return min(β,min(β1,β2)) end """ From 9d7bac2e2c9037614e757eaaffb7e321052d91ac Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 1 Feb 2024 12:57:44 -0500 Subject: [PATCH 013/171] fix possible 0 division in find_beta --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 557b1a29..ff24afb9 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -274,7 +274,7 @@ satβ is computed such that m.∇f > θ * norm_∇f^2 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β - β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) + β2 = m != ∇f ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) : β return min(β,min(β1,β2)) end From b9a3aef83905d484febfdf8ba39514dda52ecc9a Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 3 Feb 2024 00:16:09 -0500 Subject: [PATCH 014/171] Misc improvments: - update docstrings - rename qr -> r2 - remove dead code --- src/fomo.jl | 62 ++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index ff24afb9..6a7628a3 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,9 +1,9 @@ -export fomo, FomoSolver, tr, qr +export fomo, FomoSolver, tr, r2 abstract type AbstractFomoMethod end struct tr <: AbstractFomoMethod end -struct qr <: AbstractFomoMethod end +struct r2 <: AbstractFomoMethod end """ fomo(nlp; kwargs...) @@ -22,18 +22,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1)` : target decay rate for the momentum. +- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. -- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. +- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region +- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -114,20 +114,20 @@ function SolverCore.solve!( x::V = nlp.meta.x0, atol::T = √eps(T), rtol::T = √eps(T), - η1 = eps(T)^(1 / 4), - η2 = T(0.95), - γ1 = T(0.5), - γ2 = T(2), - γ3 = T(1/2), - αmax = 1/eps(T), + η1::T = T(eps(T)^(1 / 4)), + η2::T = T(0.95), + γ1::T = T(1/2), + γ2::T = T(2), + γ3::T = T(1/2), + αmax::T = 1/eps(T), max_time::Float64 = 30.0, max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), - θ1::T = T(1e-5), - θ2::T = T(1e-5), + θ1::T = T(0.1), + θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, - backend = qr() + backend = r2() ) where {T, V} unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") @@ -183,10 +183,9 @@ function SolverCore.solve!( norm_d = norm_∇fk satβ = T(0) ρk = T(0) - avgsatβ = T(0.) + avgsatβ = T(0) siter = 0 - - #μ = αk + oneT = T(1) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d @@ -203,24 +202,21 @@ function SolverCore.solve!( elseif ρk < η1 αk = αk * γ1 satβ *= γ3 - d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + d .= ∇fk .* (oneT - satβ) .+ m .* satβ end # Acceptance of the new candidate if ρk >= η1 x .= c if β!=0 - #μ = αk * (T(1) - β) + αk * β - #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β - m .= ∇fk .* (T(1) - β) .+ m .* β + m .= ∇fk .* (oneT - β) .+ m .* β end - #αk = μ set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if β!= 0 satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) - d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + d .= ∇fk .* (oneT - satβ) .+ m .* satβ norm_d = norm(d) else d .= ∇fk @@ -269,22 +265,26 @@ end find_beta(m, ∇f, norm_∇f, β, θ1, θ2) Compute satβ which saturates the contibution of the momentum term to the gradient. -satβ is computed such that m.∇f > θ * norm_∇f^2 +`satβ` is computed such that the two gradient-related conditions are ensured: +1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||² +2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)|| +with `m` memory of past gradient/ """ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) + diffnorm = norm(m .- ∇f) β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β - β2 = m != ∇f ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) : β + β2 = diffnorm != 0 ? (1-θ2)*norm_∇f/(θ2*diffnorm) : β return min(β,min(β1,β2)) end """ - init_alpha(norm_∇fk::T, ::qr) + init_alpha(norm_∇fk::T, ::r2) init_alpha(norm_∇fk::T, ::tr) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::qr) where{T} +function init_alpha(norm_∇fk::T, ::r2) where{T} 1/2^round(log2(norm_∇fk + 1)) end @@ -293,12 +293,12 @@ function init_alpha(norm_∇fk::T, ::tr) where{T} end """ - step_mult(αk::T, norm_∇fk::T, ::qr) + step_mult(αk::T, norm_∇fk::T, ::r2) step_mult(αk::T, norm_∇fk::T, ::tr) -Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`). +Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`). """ -function step_mult(αk::T, norm_∇fk::T, ::qr) where{T} +function step_mult(αk::T, norm_∇fk::T, ::r2) where{T} αk end From 9acd09bba9f343f39c3e9388dff13081547e7087 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 3 Feb 2024 14:54:29 -0500 Subject: [PATCH 015/171] fix null denominator in find_beta --- src/fomo.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6a7628a3..c983205b 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -272,9 +272,10 @@ with `m` memory of past gradient/ """ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) - diffnorm = norm(m .- ∇f) - β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β - β2 = diffnorm != 0 ? (1-θ2)*norm_∇f/(θ2*diffnorm) : β + n1 = norm_∇f^2 - dotprod + n2 = norm(m .- ∇f) + β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β + β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β return min(β,min(β1,β2)) end From ef258a8e8382d93b245d6ded4942198fe1b3b24a Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 02:41:30 -0500 Subject: [PATCH 016/171] Make R2 and R2Solver interfaces to fomo solver. Delete obsolete R2.jl file. --- src/JSOSolvers.jl | 1 - src/R2.jl | 231 ---------------------------------------------- src/fomo.jl | 150 ++++++++++++++++++++++++++---- test/allocs.jl | 2 +- test/callback.jl | 26 +++--- test/restart.jl | 4 +- 6 files changed, 147 insertions(+), 267 deletions(-) delete mode 100644 src/R2.jl diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index 79abace3..85afc4fe 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -13,7 +13,6 @@ export solve! # Unconstrained solvers include("lbfgs.jl") include("trunk.jl") -include("R2.jl") include("fomo.jl") # Unconstrained solvers for NLS diff --git a/src/R2.jl b/src/R2.jl deleted file mode 100644 index 79b7d7c0..00000000 --- a/src/R2.jl +++ /dev/null @@ -1,231 +0,0 @@ -export R2, R2Solver - -""" - R2(nlp; kwargs...) - -A first-order quadratic regularization method for unconstrained optimization. - -For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`: - - solver = R2Solver(nlp) - solve!(solver, nlp; kwargs...) - -# Arguments -- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. - -# Keyword arguments -- `x::V = nlp.meta.x0`: the initial guess. -- `atol::T = √eps(T)`: absolute tolerance. -- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. -- `σmin = eps(T)`: step parameter for R2 algorithm. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. -- `max_time::Float64 = 30.0`: maximum time limit in seconds. -- `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1]` is the constant in the momentum term. If `β == 0`, R2 does not use momentum. -- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - -# Output -The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. - -# Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.x`: current iterate; -- `solver.gx`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.dual_feas`: norm of current gradient; - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. - - `stats.elapsed_time`: elapsed time in seconds. - -# Examples -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -stats = R2(nlp) - -# output - -"Execution stats: first-order stationary" -``` - -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -solver = R2Solver(nlp); -stats = solve!(solver, nlp) - -# output - -"Execution stats: first-order stationary" -``` -""" -mutable struct R2Solver{T, V} <: AbstractOptimizationSolver - x::V - gx::V - cx::V - d::V # used for momentum term - σ::T -end - -function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} - x = similar(nlp.meta.x0) - gx = similar(nlp.meta.x0) - cx = similar(nlp.meta.x0) - d = fill!(similar(nlp.meta.x0), 0) - σ = zero(T) # init it to zero for now - return R2Solver{T, V}(x, gx, cx, d, σ) -end - -@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} - solver = R2Solver(nlp) - return solve!(solver, nlp; kwargs...) -end - -function SolverCore.reset!(solver::R2Solver{T}) where {T} - solver.d .= zero(T) - solver -end -SolverCore.reset!(solver::R2Solver, ::AbstractNLPModel) = reset!(solver) - -function SolverCore.solve!( - solver::R2Solver{T, V}, - nlp::AbstractNLPModel{T, V}, - stats::GenericExecutionStats{T, V}; - callback = (args...) -> nothing, - x::V = nlp.meta.x0, - atol::T = √eps(T), - rtol::T = √eps(T), - η1 = eps(T)^(1 / 4), - η2 = T(0.95), - γ1 = T(1 / 2), - γ2 = 1 / γ1, - σmin = zero(T), - max_time::Float64 = 30.0, - max_eval::Int = -1, - max_iter::Int = typemax(Int), - β::T = T(0), - verbose::Int = 0, -) where {T, V} - unconstrained(nlp) || error("R2 should only be called on unconstrained problems.") - - reset!(stats) - start_time = time() - set_time!(stats, 0.0) - - x = solver.x .= x - ∇fk = solver.gx - ck = solver.cx - d = solver.d - σk = solver.σ - - set_iter!(stats, 0) - set_objective!(stats, obj(nlp, x)) - - grad!(nlp, x, ∇fk) - norm_∇fk = norm(∇fk) - set_dual_residual!(stats, norm_∇fk) - - σk = 2^round(log2(norm_∇fk + 1)) - # Stopping criterion: - ϵ = atol + rtol * norm_∇fk - optimal = norm_∇fk ≤ ϵ - if optimal - @info("Optimal point found at initial point") - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk σk - end - if verbose > 0 && mod(stats.iter, verbose) == 0 - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk σk - end - - set_status!( - stats, - get_status( - nlp, - elapsed_time = stats.elapsed_time, - optimal = optimal, - max_eval = max_eval, - iter = stats.iter, - max_iter = max_iter, - max_time = max_time, - ), - ) - - solver.σ = σk - callback(nlp, solver, stats) - σk = solver.σ - - done = stats.status != :unknown - - while !done - if β == 0 - ck .= x .- (∇fk ./ σk) - else - d .= ∇fk .* (T(1) - β) .+ d .* β - ck .= x .- (d ./ σk) - end - ΔTk = norm_∇fk^2 / σk - fck = obj(nlp, ck) - if fck == -Inf - set_status!(stats, :unbounded) - break - end - - ρk = (stats.objective - fck) / ΔTk - - # Update regularization parameters - if ρk >= η2 - σk = max(σmin, γ1 * σk) - elseif ρk < η1 - σk = σk * γ2 - end - - # Acceptance of the new candidate - if ρk >= η1 - x .= ck - set_objective!(stats, fck) - grad!(nlp, x, ∇fk) - norm_∇fk = norm(∇fk) - end - - set_iter!(stats, stats.iter + 1) - set_time!(stats, time() - start_time) - set_dual_residual!(stats, norm_∇fk) - optimal = norm_∇fk ≤ ϵ - - if verbose > 0 && mod(stats.iter, verbose) == 0 - @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk σk - end - - set_status!( - stats, - get_status( - nlp, - elapsed_time = stats.elapsed_time, - optimal = optimal, - max_eval = max_eval, - iter = stats.iter, - max_iter = max_iter, - max_time = max_time, - ), - ) - solver.σ = σk - callback(nlp, solver, stats) - σk = solver.σ - - done = stats.status != :unknown - end - - set_solution!(stats, x) - return stats -end diff --git a/src/fomo.jl b/src/fomo.jl index c983205b..78e151d5 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,9 +1,10 @@ -export fomo, FomoSolver, tr, r2 +export fomo, FomoSolver, tr, r2, R2 abstract type AbstractFomoMethod end -struct tr <: AbstractFomoMethod end -struct r2 <: AbstractFomoMethod end +struct tr <: AbstractFomoMethod end +struct r2 <: AbstractFomoMethod end +struct R2og <: AbstractFomoMethod end """ fomo(nlp; kwargs...) @@ -100,10 +101,97 @@ end return solve!(solver, nlp, stats; kwargs...) end +""" + R2(nlp; kwargs...) + +A first-order quadratic regularization method for unconstrained optimization. + +For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = R2Solver(nlp) + solve!(solver, nlp; kwargs...) +Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`. + +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. +- `σmin = eps(T)`: step parameter for R2 algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.gx`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = R2(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = R2Solver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} + x = similar(nlp.meta.x0) + g = similar(nlp.meta.x0) + c = similar(nlp.meta.x0) + m = Vector{T}() + d = g # similar without momentum + return FomoSolver{T, V}(x, g, c, m, d) +end + +@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + solver = R2Solver(nlp) + stats = GenericExecutionStats(nlp) + if haskey(kwargs,:σmax) + return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) + else + return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...) + end +end + function SolverCore.reset!(solver::FomoSolver{T}) where {T} fill!(solver.m,0) solver end + SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( @@ -127,10 +215,13 @@ function SolverCore.solve!( θ1::T = T(0.1), θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, - backend = r2() + backend = r2(), + σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2` ) where {T, V} - unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") - + r2mode = (backend == R2og()) + mthname = r2mode ? "R2" : "fomo" + unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.") + reset!(stats) start_time = time() set_time!(stats, 0.0) @@ -154,12 +245,24 @@ function SolverCore.solve!( optimal = norm_∇fk ≤ ϵ if optimal @info("Optimal point found at initial point") - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + if r2mode + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + else + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + end + end if verbose > 0 && mod(stats.iter, verbose) == 0 - @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" + if r2mode + @info @sprintf "%5s %9s %7s %7s" "iter" "f" "‖∇f‖" "σ" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + else + @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk 0 + end + end set_status!( @@ -201,8 +304,10 @@ function SolverCore.solve!( αk = min(αmax, γ2 * αk) elseif ρk < η1 αk = αk * γ1 - satβ *= γ3 - d .= ∇fk .* (oneT - satβ) .+ m .* satβ + if !r2mode + satβ *= γ3 + (d .= ∇fk .* (oneT - satβ) .+ m .* satβ) + end end # Acceptance of the new candidate @@ -222,8 +327,10 @@ function SolverCore.solve!( d .= ∇fk norm_d = norm_∇fk end - avgsatβ += satβ - siter += 1 + if !r2mode + (avgsatβ += satβ) + (siter += 1) + end end set_iter!(stats, stats.iter + 1) @@ -233,7 +340,11 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + if r2mode + infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + else + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + end end set_status!( @@ -254,9 +365,10 @@ function SolverCore.solve!( done = stats.status != :unknown end - - avgsatβ /= siter - stats.solver_specific[:avgsatβ] = avgsatβ + if !r2mode + avgsatβ /= siter + stats.solver_specific[:avgsatβ] = avgsatβ + end set_solution!(stats, x) return stats end @@ -285,7 +397,7 @@ end Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::r2) where{T} +function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T} 1/2^round(log2(norm_∇fk + 1)) end @@ -299,7 +411,7 @@ end Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`). """ -function step_mult(αk::T, norm_∇fk::T, ::r2) where{T} +function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} αk end diff --git a/test/allocs.jl b/test/allocs.jl index 5906ef84..42d266ab 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) diff --git a/test/callback.jl b/test/callback.jl index 187e0be1..2511ee6b 100644 --- a/test/callback.jl +++ b/test/callback.jl @@ -58,16 +58,16 @@ end @test stats.iter == 8 end -@testset "Testing Solver Values" begin - f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 - nlp = ADNLPModel(f, [-1.2; 1.0]) - function cb(nlp, solver, stats) - if stats.iter == 4 - @test solver.σ > 0.0 - stats.status = :user - end - end - stats = with_logger(NullLogger()) do - R2(nlp, callback = cb) - end -end +# @testset "Testing Solver Values" begin +# f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 +# nlp = ADNLPModel(f, [-1.2; 1.0]) +# function cb(nlp, solver, stats) +# if stats.iter == 4 +# @test solver.σ > 0.0 +# stats.status = :user +# end +# end +# stats = with_logger(NullLogger()) do +# R2(nlp, callback = cb) +# end +# end diff --git a/test/restart.jl b/test/restart.jl index 98f82103..eb770739 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,5 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( - (:R2, :R2Solver), + (:R2, :FomoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), @@ -44,7 +44,7 @@ end end @testset "Test restart with a different problem: $fun" for (fun, s) in ( - (:R2, :R2Solver), + (:R2, :FomoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), From 85022e16a22b83c98abead21b23cc99b2e7fb959 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 10:40:13 -0500 Subject: [PATCH 017/171] Rxport R2Solver (fix doc build issue) and backend for R2 classic --- src/fomo.jl | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 78e151d5..fcd60978 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,4 @@ -export fomo, FomoSolver, tr, r2, R2 +export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og abstract type AbstractFomoMethod end @@ -34,7 +34,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. - `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region +- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0). # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -161,7 +161,7 @@ stats = R2(nlp) using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) solver = R2Solver(nlp); -stats = solve!(solver, nlp) +stats = solve!(solver, nlp, backend = R2og()) # output @@ -181,9 +181,9 @@ end solver = R2Solver(nlp) stats = GenericExecutionStats(nlp) if haskey(kwargs,:σmax) - return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) + return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) else - return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...) + return solve!(solver, nlp, stats; backend = R2og(), kwargs...) end end @@ -292,7 +292,11 @@ function SolverCore.solve!( while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - ΔTk = dot(∇fk , d) * λk + if r2mode + ΔTk = norm_∇fk^2 * λk + else + ΔTk = dot(∇fk , d) * λk + end fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -306,20 +310,20 @@ function SolverCore.solve!( αk = αk * γ1 if !r2mode satβ *= γ3 - (d .= ∇fk .* (oneT - satβ) .+ m .* satβ) + d .= ∇fk .* (oneT - satβ) .+ m .* satβ end end # Acceptance of the new candidate if ρk >= η1 x .= c - if β!=0 + if !r2mode m .= ∇fk .* (oneT - β) .+ m .* β end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) - if β!= 0 + if !r2mode satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - satβ) .+ m .* satβ norm_d = norm(d) @@ -328,8 +332,8 @@ function SolverCore.solve!( norm_d = norm_∇fk end if !r2mode - (avgsatβ += satβ) - (siter += 1) + avgsatβ += satβ + siter += 1 end end From 07e79b16ffa625991cd95bacfe0ed3563792bca7 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 11:09:20 -0500 Subject: [PATCH 018/171] add :smallstep exception (step addition underflow) --- src/fomo.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index fcd60978..49b535c6 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -292,6 +292,7 @@ function SolverCore.solve!( while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d + x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0 if r2mode ΔTk = norm_∇fk^2 * λk else @@ -364,7 +365,8 @@ function SolverCore.solve!( ), ) - αk == 0 && set_status!(stats,:exception) + αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before + callback(nlp, solver, stats) done = stats.status != :unknown From f2e8e678993e992a7f14d87702ef2c902a836e8c Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 11:17:28 -0500 Subject: [PATCH 019/171] fix small_step exception --- src/fomo.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 49b535c6..95655ea4 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -292,7 +292,8 @@ function SolverCore.solve!( while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0 + uf = x == c # step addition underfow on every dimensions, should happen before αk == 0 + @show stats.status if r2mode ΔTk = norm_∇fk^2 * λk else @@ -365,11 +366,13 @@ function SolverCore.solve!( ), ) - αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before - callback(nlp, solver, stats) + uf && set_status!(stats,:small_step) + αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + done = stats.status != :unknown + @show stats.status αk end if !r2mode avgsatβ /= siter From ad68ceb6b5edc7b6dde7e73e3b8b27a258be5620 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 5 Feb 2024 11:20:03 -0500 Subject: [PATCH 020/171] remove terminal ouput --- src/fomo.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 95655ea4..497bdfb3 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -293,7 +293,6 @@ function SolverCore.solve!( λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d uf = x == c # step addition underfow on every dimensions, should happen before αk == 0 - @show stats.status if r2mode ΔTk = norm_∇fk^2 * λk else @@ -372,7 +371,6 @@ function SolverCore.solve!( αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before done = stats.status != :unknown - @show stats.status αk end if !r2mode avgsatβ /= siter From 1875d2c831ffc92b6b218145b83c55f8aa75df22 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:27:13 -0500 Subject: [PATCH 021/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 497bdfb3..96c92b4b 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -34,7 +34,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. - `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0). +- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. From 39d4e4fdda5445bd778147136b76b63d62c43da2 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:35:49 -0500 Subject: [PATCH 022/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 96c92b4b..02f53395 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -389,7 +389,7 @@ Compute satβ which saturates the contibution of the momentum term to the gradie 2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)|| with `m` memory of past gradient/ """ -function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} +function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) n1 = norm_∇f^2 - dotprod n2 = norm(m .- ∇f) From 2f72dad0a45494d4c78aaf05ea2b017ba238fdff Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 5 Feb 2024 16:24:01 -0500 Subject: [PATCH 023/171] update docstring, add rhok to the output --- src/fomo.jl | 134 +++++++++++++++------------------------------------- 1 file changed, 37 insertions(+), 97 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 497bdfb3..30fc236f 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -8,6 +8,7 @@ struct R2og <: AbstractFomoMethod end """ fomo(nlp; kwargs...) + R2(nlp; kwargs...) A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. @@ -16,6 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) +*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method. +For advanced usage: + + solver = R2Solver(nlp) + solve!(solver, nlp; kwargs...) + # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -25,16 +32,16 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. -- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration. +- `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. -- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0). +- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -96,78 +103,11 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) - solver_specific = Dict(:avgsatβ => T(0.)) + solver_specific = Dict(:avgβmax => T(0.)) stats = GenericExecutionStats(nlp;solver_specific=solver_specific) return solve!(solver, nlp, stats; kwargs...) end -""" - R2(nlp; kwargs...) - -A first-order quadratic regularization method for unconstrained optimization. - -For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`: - - solver = R2Solver(nlp) - solve!(solver, nlp; kwargs...) -Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`. - -# Arguments -- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. - -# Keyword arguments -- `x::V = nlp.meta.x0`: the initial guess. -- `atol::T = √eps(T)`: absolute tolerance. -- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. -- `σmin = eps(T)`: step parameter for R2 algorithm. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. -- `max_time::Float64 = 30.0`: maximum time limit in seconds. -- `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - -# Output -The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. - -# Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.x`: current iterate; -- `solver.gx`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.dual_feas`: norm of current gradient; - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. - - `stats.elapsed_time`: elapsed time in seconds. - -# Examples -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -stats = R2(nlp) - -# output - -"Execution stats: first-order stationary" -``` - -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -solver = R2Solver(nlp); -stats = solve!(solver, nlp, backend = R2og()) - -# output - -"Execution stats: first-order stationary" -``` -""" function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} x = similar(nlp.meta.x0) g = similar(nlp.meta.x0) @@ -177,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} return FomoSolver{T, V}(x, g, c, m, d) end -@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = R2Solver(nlp) stats = GenericExecutionStats(nlp) if haskey(kwargs,:σmax) @@ -256,11 +196,11 @@ function SolverCore.solve!( end if verbose > 0 && mod(stats.iter, verbose) == 0 if r2mode - @info @sprintf "%5s %9s %7s %7s" "iter" "f" "‖∇f‖" "σ" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN else - @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk 0 + @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 end end @@ -284,15 +224,15 @@ function SolverCore.solve!( d .= ∇fk norm_d = norm_∇fk - satβ = T(0) + βmax = T(0) ρk = T(0) - avgsatβ = T(0) + avgβmax = T(0) siter = 0 oneT = T(1) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - uf = x == c # step addition underfow on every dimensions, should happen before αk == 0 + step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0 if r2mode ΔTk = norm_∇fk^2 * λk else @@ -310,8 +250,8 @@ function SolverCore.solve!( elseif ρk < η1 αk = αk * γ1 if !r2mode - satβ *= γ3 - d .= ∇fk .* (oneT - satβ) .+ m .* satβ + βmax *= γ3 + d .= ∇fk .* (oneT - βmax) .+ m .* βmax end end @@ -325,15 +265,15 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) - d .= ∇fk .* (oneT - satβ) .+ m .* satβ + βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) + d .= ∇fk .* (oneT - βmax) .+ m .* βmax norm_d = norm(d) else d .= ∇fk norm_d = norm_∇fk end if !r2mode - avgsatβ += satβ + avgβmax += βmax siter += 1 end end @@ -346,9 +286,9 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline if r2mode - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk else - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax end end @@ -367,14 +307,14 @@ function SolverCore.solve!( callback(nlp, solver, stats) - uf && set_status!(stats,:small_step) - αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + step_underflow && set_status!(stats,:small_step) + αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before done = stats.status != :unknown end if !r2mode - avgsatβ /= siter - stats.solver_specific[:avgsatβ] = avgsatβ + avgβmax /= siter + stats.solver_specific[:avgβmax] = avgβmax end set_solution!(stats, x) return stats @@ -383,11 +323,11 @@ end """ find_beta(m, ∇f, norm_∇f, β, θ1, θ2) -Compute satβ which saturates the contibution of the momentum term to the gradient. -`satβ` is computed such that the two gradient-related conditions are ensured: -1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||² -2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)|| -with `m` memory of past gradient/ +Compute βmax which saturates the contibution of the momentum term to the gradient. +`βmax` is computed such that the two gradient-related conditions are ensured: +1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² +2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))|| +with `m` memory of past gradient """ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) @@ -395,7 +335,7 @@ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n2 = norm(m .- ∇f) β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β - return min(β,min(β1,β2)) + return min(β,min(β1,β2)) end """ From d22d162dbccf026336e70e1c5c572b1784c3dca4 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 6 Feb 2024 12:22:54 -0500 Subject: [PATCH 024/171] - create variable for dot(m,nabla f): avoid computation of dot(d, nabla f) in model decrease, is used in find_beta (interface updated) - update docstrings --- src/fomo.jl | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index cf4697ae..83fe6648 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -21,7 +21,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i For advanced usage: solver = R2Solver(nlp) - solve!(solver, nlp; kwargs...) + solve!(solver, nlp; backend = R2og(), kwargs...) # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -39,10 +39,12 @@ For advanced usage: - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). +*Warning:* `R2og()` backend should be used only for advanced usage as described above. + # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -174,6 +176,7 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) + grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) @@ -229,15 +232,12 @@ function SolverCore.solve!( avgβmax = T(0) siter = 0 oneT = T(1) + mdot∇f = T(0) # dot(m,∇fk) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0 - if r2mode - ΔTk = norm_∇fk^2 * λk - else - ΔTk = dot(∇fk , d) * λk - end + ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -260,17 +260,15 @@ function SolverCore.solve!( x .= c if !r2mode m .= ∇fk .* (oneT - β) .+ m .* β + mdot∇f = dot(m,∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) + βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ m .* βmax norm_d = norm(d) - else - d .= ∇fk - norm_d = norm_∇fk end if !r2mode avgβmax += βmax @@ -321,17 +319,16 @@ function SolverCore.solve!( end """ -find_beta(m, ∇f, norm_∇f, β, θ1, θ2) +find_beta(m, md∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² -2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))|| -with `m` memory of past gradient +2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m|| +with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` """ -function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} - dotprod = dot(m,∇f) - n1 = norm_∇f^2 - dotprod +function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} + n1 = norm_∇f^2 - mdot∇f n2 = norm(m .- ∇f) β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β @@ -354,9 +351,10 @@ end """ step_mult(αk::T, norm_∇fk::T, ::r2) + step_mult(αk::T, norm_∇fk::T, ::R2og) step_mult(αk::T, norm_∇fk::T, ::tr) -Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`). +Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`). """ function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} αk From ae801991589c7d483fa7b463b3572527e243cee4 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Tue, 6 Feb 2024 12:47:50 -0500 Subject: [PATCH 025/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 83fe6648..45aa9ade 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -200,7 +200,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 if r2mode @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 From f56d1839bd578fc3c803578d093d573a52e3f5b5 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Tue, 6 Feb 2024 12:48:10 -0500 Subject: [PATCH 026/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 45aa9ade..6a12c997 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -203,7 +203,7 @@ function SolverCore.solve!( infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 end end From be56ea50545f4e12ec652f61f5be37c949e3e81a Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Tue, 6 Feb 2024 12:48:26 -0500 Subject: [PATCH 027/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6a12c997..cb3a1f58 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -205,7 +205,6 @@ function SolverCore.solve!( @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 end - end set_status!( From bd953325a9c95544331b7231c9f99dd640399586 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 6 Feb 2024 12:54:53 -0500 Subject: [PATCH 028/171] rename `m` as `momentum` --- src/fomo.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 83fe6648..b234622e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -171,7 +171,7 @@ function SolverCore.solve!( x = solver.x .= x ∇fk = solver.g c = solver.c - m = solver.m + momentum = solver.m d = solver.d set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -232,7 +232,7 @@ function SolverCore.solve!( avgβmax = T(0) siter = 0 oneT = T(1) - mdot∇f = T(0) # dot(m,∇fk) + mdot∇f = T(0) # dot(momentum,∇fk) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d @@ -251,7 +251,7 @@ function SolverCore.solve!( αk = αk * γ1 if !r2mode βmax *= γ3 - d .= ∇fk .* (oneT - βmax) .+ m .* βmax + d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax end end @@ -259,7 +259,7 @@ function SolverCore.solve!( if ρk >= η1 x .= c if !r2mode - m .= ∇fk .* (oneT - β) .+ m .* β + momentum .= ∇fk .* (oneT - β) .+ momentum .* β mdot∇f = dot(m,∇fk) end set_objective!(stats, fck) @@ -267,7 +267,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) if !r2mode βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) - d .= ∇fk .* (oneT - βmax) .+ m .* βmax + d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end if !r2mode @@ -325,7 +325,7 @@ Compute βmax which saturates the contibution of the momentum term to the gradie `βmax` is computed such that the two gradient-related conditions are ensured: 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² 2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m|| -with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` +with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` """ function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n1 = norm_∇f^2 - mdot∇f From 1c4d3da6117a118f38ebee0cd514560255173bdb Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 6 Feb 2024 13:05:58 -0500 Subject: [PATCH 029/171] update docstring, fix `m` to `momentum` renaming. --- src/fomo.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 0a202466..93a5c11a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -38,8 +38,8 @@ For advanced usage: - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). @@ -259,13 +259,13 @@ function SolverCore.solve!( x .= c if !r2mode momentum .= ∇fk .* (oneT - β) .+ momentum .* β - mdot∇f = dot(m,∇fk) + mdot∇f = dot(momentum,∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) + βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end @@ -322,8 +322,8 @@ find_beta(m, md∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: -1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² -2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m|| +1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖² +2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖ with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` """ function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} @@ -336,6 +336,7 @@ end """ init_alpha(norm_∇fk::T, ::r2) + init_alpha(norm_∇fk::T, ::R2og) init_alpha(norm_∇fk::T, ::tr) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. From f952eca5e43d55fdcbf9cc7dc6efd1b9121dcec8 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:31:41 -0500 Subject: [PATCH 030/171] Update src/fomo.jl Co-authored-by: tmigot --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 93a5c11a..c04d5725 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -122,7 +122,7 @@ end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = R2Solver(nlp) stats = GenericExecutionStats(nlp) - if haskey(kwargs,:σmax) + if haskey(kwargs,:σmin) return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) else return solve!(solver, nlp, stats; backend = R2og(), kwargs...) From ca78bb4a96bc162ab5488737a3a964103c8e1c22 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 17 Feb 2024 17:08:10 -0500 Subject: [PATCH 031/171] Update test/allocs.jl Co-authored-by: tmigot --- test/allocs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/allocs.jl b/test/allocs.jl index 42d266ab..5906ef84 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) From 58d7dc3b9bb1fe230b6fa423279438e30ca7bc4b Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 17 Feb 2024 17:09:55 -0500 Subject: [PATCH 032/171] Update src/fomo.jl Co-authored-by: tmigot --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index c04d5725..b84c8664 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,6 +1,6 @@ export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og -abstract type AbstractFomoMethod end +abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end struct tr <: AbstractFomoMethod end struct r2 <: AbstractFomoMethod end From 9aac4044acc8b2ce9d8d1c63bbadc262fa7c01e6 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 17 Feb 2024 17:48:27 -0500 Subject: [PATCH 033/171] put alpha in solver structure, uncomment callback test --- src/fomo.jl | 47 ++++++++++++++++++++++++----------------------- test/callback.jl | 26 +++++++++++++------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index b84c8664..6e2c018a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,6 +1,6 @@ export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og -abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end +abstract type AbstractFomoMethod end struct tr <: AbstractFomoMethod end struct r2 <: AbstractFomoMethod end @@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver c::V m::V d::V + α::T end function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} @@ -100,7 +101,7 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = fill!(similar(nlp.meta.x0), 0) d = fill!(similar(nlp.meta.x0), 0) - return FomoSolver{T, V}(x, g, c, m, d) + return FomoSolver{T, V}(x, g, c, m, d, T(0)) end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -116,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = Vector{T}() d = g # similar without momentum - return FomoSolver{T, V}(x, g, c, m, d) + return FomoSolver{T, V}(x, g, c, m, d, T(0)) end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -181,7 +182,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) - αk = init_alpha(norm_∇fk,backend) + solver.α = init_alpha(norm_∇fk,backend) # Stopping criterion: ϵ = atol + rtol * norm_∇fk @@ -190,20 +191,20 @@ function SolverCore.solve!( @info("Optimal point found at initial point") if r2mode @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α else @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end end if verbose > 0 && mod(stats.iter, verbose) == 0 if r2mode @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0 end end @@ -233,9 +234,9 @@ function SolverCore.solve!( oneT = T(1) mdot∇f = T(0) # dot(momentum,∇fk) while !done - λk = step_mult(αk,norm_d,backend) + λk = step_mult(solver.α,norm_d,backend) c .= x .- λk .* d - step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0 + step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0 ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk fck = obj(nlp, c) if fck == -Inf @@ -245,9 +246,9 @@ function SolverCore.solve!( ρk = (stats.objective - fck) / ΔTk # Update regularization parameters if ρk >= η2 - αk = min(αmax, γ2 * αk) + solver.α = min(αmax, γ2 * solver.α) elseif ρk < η1 - αk = αk * γ1 + solver.α = solver.α * γ1 if !r2mode βmax *= γ3 d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax @@ -283,9 +284,9 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline if r2mode - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk else - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax end end @@ -305,7 +306,7 @@ function SolverCore.solve!( callback(nlp, solver, stats) step_underflow && set_status!(stats,:small_step) - αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + solver.α == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before done = stats.status != :unknown end @@ -350,16 +351,16 @@ function init_alpha(norm_∇fk::T, ::tr) where{T} end """ - step_mult(αk::T, norm_∇fk::T, ::r2) - step_mult(αk::T, norm_∇fk::T, ::R2og) - step_mult(αk::T, norm_∇fk::T, ::tr) + step_mult(α::T, norm_∇fk::T, ::r2) + step_mult(α::T, norm_∇fk::T, ::R2og) + step_mult(α::T, norm_∇fk::T, ::tr) -Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`). +Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ -function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} - αk +function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} + α end -function step_mult(αk::T, norm_∇fk::T, ::tr) where{T} - αk/norm_∇fk +function step_mult(α::T, norm_∇fk::T, ::tr) where{T} + α/norm_∇fk end \ No newline at end of file diff --git a/test/callback.jl b/test/callback.jl index 2511ee6b..ddadc799 100644 --- a/test/callback.jl +++ b/test/callback.jl @@ -58,16 +58,16 @@ end @test stats.iter == 8 end -# @testset "Testing Solver Values" begin -# f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 -# nlp = ADNLPModel(f, [-1.2; 1.0]) -# function cb(nlp, solver, stats) -# if stats.iter == 4 -# @test solver.σ > 0.0 -# stats.status = :user -# end -# end -# stats = with_logger(NullLogger()) do -# R2(nlp, callback = cb) -# end -# end +@testset "Testing Solver Values" begin + f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 + nlp = ADNLPModel(f, [-1.2; 1.0]) + function cb(nlp, solver, stats) + if stats.iter == 4 + @test solver.α > 0.0 + stats.status = :user + end + end + stats = with_logger(NullLogger()) do + R2(nlp, callback = cb) + end +end From e5497c506aa461e2daf5b758dfcad2bf1e4a6a39 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 17 Feb 2024 18:14:08 -0500 Subject: [PATCH 034/171] allocate memory for norm vector in find_beta --- src/fomo.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6e2c018a..f047169d 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver c::V m::V d::V + p::V α::T end @@ -101,7 +102,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = fill!(similar(nlp.meta.x0), 0) d = fill!(similar(nlp.meta.x0), 0) - return FomoSolver{T, V}(x, g, c, m, d, T(0)) + p = similar(nlp.meta.x0) + return FomoSolver{T, V}(x, g, c, m, d, p, T(0)) end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -117,7 +119,8 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = Vector{T}() d = g # similar without momentum - return FomoSolver{T, V}(x, g, c, m, d, T(0)) + p = Vector{T}() + return FomoSolver{T, V}(x, g, c, m, d, p, T(0)) end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -174,6 +177,7 @@ function SolverCore.solve!( c = solver.c momentum = solver.m d = solver.d + p = solver.p set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -266,7 +270,8 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) + p .= momentum .- ∇fk + βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end @@ -327,9 +332,9 @@ Compute βmax which saturates the contibution of the momentum term to the gradie 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖ with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` """ -function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} +function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n1 = norm_∇f^2 - mdot∇f - n2 = norm(m .- ∇f) + n2 = norm(p) β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β return min(β,min(β1,β2)) From 21791241abac892fc562b4fec0b898a8b34b141c Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 17 Feb 2024 22:45:36 -0500 Subject: [PATCH 035/171] Add FoSolver structure for no-momentum case. Change backend name to step_backend to avoid confusion. Update docstrings and tests. --- src/fomo.jl | 123 +++++++++++++++++++++++++------------------ test/test_solvers.jl | 2 +- 2 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index f047169d..c672b7b7 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,10 +1,10 @@ -export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og +export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step -abstract type AbstractFomoMethod end +abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end -struct tr <: AbstractFomoMethod end -struct r2 <: AbstractFomoMethod end -struct R2og <: AbstractFomoMethod end +abstract type AbstractFomoMethod end +struct tr_step <: AbstractFomoMethod end +struct r2_step <: AbstractFomoMethod end """ fomo(nlp; kwargs...) @@ -17,11 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method. +**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method. For advanced usage: - solver = R2Solver(nlp) - solve!(solver, nlp; backend = R2og(), kwargs...) + solver = FoSolver(nlp) + solve!(solver, nlp; kwargs...) +Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -33,7 +34,7 @@ For advanced usage: - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. -- `αmax = 1/eps(T)`: step parameter for fomo algorithm. +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. @@ -41,9 +42,7 @@ For advanced usage: - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. - `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). - -*Warning:* `R2og()` backend should be used only for advanced usage as described above. +- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -65,6 +64,7 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. # Examples +## `fomo` ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -83,10 +83,31 @@ stats = solve!(solver, nlp) # output +"Execution stats: first-order stationary" +``` +## `R2` +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = R2(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FoSolver(nlp); +stats = solve!(solver, nlp) + +# output + "Execution stats: first-order stationary" ``` """ -mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver +mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver x::V g::V c::V @@ -113,23 +134,28 @@ end return solve!(solver, nlp, stats; kwargs...) end -function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} + +mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver + x::V + g::V + c::V + α::T +end + +function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} x = similar(nlp.meta.x0) g = similar(nlp.meta.x0) c = similar(nlp.meta.x0) - m = Vector{T}() - d = g # similar without momentum - p = Vector{T}() - return FomoSolver{T, V}(x, g, c, m, d, p, T(0)) + return FoSolver{T, V}(x, g, c, T(0)) end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} - solver = R2Solver(nlp) + solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) if haskey(kwargs,:σmin) - return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) + return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...) else - return solve!(solver, nlp, stats; backend = R2og(), kwargs...) + return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) end end @@ -141,7 +167,7 @@ end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( - solver::FomoSolver{T, V}, + solver::AbstractFirstOrderSolver, nlp::AbstractNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, @@ -161,12 +187,11 @@ function SolverCore.solve!( θ1::T = T(0.1), θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, - backend = r2(), + step_backend = r2_step(), σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2` ) where {T, V} - r2mode = (backend == R2og()) - mthname = r2mode ? "R2" : "fomo" - unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.") + use_momentum = typeof(solver) <: FomoSolver + unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") reset!(stats) start_time = time() @@ -175,9 +200,9 @@ function SolverCore.solve!( x = solver.x .= x ∇fk = solver.g c = solver.c - momentum = solver.m - d = solver.d - p = solver.p + momentum = use_momentum ? solver.m : nothing # not used if no momentum + d = use_momentum ? solver.d : solver.g # g = d if no momentum + p = use_momentum ? solver.p : nothing # not used if no momentum set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -186,14 +211,14 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) - solver.α = init_alpha(norm_∇fk,backend) + solver.α = init_alpha(norm_∇fk,step_backend) # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ if optimal @info("Optimal point found at initial point") - if r2mode + if !use_momentum @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α else @@ -203,7 +228,7 @@ function SolverCore.solve!( end if verbose > 0 && mod(stats.iter, verbose) == 0 - if r2mode + if !use_momentum @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN else @@ -238,10 +263,10 @@ function SolverCore.solve!( oneT = T(1) mdot∇f = T(0) # dot(momentum,∇fk) while !done - λk = step_mult(solver.α,norm_d,backend) + λk = step_mult(solver.α,norm_d,step_backend) c .= x .- λk .* d step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0 - ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk + ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -253,7 +278,7 @@ function SolverCore.solve!( solver.α = min(αmax, γ2 * solver.α) elseif ρk < η1 solver.α = solver.α * γ1 - if !r2mode + if use_momentum βmax *= γ3 d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax end @@ -262,20 +287,20 @@ function SolverCore.solve!( # Acceptance of the new candidate if ρk >= η1 x .= c - if !r2mode + if use_momentum momentum .= ∇fk .* (oneT - β) .+ momentum .* β mdot∇f = dot(momentum,∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) - if !r2mode + if use_momentum p .= momentum .- ∇fk βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end - if !r2mode + if use_momentum avgβmax += βmax siter += 1 end @@ -288,7 +313,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - if r2mode + if !use_momentum infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk else infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax @@ -315,7 +340,7 @@ function SolverCore.solve!( done = stats.status != :unknown end - if !r2mode + if use_momentum avgβmax /= siter stats.solver_specific[:avgβmax] = avgβmax end @@ -341,31 +366,29 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where end """ - init_alpha(norm_∇fk::T, ::r2) - init_alpha(norm_∇fk::T, ::R2og) - init_alpha(norm_∇fk::T, ::tr) + init_alpha(norm_∇fk::T, ::r2_step) + init_alpha(norm_∇fk::T, ::tr_step) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T} +function init_alpha(norm_∇fk::T, ::r2_step) where{T} 1/2^round(log2(norm_∇fk + 1)) end -function init_alpha(norm_∇fk::T, ::tr) where{T} +function init_alpha(norm_∇fk::T, ::tr_step) where{T} norm_∇fk/2^round(log2(norm_∇fk + 1)) end """ - step_mult(α::T, norm_∇fk::T, ::r2) - step_mult(α::T, norm_∇fk::T, ::R2og) - step_mult(α::T, norm_∇fk::T, ::tr) + step_mult(α::T, norm_∇fk::T, ::r2_step) + step_mult(α::T, norm_∇fk::T, ::tr_step) Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ -function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} +function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T} α end -function step_mult(α::T, norm_∇fk::T, ::tr) where{T} +function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T} α/norm_∇fk end \ No newline at end of file diff --git a/test/test_solvers.jl b/test/test_solvers.jl index ba182731..d9266d29 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -9,7 +9,7 @@ function tests() ("tron", tron), ("R2", R2), ("fomo_r2", fomo), - ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)), + ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From 57bf9c2d399cf500c5fba4754caf6cc39bc26de6 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 18 Feb 2024 12:56:34 -0500 Subject: [PATCH 036/171] fix allocs tests --- test/allocs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/allocs.jl b/test/allocs.jl index 5906ef84..63c33f01 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :FoSolver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) From a1acef6d0f8da5e83040a6d486a071125fc82369 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 18 Feb 2024 14:39:24 -0500 Subject: [PATCH 037/171] add reset! function to FoSolver --- src/fomo.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/fomo.jl b/src/fomo.jl index c672b7b7..7a320173 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -166,6 +166,13 @@ end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) + +function SolverCore.reset!(solver::FoSolver{T}) where {T} + solver +end + +SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver) + function SolverCore.solve!( solver::AbstractFirstOrderSolver, nlp::AbstractNLPModel{T, V}, From 7ba442a33d5d72bf13d347a92fa595e5c23258c1 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 18 Feb 2024 15:22:43 -0500 Subject: [PATCH 038/171] remove `R2Solver`, replaced by `FoSolver` --- src/fomo.jl | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 7a320173..c9a95e81 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,4 @@ -export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step +export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end @@ -134,6 +134,12 @@ end return solve!(solver, nlp, stats; kwargs...) end +function SolverCore.reset!(solver::FomoSolver{T}) where {T} + fill!(solver.m,0) + solver +end + +SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver x::V @@ -159,14 +165,6 @@ end end end -function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m,0) - solver -end - -SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) - - function SolverCore.reset!(solver::FoSolver{T}) where {T} solver end From 8278977d2c21c49bab6bb10ed9ae48e21372bb00 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:53:40 -0500 Subject: [PATCH 039/171] Update test/test_solvers.jl Co-authored-by: Dominique --- test/test_solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index d9266d29..eb9029e1 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -9,7 +9,7 @@ function tests() ("tron", tron), ("R2", R2), ("fomo_r2", fomo), - ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)), + ("fomo_tr", (nlp; kwargs...) -> fomo(nlp, step_backend = JSOSolvers.tr_step(); kwargs...)), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From 31fd68dc950fdece036579522289c4fac6338c62 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:54:14 -0500 Subject: [PATCH 040/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index c9a95e81..d28ff085 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -10,7 +10,7 @@ struct r2_step <: AbstractFomoMethod end fomo(nlp; kwargs...) R2(nlp; kwargs...) -A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps. For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: From 602ca50e74b425744ef526da48aecaf3185d7509 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:56:37 -0500 Subject: [PATCH 041/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index d28ff085..c96a7aff 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -12,7 +12,7 @@ struct r2_step <: AbstractFomoMethod end A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps. -For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: +For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) From ac736a4bfce27dfce618219672c9b6890e6b712b Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:00:58 -0500 Subject: [PATCH 042/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index c96a7aff..1f2f4d4f 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -32,7 +32,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. From f4c3481fae11bc25dcb01ba97bdc69181ebea2e8 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:06:07 -0500 Subject: [PATCH 043/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 1f2f4d4f..6c66ae4a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -34,7 +34,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. -- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `αmax = 1/eps(T)`: maximum step parameter for fomo solver. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. From 701221c3e8e3acd02d6a97432bd23c0bdd59090a Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:07:13 -0500 Subject: [PATCH 044/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6c66ae4a..0cc734ee 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -35,7 +35,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: maximum step parameter for fomo solver. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited). - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. From 90763a248d65f839f331424f271b6cb686e910e3 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:08:03 -0500 Subject: [PATCH 045/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 0cc734ee..c59f9791 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -25,6 +25,7 @@ For advanced usage: Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). # Arguments + - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments From d0a99297355a2506a1dee7cdc937e7b47ee1b8e6 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:10:04 -0500 Subject: [PATCH 046/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index c59f9791..40865fa2 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -29,6 +29,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments + - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. From f8c5a63428361ef930d63e921d98b582b43cf626 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:10:45 -0500 Subject: [PATCH 047/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 40865fa2..40bf8bec 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -47,6 +47,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output + The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback From 998f926be4879cfde6af798204d8e2ca509782b6 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:11:28 -0500 Subject: [PATCH 048/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 40bf8bec..deac5778 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -51,6 +51,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback + The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. From e692afe540dc11959556e2fe90612e612dbf2d6d Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:12:15 -0500 Subject: [PATCH 049/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index deac5778..645a0d68 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -68,6 +68,7 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. # Examples + ## `fomo` ```jldoctest using JSOSolvers, ADNLPModels From d588e729ac80fea092f56cb72a47ba511a75477d Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:12:59 -0500 Subject: [PATCH 050/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 645a0d68..b1574045 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -70,6 +70,7 @@ Notably, you can access, and modify, the following: # Examples ## `fomo` + ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) From a35720a490b10920ce3650e3f257f8eaa049f0f1 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:15:43 -0500 Subject: [PATCH 051/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index b1574045..c955b365 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -136,7 +136,7 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) solver_specific = Dict(:avgβmax => T(0.)) - stats = GenericExecutionStats(nlp;solver_specific=solver_specific) + stats = GenericExecutionStats(nlp; solver_specific = solver_specific) return solve!(solver, nlp, stats; kwargs...) end From 2665fbf6f3b3190dfb1d000c6fca447492d30ad2 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:16:23 -0500 Subject: [PATCH 052/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index c955b365..425ee41d 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -92,6 +92,7 @@ stats = solve!(solver, nlp) "Execution stats: first-order stationary" ``` ## `R2` + ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) From b00fd12da96aced7203edb6fdfdb1b7c55adf5cc Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:16:50 -0500 Subject: [PATCH 053/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 425ee41d..edf16b14 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -142,7 +142,7 @@ end end function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m,0) + fill!(solver.m, 0) solver end From dda1ca78f3f3471f847afe56f867eeabbd113b0a Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:17:15 -0500 Subject: [PATCH 054/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index edf16b14..141fe4f0 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -165,7 +165,7 @@ end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) - if haskey(kwargs,:σmin) + if haskey(kwargs, :σmin) return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...) else return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) From 719dc5f6192b83b2d5a7ac4d38caac77802b8d33 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:18:07 -0500 Subject: [PATCH 055/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 141fe4f0..69426ffe 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -311,8 +311,6 @@ function SolverCore.solve!( βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) - end - if use_momentum avgβmax += βmax siter += 1 end From 8f99dbe13facaf7ba4ae18114b7affaa3d5f7bb9 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:19:48 -0500 Subject: [PATCH 056/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 69426ffe..414bfb70 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -218,7 +218,6 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) - grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) From 497b99ec0a4529ba7ffccd9c0d45706c52ffe464 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:20:43 -0500 Subject: [PATCH 057/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 414bfb70..79ba88b6 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -222,7 +222,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) - solver.α = init_alpha(norm_∇fk,step_backend) + solver.α = init_alpha(norm_∇fk, step_backend) # Stopping criterion: ϵ = atol + rtol * norm_∇fk From a6a439f79dd14949575487ea665b35f58aed38de Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:21:29 -0500 Subject: [PATCH 058/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 79ba88b6..5c200a9e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -236,7 +236,6 @@ function SolverCore.solve!( @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end - end if verbose > 0 && mod(stats.iter, verbose) == 0 if !use_momentum From b0164773122a1bb1220b7b97a8d60991527d29e2 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 22 Feb 2024 00:04:15 -0500 Subject: [PATCH 059/171] Add TR solver (trust region with linear model) fix spacing update docstrings --- src/fomo.jl | 92 ++++++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 5c200a9e..fa811924 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -2,75 +2,69 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end -abstract type AbstractFomoMethod end -struct tr_step <: AbstractFomoMethod end -struct r2_step <: AbstractFomoMethod end +abstract type AbstractFOMethod end +struct tr_step <: AbstractFOMethod end +struct r2_step <: AbstractFOMethod end """ fomo(nlp; kwargs...) R2(nlp; kwargs...) -A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps. +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. -For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`: +For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method. +**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. For advanced usage: solver = FoSolver(nlp) - solve!(solver, nlp; kwargs...) -Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - + solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x) + solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ + # Arguments - - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments - - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. -- `αmax = 1/eps(T)`: maximum step parameter for fomo solver. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited). +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output - The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback - The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. +In particular, setting `stats.status = :user || stats.stats = :unknown` will stop the algorithm. All relevant information should be available in `nlp` and `solver`. Notably, you can access, and modify, the following: - `solver.x`: current iterate; - `solver.gx`: current gradient; - `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.dual_feas`: norm of current gradient; - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. - - `stats.elapsed_time`: elapsed time in seconds. + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. # Examples - ## `fomo` - ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -92,7 +86,6 @@ stats = solve!(solver, nlp) "Execution stats: first-order stationary" ``` ## `R2` - ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -137,12 +130,12 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) solver_specific = Dict(:avgβmax => T(0.)) - stats = GenericExecutionStats(nlp; solver_specific = solver_specific) + stats = GenericExecutionStats(nlp;solver_specific=solver_specific) return solve!(solver, nlp, stats; kwargs...) end function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m, 0) + fill!(solver.m,0) solver end @@ -162,14 +155,18 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end -@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) - if haskey(kwargs, :σmin) - return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...) - else - return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) - end + return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) +end + +@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + fo(nlp; step_backend = r2_step(), kwargs...) +end + +@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + fo(nlp; step_backend = tr_step(), kwargs...) end function SolverCore.reset!(solver::FoSolver{T}) where {T} @@ -200,7 +197,6 @@ function SolverCore.solve!( θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, step_backend = r2_step(), - σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2` ) where {T, V} use_momentum = typeof(solver) <: FomoSolver unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") @@ -218,6 +214,7 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) + grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) @@ -236,14 +233,15 @@ function SolverCore.solve!( @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end + end if verbose > 0 && mod(stats.iter, verbose) == 0 if !use_momentum @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0 else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0 + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0 end end @@ -273,7 +271,7 @@ function SolverCore.solve!( oneT = T(1) mdot∇f = T(0) # dot(momentum,∇fk) while !done - λk = step_mult(solver.α,norm_d,step_backend) + λk = step_mult(solver.α, norm_d, step_backend) c .= x .- λk .* d step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0 ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum @@ -299,7 +297,7 @@ function SolverCore.solve!( x .= c if use_momentum momentum .= ∇fk .* (oneT - β) .+ momentum .* β - mdot∇f = dot(momentum,∇fk) + mdot∇f = dot(momentum, ∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) @@ -309,6 +307,8 @@ function SolverCore.solve!( βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) + end + if use_momentum avgβmax += βmax siter += 1 end @@ -343,8 +343,8 @@ function SolverCore.solve!( callback(nlp, solver, stats) - step_underflow && set_status!(stats,:small_step) - solver.α == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + step_underflow && set_status!(stats, :small_step) + solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before done = stats.status != :unknown end @@ -357,13 +357,13 @@ function SolverCore.solve!( end """ -find_beta(m, md∇f, norm_∇f, β, θ1, θ2) +find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: -1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖² -2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖ -with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` +1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² +2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ +with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` """ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n1 = norm_∇f^2 - mdot∇f From 773ac53a367ad7d71bad85737466676dbdf926e0 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Mon, 4 Mar 2024 14:33:19 -0500 Subject: [PATCH 060/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index fa811924..d1795391 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -18,6 +18,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i solve!(solver, nlp; kwargs...) **No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. + For advanced usage: solver = FoSolver(nlp) From 20c3fa9659df5fbd357e1e4a47157a82a1109056 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 4 Mar 2024 15:36:16 -0500 Subject: [PATCH 061/171] fix docstrings and verbose display --- src/fomo.jl | 90 +++++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index d1795391..40b14e8c 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -3,8 +3,8 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end abstract type AbstractFOMethod end -struct tr_step <: AbstractFOMethod end -struct r2_step <: AbstractFOMethod end +struct tr_step <: AbstractFOMethod end +struct r2_step <: AbstractFOMethod end """ fomo(nlp; kwargs...) @@ -130,19 +130,19 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) - solver_specific = Dict(:avgβmax => T(0.)) - stats = GenericExecutionStats(nlp;solver_specific=solver_specific) + solver_specific = Dict(:avgβmax => T(0.0)) + stats = GenericExecutionStats(nlp; solver_specific = solver_specific) return solve!(solver, nlp, stats; kwargs...) end function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m,0) + fill!(solver.m, 0) solver end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) -mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver +@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver x::V g::V c::V @@ -186,22 +186,23 @@ function SolverCore.solve!( rtol::T = √eps(T), η1::T = T(eps(T)^(1 / 4)), η2::T = T(0.95), - γ1::T = T(1/2), + γ1::T = T(1 / 2), γ2::T = T(2), - γ3::T = T(1/2), - αmax::T = 1/eps(T), + γ3::T = T(1 / 2), + αmax::T = 1 / eps(T), max_time::Float64 = 30.0, max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), θ1::T = T(0.1), - θ2::T = T(eps(T)^(1/3)), + θ2::T = T(eps(T)^(1 / 3)), verbose::Int = 0, step_backend = r2_step(), ) where {T, V} use_momentum = typeof(solver) <: FomoSolver + is_r2 = typeof(step_backend) <: r2_step unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") - + reset!(stats) start_time = time() set_time!(stats, 0.0) @@ -215,34 +216,38 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) - grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) solver.α = init_alpha(norm_∇fk, step_backend) - + # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ + header = ["iter", "f", "‖∇f‖", "α"] if optimal @info("Optimal point found at initial point") - if !use_momentum - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α + if is_r2 + @info @sprintf "%5s %9s %7s %7s " header... + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α else - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" + @info @sprintf "%5s %9s %7s %7s " header... @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end - end if verbose > 0 && mod(stats.iter, verbose) == 0 + push!(header, "ρk") + step_param = is_r2 ? 1 / solver.α : solver.α if !use_momentum - @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0 + @info @sprintf "%5s %9s %7s %7s %7s " header... + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param else - @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0 + push!(header, "βmax") + @info @sprintf "%5s %9s %7s %7s %7s %7s " header... + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0 end end @@ -305,7 +310,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) if use_momentum p .= momentum .- ∇fk - βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) + βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end @@ -322,10 +327,13 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline + step_param = is_r2 ? 1 / solver.α : solver.α if !use_momentum - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ρk else - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ρk βmax end end @@ -341,11 +349,11 @@ function SolverCore.solve!( max_time = max_time, ), ) - + callback(nlp, solver, stats) - step_underflow && set_status!(stats, :small_step) - solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before + step_underflow && set_status!(stats, :small_step) + solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before done = stats.status != :unknown end @@ -362,16 +370,16 @@ find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: -1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² +1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` -""" -function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} +""" +function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V} n1 = norm_∇f^2 - mdot∇f n2 = norm(p) - β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β - β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β - return min(β,min(β1,β2)) + β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β + β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β + return min(β, min(β1, β2)) end """ @@ -380,12 +388,12 @@ end Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::r2_step) where{T} - 1/2^round(log2(norm_∇fk + 1)) +function init_alpha(norm_∇fk::T, ::r2_step) where {T} + 1 / 2^round(log2(norm_∇fk + 1)) end -function init_alpha(norm_∇fk::T, ::tr_step) where{T} - norm_∇fk/2^round(log2(norm_∇fk + 1)) +function init_alpha(norm_∇fk::T, ::tr_step) where {T} + norm_∇fk / 2^round(log2(norm_∇fk + 1)) end """ @@ -394,10 +402,10 @@ end Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ -function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T} +function step_mult(α::T, norm_∇fk::T, ::r2_step) where {T} α end -function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T} - α/norm_∇fk +function step_mult(α::T, norm_∇fk::T, ::tr_step) where {T} + α / norm_∇fk end \ No newline at end of file From 7950f6bfc0f9bb269fa06753660a0ab48e0d6d24 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 5 Mar 2024 14:18:18 -0500 Subject: [PATCH 062/171] update docstring, update info display, fix solver arg type, export TR --- src/fomo.jl | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 40b14e8c..e51e5b8a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,4 @@ -export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step +export fomo, FomoSolver, FoSolver, R2, TR, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end @@ -10,14 +10,21 @@ struct r2_step <: AbstractFOMethod end fomo(nlp; kwargs...) R2(nlp; kwargs...) -A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. +The step is perform along d with +d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1) +with mk the memory of past gradients updated at each successful iteration as +mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2) +and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: +(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3) +‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (4) For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. +**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. For advanced usage: @@ -40,8 +47,8 @@ For advanced usage: - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3). +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. @@ -177,7 +184,7 @@ end SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( - solver::AbstractFirstOrderSolver, + solver::Union{FoSolver,FomoSolver}, nlp::AbstractNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, @@ -225,7 +232,8 @@ function SolverCore.solve!( # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ - header = ["iter", "f", "‖∇f‖", "α"] + header = ["iter", "f", "‖∇f‖"] + is_r2 ? push!(header,"σ") : push!(header,"Δ") if optimal @info("Optimal point found at initial point") if is_r2 From 02caa7b206194d67196af214c71ebaab766d4d39 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 5 Mar 2024 15:14:32 -0500 Subject: [PATCH 063/171] update docstring --- src/fomo.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index e51e5b8a..ed07d668 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -13,8 +13,8 @@ struct r2_step <: AbstractFOMethod end A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. The step is perform along d with d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1) -with mk the memory of past gradients updated at each successful iteration as -mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2) +with mk the memory of past gradients (initiated with 0) updated at each successful iteration as +mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2) and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3) ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (4) From d0a02b9d5e40e390b87c971bc1eec0435d85438e Mon Sep 17 00:00:00 2001 From: d-monnet Date: Wed, 6 Mar 2024 12:02:58 -0500 Subject: [PATCH 064/171] fix grad and momentum dot product --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index ed07d668..d31c6b0e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -311,12 +311,12 @@ function SolverCore.solve!( x .= c if use_momentum momentum .= ∇fk .* (oneT - β) .+ momentum .* β - mdot∇f = dot(momentum, ∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if use_momentum + mdot∇f = dot(momentum, ∇fk) p .= momentum .- ∇fk βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax From efcba5030ead91c5fb438f681e5d6371eabb2bb3 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Wed, 6 Mar 2024 12:19:05 -0500 Subject: [PATCH 065/171] cosmetics --- src/fomo.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index d31c6b0e..0d15d1cd 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -48,7 +48,7 @@ For advanced usage: - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3). -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). +- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. @@ -385,8 +385,8 @@ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V} n1 = norm_∇f^2 - mdot∇f n2 = norm(p) - β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β - β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β + β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β + β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β return min(β, min(β1, β2)) end From c89db514d67abeb8547f592c1dbe6ac1e320cc97 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:37:12 -0500 Subject: [PATCH 066/171] Update test/restart.jl Co-authored-by: Tangi Migot --- test/restart.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/restart.jl b/test/restart.jl index eb770739..e6b75cc1 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -44,7 +44,7 @@ end end @testset "Test restart with a different problem: $fun" for (fun, s) in ( - (:R2, :FomoSolver), + (:R2, :FoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), From 8a655c78d6a93cf3eedf4ccd936d8dd69d099d61 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:37:22 -0500 Subject: [PATCH 067/171] Update test/restart.jl Co-authored-by: Tangi Migot --- test/restart.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/restart.jl b/test/restart.jl index e6b75cc1..38765465 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,5 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( - (:R2, :FomoSolver), + (:R2, :FoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), From 1f67091b49288952fbd8e02c540a4ec1652bf98e Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 7 Mar 2024 11:18:24 -0500 Subject: [PATCH 068/171] Create FoSolver docstring --- src/fomo.jl | 130 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 41 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 0d15d1cd..2e2527e1 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -8,29 +8,26 @@ struct r2_step <: AbstractFOMethod end """ fomo(nlp; kwargs...) - R2(nlp; kwargs...) A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. + +# Algorithm description + The step is perform along d with -d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1) +d = - (1-βmax) .* ∇f(xk) - βmax .* mk with mk the memory of past gradients (initiated with 0) updated at each successful iteration as -mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2) +mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: -(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3) -‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (4) +(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1) +‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (2) +# Advanced usage For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. - -For advanced usage: - - solver = FoSolver(nlp) - solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x) - solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ +**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method. # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -47,8 +44,8 @@ For advanced usage: - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3). -- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1). +- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. @@ -91,27 +88,6 @@ stats = solve!(solver, nlp) # output -"Execution stats: first-order stationary" -``` -## `R2` -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -stats = R2(nlp) - -# output - -"Execution stats: first-order stationary" -``` - -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -solver = FoSolver(nlp); -stats = solve!(solver, nlp) - -# output - "Execution stats: first-order stationary" ``` """ @@ -149,7 +125,79 @@ end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) -@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver +""" + fo(nlp; kwargs...) + R2(nlp; kwargs...) + TR(nlp; kwargs...) + +A First-Order (FO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. + +For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = FoSolver(nlp) + solve!(solver, nlp; kwargs...) + +`R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument. + +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. +- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user || stats.stats = :unknown` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.gx`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = fo(nlp) # run with step_backend = r2_step(), equivalent to R2(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FoSolver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver x::V g::V c::V @@ -163,17 +211,17 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end -@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) end -@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} fo(nlp; step_backend = r2_step(), kwargs...) end -@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} fo(nlp; step_backend = tr_step(), kwargs...) end @@ -184,7 +232,7 @@ end SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( - solver::Union{FoSolver,FomoSolver}, + solver::Union{FoSolver, FomoSolver}, nlp::AbstractNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, @@ -233,7 +281,7 @@ function SolverCore.solve!( ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ header = ["iter", "f", "‖∇f‖"] - is_r2 ? push!(header,"σ") : push!(header,"Δ") + is_r2 ? push!(header, "σ") : push!(header, "Δ") if optimal @info("Optimal point found at initial point") if is_r2 From 0e2fdff0b05797b8d2598a9ca916c422c765fb02 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Fri, 8 Mar 2024 11:31:07 -0500 Subject: [PATCH 069/171] deprecate R2Solver --- src/fomo.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fomo.jl b/src/fomo.jl index 2e2527e1..002ba712 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -211,6 +211,8 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end +Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...) + @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) From 9e58f9011d829f1d047c607a2c129321e4d166e0 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:13:41 -0500 Subject: [PATCH 070/171] Update src/fomo.jl Co-authored-by: Tangi Migot --- src/fomo.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/fomo.jl b/src/fomo.jl index 002ba712..9009c36a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -211,6 +211,11 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end +""" + `R2Solver` is deprecated, please check the documentation of `R2`. +""" +mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end + Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...) @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} From b02a20f73fdfdcbacea9231053fa55785a11b3d6 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:14:27 -0500 Subject: [PATCH 071/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 9009c36a..ba22964b 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -13,7 +13,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza # Algorithm description -The step is perform along d with +The step is computed along d = - (1-βmax) .* ∇f(xk) - βmax .* mk with mk the memory of past gradients (initiated with 0) updated at each successful iteration as mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax From 670e815761736defa4a2c6a4754258886d4782bb Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:14:45 -0500 Subject: [PATCH 072/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index ba22964b..20b5cce1 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -15,7 +15,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza The step is computed along d = - (1-βmax) .* ∇f(xk) - βmax .* mk -with mk the memory of past gradients (initiated with 0) updated at each successful iteration as +with mk the memory of past gradients (initialized at 0), and updated at each successful iteration as mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1) From ee02796e626743bad740f9a92a7f66228d2dd2c0 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:15:14 -0500 Subject: [PATCH 073/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 20b5cce1..35d497a4 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -22,6 +22,7 @@ and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the follow ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (2) # Advanced usage + For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) From 4d6e0e335fbf99c9776a5c1c57a9194809a57445 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:15:36 -0500 Subject: [PATCH 074/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 35d497a4..ac78ecbc 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -31,6 +31,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i **No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method. # Arguments + - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments From 1eafb0d36dcf9ecccdc75676ee970ab1becff227 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:15:57 -0500 Subject: [PATCH 075/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index ac78ecbc..88f42570 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -35,6 +35,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments + - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. From ae75016ec86efa8f031b7797d3194c7bb729690f Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:16:14 -0500 Subject: [PATCH 076/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 88f42570..edc5608a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -432,7 +432,7 @@ function SolverCore.solve!( end """ -find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) + find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: From 4fe1a1403b8de444f936a176bec40f602540e0b5 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:16:30 -0500 Subject: [PATCH 077/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index edc5608a..6cf1e221 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -434,7 +434,7 @@ end """ find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) -Compute βmax which saturates the contibution of the momentum term to the gradient. +Compute value `βmax` that saturates the contribution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: 1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ From df12e042e6e474c9ea8cf1c1dbb79fb8ffa983ab Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:16:48 -0500 Subject: [PATCH 078/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6cf1e221..2043fa07 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -449,8 +449,8 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where end """ - init_alpha(norm_∇fk::T, ::r2_step) - init_alpha(norm_∇fk::T, ::tr_step) + init_alpha(norm_∇fk::T, ::r2_step) + init_alpha(norm_∇fk::T, ::tr_step) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ From 22c1de5a7c048a18a701386b6f4952e2b8d6dbfb Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:17:11 -0500 Subject: [PATCH 079/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 2043fa07..b5ebcdfc 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -452,7 +452,8 @@ end init_alpha(norm_∇fk::T, ::r2_step) init_alpha(norm_∇fk::T, ::tr_step) -Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. +Initialize `α` step size parameter. +Ensure first step is the same for quadratic regularization and trust region methods. """ function init_alpha(norm_∇fk::T, ::r2_step) where {T} 1 / 2^round(log2(norm_∇fk + 1)) From 6f63df08b0198beca21001a3792c2b66961a6899 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:17:25 -0500 Subject: [PATCH 080/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index b5ebcdfc..96c994c3 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -464,8 +464,8 @@ function init_alpha(norm_∇fk::T, ::tr_step) where {T} end """ - step_mult(α::T, norm_∇fk::T, ::r2_step) - step_mult(α::T, norm_∇fk::T, ::tr_step) + step_mult(α::T, norm_∇fk::T, ::r2_step) + step_mult(α::T, norm_∇fk::T, ::tr_step) Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ From d350cdbd1b3fb894aead9df71ef2c608fc439258 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:17:48 -0500 Subject: [PATCH 081/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 96c994c3..917f1fff 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -43,7 +43,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_eval::Int = -1`: maximum number of objective evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. From 9ec41f8de9d77a0f75b3337d836c3d891be3c9e3 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:19:42 -0500 Subject: [PATCH 082/171] Apply suggestions from code review Co-authored-by: Dominique --- src/fomo.jl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 917f1fff..560d7505 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -46,16 +46,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `max_eval::Int = -1`: maximum number of objective evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1). -- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). +- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum. +- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1). +- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output + The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback + The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. @@ -72,7 +74,9 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. # Examples + ## `fomo` + ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -143,9 +147,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i `R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument. # Arguments + - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments + - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. @@ -159,9 +165,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output + The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback + The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. From eacf70b81ab2b314a97019cb8b72a5c95e273e7e Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 21 Dec 2023 12:04:12 -0500 Subject: [PATCH 083/171] add fomo solver --- docs/src/solvers.md | 4 +- src/JSOSolvers.jl | 1 + src/fomo.jl | 288 +++++++++++++++++++++++++++++++++++++++++++ test/allocs.jl | 2 +- test/callback.jl | 5 + test/consistency.jl | 5 +- test/restart.jl | 2 + test/runtests.jl | 2 +- test/test_solvers.jl | 1 + 9 files changed, 305 insertions(+), 5 deletions(-) create mode 100644 src/fomo.jl diff --git a/docs/src/solvers.md b/docs/src/solvers.md index 06fe0eed..322f7c2e 100644 --- a/docs/src/solvers.md +++ b/docs/src/solvers.md @@ -6,10 +6,11 @@ - [`tron`](@ref) - [`trunk`](@ref) - [`R2`](@ref) +- [`fomo`](@ref) | Problem type | Solvers | | --------------------- | -------- | -| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref)| +| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref)| | Unconstrained NLS | [`trunk`](@ref), [`tron`](@ref) | | Bound-constrained NLP | [`tron`](@ref) | | Bound-constrained NLS | [`tron`](@ref) | @@ -21,4 +22,5 @@ lbfgs tron trunk R2 +fomo ``` diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index cd65c9b2..79abace3 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -14,6 +14,7 @@ export solve! include("lbfgs.jl") include("trunk.jl") include("R2.jl") +include("fomo.jl") # Unconstrained solvers for NLS include("trunkls.jl") diff --git a/src/fomo.jl b/src/fomo.jl new file mode 100644 index 00000000..711e7fa3 --- /dev/null +++ b/src/fomo.jl @@ -0,0 +1,288 @@ +export fomo, FomoSolver, tr, qr + +abstract type AbstractFomoMethod end + +struct tr <: AbstractFomoMethod end +struct qr <: AbstractFomoMethod end + +""" + fomo(nlp; kwargs...) + +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. + +For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = FomoSolver(nlp) + solve!(solver, nlp; kwargs...) + +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. +- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2. +- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters. +- `αmax = 1/eps(T)`: step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `β = T(0) ∈ [0,1)` : constant in the momentum term. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. +- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.gx`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = fomo(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FomoSolver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver + x::V + g::V + c::V + m::V +end + +function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} + x = similar(nlp.meta.x0) + g = similar(nlp.meta.x0) + c = similar(nlp.meta.x0) + m = fill!(similar(nlp.meta.x0), 0) + return FomoSolver{T, V}(x, g, c, m) +end + +@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + solver = FomoSolver(nlp) + return solve!(solver, nlp; kwargs...) +end + +function SolverCore.reset!(solver::FomoSolver{T}) where {T} + fill!(solver.m,0) + solver +end +SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) + +function SolverCore.solve!( + solver::FomoSolver{T, V}, + nlp::AbstractNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = nlp.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + η1 = eps(T)^(1 / 4), + η2 = T(0.2), + κg = T(0.8), + γ1 = T(0.5), + γ2 = T(2), + αmax = 1/eps(T), + max_time::Float64 = 30.0, + max_eval::Int = -1, + max_iter::Int = typemax(Int), + β::T = T(0.9), + verbose::Int = 0, + backend = qr() +) where {T, V} + unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") + + reset!(stats) + start_time = time() + set_time!(stats, 0.0) + + x = solver.x .= x + ∇fk = solver.g + c = solver.c + m = solver.m + + set_iter!(stats, 0) + set_objective!(stats, obj(nlp, x)) + + grad!(nlp, x, ∇fk) + norm_∇fk = norm(∇fk) + set_dual_residual!(stats, norm_∇fk) + + αk = init_alpha(norm_∇fk,backend) + + # Stopping criterion: + ϵ = atol + rtol * norm_∇fk + optimal = norm_∇fk ≤ ϵ + if optimal + @info("Optimal point found at initial point") + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + end + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + + while !done + λk = step_mult(αk,norm_∇fk,backend) + if β == 0 + c .= x .- λk .* (∇fk) + else + satβ = find_beta(β, κg, m, ∇fk) + c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ) + m .= ∇fk .* (T(1) - β) .+ m .* β + end + ΔTk = norm_∇fk^2 * λk + fck = obj(nlp, c) + if fck == -Inf + set_status!(stats, :unbounded) + break + end + + ρk = (stats.objective - fck) / ΔTk + + # Update regularization parameters + if ρk >= η2 + αk = min(αmax, γ2 * αk) + elseif ρk < η1 + αk = αk * γ1 + end + + # Acceptance of the new candidate + if ρk >= η1 + x .= c + set_objective!(stats, fck) + grad!(nlp, x, ∇fk) + norm_∇fk = norm(∇fk) + end + + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + set_dual_residual!(stats, norm_∇fk) + optimal = norm_∇fk ≤ ϵ + + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info infoline + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + end + + set_solution!(stats, x) + return stats +end + +""" + find_beta(β,κg,d,∇f;tol=0.01) + +Compute satβ which saturates the contibution of the momentum term to the gradient. +Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term. +""" +function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V} + if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0. + return β + end + a = T(0) + b = β + while b-a > tol + β = (b+a) / 2 + if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0 + a = β + else + b = β + end + end + return β +end + +""" + init_alpha(norm_∇fk::T, ::qr) + init_alpha(norm_∇fk::T, ::tr) + +Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. +""" +function init_alpha(norm_∇fk::T, ::qr) where{T} + 1/2^round(log2(norm_∇fk + 1)) +end + +function init_alpha(norm_∇fk::T, ::tr) where{T} + norm_∇fk/2^round(log2(norm_∇fk + 1)) +end + +""" + step_mult(αk::T, norm_∇fk::T, ::qr) + step_mult(αk::T, norm_∇fk::T, ::tr) + +Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`). +""" +function step_mult(αk::T, norm_∇fk::T, ::qr) where{T} + αk +end + +function step_mult(αk::T, norm_∇fk::T, ::tr) where{T} + αk/norm_∇fk +end \ No newline at end of file diff --git a/test/allocs.jl b/test/allocs.jl index b02b6621..f029c5f1 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) diff --git a/test/callback.jl b/test/callback.jl index f43796fd..ddadc799 100644 --- a/test/callback.jl +++ b/test/callback.jl @@ -31,6 +31,11 @@ using ADNLPModels, JSOSolvers, LinearAlgebra, Logging #, Plots tron(nlp, callback = cb) end @test stats.iter == 8 + + stats = with_logger(NullLogger()) do + fomo(nlp, callback = cb) + end + @test stats.iter == 8 end @testset "Test callback for NLS" begin diff --git a/test/consistency.jl b/test/consistency.jl index 94569dca..fb725b5b 100644 --- a/test/consistency.jl +++ b/test/consistency.jl @@ -10,8 +10,9 @@ function consistency() @testset "Consistency" begin args = Pair{Symbol, Number}[:atol => 1e-6, :rtol => 1e-6, :max_eval => 20000, :max_time => 60.0] - @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2] + @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] with_logger(NullLogger()) do + reset!(unlp) stats = mtd(unlp; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order @@ -27,7 +28,7 @@ function consistency() end end - @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2] + @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] with_logger(NullLogger()) do reset!(qnlp) stats = mtd(qnlp; args...) diff --git a/test/restart.jl b/test/restart.jl index 02d90902..98f82103 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,6 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( (:R2, :R2Solver), + (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), (:trunk, :TrunkSolver), @@ -44,6 +45,7 @@ end @testset "Test restart with a different problem: $fun" for (fun, s) in ( (:R2, :R2Solver), + (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), (:trunk, :TrunkSolver), diff --git a/test/runtests.jl b/test/runtests.jl index de0295ed..bb41eeba 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,7 +18,7 @@ using JSOSolvers end @testset "Test iteration limit" begin - @testset "$fun" for fun in (R2, lbfgs, tron, trunk) + @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk) f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f, [-1.2; 1.0]) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index cb41e83e..ddad51e8 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -8,6 +8,7 @@ function tests() ("lbfgs", lbfgs), ("tron", tron), ("R2", R2), + ("fomo", fomo), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From 48ca6f65df3d16b038ab1e76c23c1f190d98fb16 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 28 Dec 2023 16:54:39 -0500 Subject: [PATCH 084/171] fix update rule, fix find_beta algo --- src/fomo.jl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 711e7fa3..41e74145 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -151,7 +151,7 @@ function SolverCore.solve!( end if verbose > 0 && mod(stats.iter, verbose) == 0 @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk 0 end set_status!( @@ -171,14 +171,13 @@ function SolverCore.solve!( done = stats.status != :unknown + satβ = T(0) while !done λk = step_mult(αk,norm_∇fk,backend) if β == 0 c .= x .- λk .* (∇fk) else - satβ = find_beta(β, κg, m, ∇fk) c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ) - m .= ∇fk .* (T(1) - β) .+ m .* β end ΔTk = norm_∇fk^2 * λk fck = obj(nlp, c) @@ -186,9 +185,9 @@ function SolverCore.solve!( set_status!(stats, :unbounded) break end - + ρk = (stats.objective - fck) / ΔTk - + # Update regularization parameters if ρk >= η2 αk = min(αmax, γ2 * αk) @@ -199,9 +198,15 @@ function SolverCore.solve!( # Acceptance of the new candidate if ρk >= η1 x .= c + if β!=0 + m .= ∇fk .* (T(1) - β) .+ m .* β + end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) + if β!=0 + satβ = find_beta(β, κg, m, ∇fk) + end end set_iter!(stats, stats.iter + 1) @@ -250,13 +255,13 @@ function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V} b = β while b-a > tol β = (b+a) / 2 - if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + b .* d) <= 0 + if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0 a = β else b = β end end - return β + return a end """ From 454a8e1238e2ae4c56949fb4f77724485574548e Mon Sep 17 00:00:00 2001 From: d-monnet Date: Fri, 5 Jan 2024 13:37:59 -0500 Subject: [PATCH 085/171] modify with gradient related strategy --- src/fomo.jl | 69 +++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 41e74145..db77937e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -23,8 +23,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. -- `κg = T(0.8)` : maximum contribution of momentum term to the gradient, ||∇f-g||≤κg||g|| with g = (1-β)∇f + β m, with m memory of past gradients. Must satisfy 0 < κg < 1 - η2. -- `γ1 = T(0.8)`, `γ2 = T(1.2)`: regularization update parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. @@ -79,6 +78,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver g::V c::V m::V + d::V end function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} @@ -86,7 +86,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} g = similar(nlp.meta.x0) c = similar(nlp.meta.x0) m = fill!(similar(nlp.meta.x0), 0) - return FomoSolver{T, V}(x, g, c, m) + d = fill!(similar(nlp.meta.x0), 0) + return FomoSolver{T, V}(x, g, c, m, d) end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -109,8 +110,7 @@ function SolverCore.solve!( atol::T = √eps(T), rtol::T = √eps(T), η1 = eps(T)^(1 / 4), - η2 = T(0.2), - κg = T(0.8), + η2 = T(0.95), γ1 = T(0.5), γ2 = T(2), αmax = 1/eps(T), @@ -131,7 +131,7 @@ function SolverCore.solve!( ∇fk = solver.g c = solver.c m = solver.m - + d = solver.d set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -171,15 +171,23 @@ function SolverCore.solve!( done = stats.status != :unknown + d .= ∇fk + norm_d = norm_∇fk satβ = T(0) + ρk = T(0) while !done - λk = step_mult(αk,norm_∇fk,backend) - if β == 0 - c .= x .- λk .* (∇fk) - else - c .= x .- λk .* (∇fk .* (T(1) - satβ) .+ m .* satβ) - end - ΔTk = norm_∇fk^2 * λk + # if β!=0 + # satβ = find_beta(β, m, ∇fk, norm_∇fk) + # d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + # m .= ∇fk .* (T(1) - β) .+ m .* β + # norm_d = norm(d) + # else + # d .= ∇fk + # norm_d = norm_∇fk + # end + λk = step_mult(αk,norm_d,backend) + c .= x .- λk .* d + ΔTk = norm_∇fk^2 *λk fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -187,6 +195,7 @@ function SolverCore.solve!( end ρk = (stats.objective - fck) / ΔTk + # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk # Update regularization parameters if ρk >= η2 @@ -204,9 +213,15 @@ function SolverCore.solve!( set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) - if β!=0 - satβ = find_beta(β, κg, m, ∇fk) + if β!= 0 + satβ = find_beta(β, m, ∇fk, norm_∇fk) + d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + norm_d = norm(d) + else + d .= ∇fk + norm_d = norm_∇fk end + end set_iter!(stats, stats.iter + 1) @@ -216,7 +231,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ end set_status!( @@ -242,26 +257,18 @@ function SolverCore.solve!( end """ - find_beta(β,κg,d,∇f;tol=0.01) + find_beta(β,m,∇f,norm_∇f,θ) Compute satβ which saturates the contibution of the momentum term to the gradient. -Use bisection method to solve satβ * ||∇f .- d|| = κg * ||(1-satβ) .* ∇f + satβ .* d|| where d is the momentum term. +satβ is computed such that m.∇f > θ * norm_∇f^2 """ -function find_beta(β::T,κg::T,d::V,∇f::V;tol=0.01) where {T,V} - if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0. +function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V} + dotprod = dot(m,∇f) + if dotprod > θ * norm_∇f^2 return β + else + return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) end - a = T(0) - b = β - while b-a > tol - β = (b+a) / 2 - if β * norm( ∇f .- d) - κg * norm((1-β) .* ∇f + β .* d) <= 0 - a = β - else - b = β - end - end - return a end """ From 0a75bb257d6cab537e58fb919aae9338ac560d28 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Fri, 5 Jan 2024 15:11:20 -0500 Subject: [PATCH 086/171] fix model decrease computation --- src/fomo.jl | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index db77937e..e2b582e8 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -176,18 +176,9 @@ function SolverCore.solve!( satβ = T(0) ρk = T(0) while !done - # if β!=0 - # satβ = find_beta(β, m, ∇fk, norm_∇fk) - # d .= ∇fk .* (T(1) - satβ) .+ m .* satβ - # m .= ∇fk .* (T(1) - β) .+ m .* β - # norm_d = norm(d) - # else - # d .= ∇fk - # norm_d = norm_∇fk - # end λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - ΔTk = norm_∇fk^2 *λk + ΔTk = dot(∇fk , d) * λk fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) From f981b2feb1351fd0b3ba34de74a517bf52353a20 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 15 Jan 2024 15:48:56 -0500 Subject: [PATCH 087/171] fix find_beta function --- src/fomo.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index e2b582e8..ed3a1340 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -175,6 +175,7 @@ function SolverCore.solve!( norm_d = norm_∇fk satβ = T(0) ρk = T(0) + #μ = αk while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d @@ -184,10 +185,7 @@ function SolverCore.solve!( set_status!(stats, :unbounded) break end - ρk = (stats.objective - fck) / ΔTk - # ρk = (1-β) * (stats.objective - fck) / ΔTk +β * ρk - # Update regularization parameters if ρk >= η2 αk = min(αmax, γ2 * αk) @@ -199,8 +197,11 @@ function SolverCore.solve!( if ρk >= η1 x .= c if β!=0 + #μ = αk * (T(1) - β) + αk * β + #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β m .= ∇fk .* (T(1) - β) .+ m .* β end + #αk = μ set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) @@ -222,7 +223,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk satβ + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ end set_status!( @@ -255,10 +256,11 @@ satβ is computed such that m.∇f > θ * norm_∇f^2 """ function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V} dotprod = dot(m,∇f) - if dotprod > θ * norm_∇f^2 + if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2 return β else - return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) + return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod) + #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) end end From 7007e521fb33db94588e69e5b73c0586418cc94d Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 25 Jan 2024 11:53:34 -0500 Subject: [PATCH 088/171] fix null step size issue --- src/fomo.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index ed3a1340..a0a543ee 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -238,7 +238,8 @@ function SolverCore.solve!( max_time = max_time, ), ) - + + αk == 0 && set_status!(stats,:exception) callback(nlp, solver, stats) done = stats.status != :unknown From 943d788a969d74bcc62eb02d7ffb2230cd0f4b71 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 25 Jan 2024 12:33:40 -0500 Subject: [PATCH 089/171] fix test --- test/test_solvers.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index ddad51e8..ba182731 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -8,7 +8,8 @@ function tests() ("lbfgs", lbfgs), ("tron", tron), ("R2", R2), - ("fomo", fomo), + ("fomo_r2", fomo), + ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From 3555443b73785acb416465917addd8505b82dbb2 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 25 Jan 2024 12:49:29 -0500 Subject: [PATCH 090/171] update docstring --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index a0a543ee..215d2d94 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -28,7 +28,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1)` : constant in the momentum term. +- `β = T(0) ∈ [0,1)` : decay rate for the momentum. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region From 5e637393738f7974634c35861c874956c43f7d0a Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 29 Jan 2024 15:14:16 -0500 Subject: [PATCH 091/171] add average sat beta to genericexecutionstat --- src/fomo.jl | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 215d2d94..af4f521f 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -92,7 +92,9 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) - return solve!(solver, nlp; kwargs...) + solver_specific = Dict(:avgsatβ => T(0.)) + stats = GenericExecutionStats(nlp;solver_specific=solver_specific) + return solve!(solver, nlp, stats; kwargs...) end function SolverCore.reset!(solver::FomoSolver{T}) where {T} @@ -175,6 +177,8 @@ function SolverCore.solve!( norm_d = norm_∇fk satβ = T(0) ρk = T(0) + avgsatβ = T(0.) + siter = 0 #μ = αk while !done λk = step_mult(αk,norm_d,backend) @@ -213,7 +217,8 @@ function SolverCore.solve!( d .= ∇fk norm_d = norm_∇fk end - + avgsatβ += satβ + siter += 1 end set_iter!(stats, stats.iter + 1) @@ -245,6 +250,8 @@ function SolverCore.solve!( done = stats.status != :unknown end + avgsatβ /= siter + stats.solver_specific[:avgsatβ] = avgsatβ set_solution!(stats, x) return stats end From 23bc42216e0b284d5abebbc5b6a607acf55e52b7 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 30 Jan 2024 15:14:18 -0500 Subject: [PATCH 092/171] add theta param as key arg --- src/fomo.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index af4f521f..a746483e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -29,6 +29,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0) ∈ [0,1)` : decay rate for the momentum. +- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region @@ -120,6 +121,7 @@ function SolverCore.solve!( max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), + θ::T = T(0.1), verbose::Int = 0, backend = qr() ) where {T, V} @@ -210,7 +212,7 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if β!= 0 - satβ = find_beta(β, m, ∇fk, norm_∇fk) + satβ = find_beta(β, m, ∇fk, norm_∇fk, θ) d .= ∇fk .* (T(1) - satβ) .+ m .* satβ norm_d = norm(d) else @@ -262,13 +264,12 @@ end Compute satβ which saturates the contibution of the momentum term to the gradient. satβ is computed such that m.∇f > θ * norm_∇f^2 """ -function find_beta(β::T,m::V,∇f::V,norm_∇f::T;θ = T(1e-1)) where {T,V} +function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V} dotprod = dot(m,∇f) if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2 return β else return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod) - #return min(((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod),β) end end From bee01c3b80fc80ac740f85d5fa287b4f8ca454b6 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 1 Feb 2024 12:25:34 -0500 Subject: [PATCH 093/171] update convergence conditions in find_beta. add satbeta decrease strategy if iteration is unsuccessful. --- src/fomo.jl | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index a746483e..557b1a29 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -24,12 +24,14 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1)` : decay rate for the momentum. -- `θ = T(0.1)` : momentum contribution restriction parameter. [(1-β)∇f(xk) + β mk].[∇f(xk)] ≥ θ||∇f(xk)||², with mk memory of past gradient. +- `β = T(0) ∈ [0,1)` : target decay rate for the momentum. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. +- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region @@ -116,12 +118,14 @@ function SolverCore.solve!( η2 = T(0.95), γ1 = T(0.5), γ2 = T(2), + γ3 = T(1/2), αmax = 1/eps(T), max_time::Float64 = 30.0, max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), - θ::T = T(0.1), + θ1::T = T(1e-5), + θ2::T = T(1e-5), verbose::Int = 0, backend = qr() ) where {T, V} @@ -181,6 +185,7 @@ function SolverCore.solve!( ρk = T(0) avgsatβ = T(0.) siter = 0 + #μ = αk while !done λk = step_mult(αk,norm_d,backend) @@ -197,6 +202,8 @@ function SolverCore.solve!( αk = min(αmax, γ2 * αk) elseif ρk < η1 αk = αk * γ1 + satβ *= γ3 + d .= ∇fk .* (T(1) - satβ) .+ m .* satβ end # Acceptance of the new candidate @@ -212,7 +219,7 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if β!= 0 - satβ = find_beta(β, m, ∇fk, norm_∇fk, θ) + satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (T(1) - satβ) .+ m .* satβ norm_d = norm(d) else @@ -259,18 +266,16 @@ function SolverCore.solve!( end """ - find_beta(β,m,∇f,norm_∇f,θ) +find_beta(m, ∇f, norm_∇f, β, θ1, θ2) Compute satβ which saturates the contibution of the momentum term to the gradient. satβ is computed such that m.∇f > θ * norm_∇f^2 """ -function find_beta(β::T,m::V,∇f::V,norm_∇f::T, θ::T) where {T,V} +function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) - if (1-β)*norm_∇f^2 + β*dotprod > θ * norm_∇f^2 - return β - else - return ((1-θ)norm_∇f^2)/(norm_∇f^2 - dotprod) - end + β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β + β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) + return min(β,min(β1,β2)) end """ From 6ae60cddc5200c2e479f9212a9a3d9d1f2997597 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 1 Feb 2024 12:57:44 -0500 Subject: [PATCH 094/171] fix possible 0 division in find_beta --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 557b1a29..ff24afb9 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -274,7 +274,7 @@ satβ is computed such that m.∇f > θ * norm_∇f^2 function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β - β2 = (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) + β2 = m != ∇f ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) : β return min(β,min(β1,β2)) end From 6e4acfdee309510d48a69decc1d6575834491d44 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 3 Feb 2024 00:16:09 -0500 Subject: [PATCH 095/171] Misc improvments: - update docstrings - rename qr -> r2 - remove dead code --- src/fomo.jl | 62 ++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index ff24afb9..6a7628a3 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,9 +1,9 @@ -export fomo, FomoSolver, tr, qr +export fomo, FomoSolver, tr, r2 abstract type AbstractFomoMethod end struct tr <: AbstractFomoMethod end -struct qr <: AbstractFomoMethod end +struct r2 <: AbstractFomoMethod end """ fomo(nlp; kwargs...) @@ -22,18 +22,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.2)`: step acceptance parameters. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1)` : target decay rate for the momentum. +- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. -- `θ2 = T(1e-5)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. +- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = qr()`: model-based method employed. Options are `qr()` for quadratic regulation and `tr()` for trust-region +- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -114,20 +114,20 @@ function SolverCore.solve!( x::V = nlp.meta.x0, atol::T = √eps(T), rtol::T = √eps(T), - η1 = eps(T)^(1 / 4), - η2 = T(0.95), - γ1 = T(0.5), - γ2 = T(2), - γ3 = T(1/2), - αmax = 1/eps(T), + η1::T = T(eps(T)^(1 / 4)), + η2::T = T(0.95), + γ1::T = T(1/2), + γ2::T = T(2), + γ3::T = T(1/2), + αmax::T = 1/eps(T), max_time::Float64 = 30.0, max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), - θ1::T = T(1e-5), - θ2::T = T(1e-5), + θ1::T = T(0.1), + θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, - backend = qr() + backend = r2() ) where {T, V} unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") @@ -183,10 +183,9 @@ function SolverCore.solve!( norm_d = norm_∇fk satβ = T(0) ρk = T(0) - avgsatβ = T(0.) + avgsatβ = T(0) siter = 0 - - #μ = αk + oneT = T(1) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d @@ -203,24 +202,21 @@ function SolverCore.solve!( elseif ρk < η1 αk = αk * γ1 satβ *= γ3 - d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + d .= ∇fk .* (oneT - satβ) .+ m .* satβ end # Acceptance of the new candidate if ρk >= η1 x .= c if β!=0 - #μ = αk * (T(1) - β) + αk * β - #m .= (αk/μ) .* ∇fk .* (T(1) - β) .+ m .* β - m .= ∇fk .* (T(1) - β) .+ m .* β + m .= ∇fk .* (oneT - β) .+ m .* β end - #αk = μ set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if β!= 0 satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) - d .= ∇fk .* (T(1) - satβ) .+ m .* satβ + d .= ∇fk .* (oneT - satβ) .+ m .* satβ norm_d = norm(d) else d .= ∇fk @@ -269,22 +265,26 @@ end find_beta(m, ∇f, norm_∇f, β, θ1, θ2) Compute satβ which saturates the contibution of the momentum term to the gradient. -satβ is computed such that m.∇f > θ * norm_∇f^2 +`satβ` is computed such that the two gradient-related conditions are ensured: +1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||² +2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)|| +with `m` memory of past gradient/ """ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) + diffnorm = norm(m .- ∇f) β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β - β2 = m != ∇f ? (1-θ2)*norm_∇f/(θ2*norm(m .- ∇f)) : β + β2 = diffnorm != 0 ? (1-θ2)*norm_∇f/(θ2*diffnorm) : β return min(β,min(β1,β2)) end """ - init_alpha(norm_∇fk::T, ::qr) + init_alpha(norm_∇fk::T, ::r2) init_alpha(norm_∇fk::T, ::tr) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::qr) where{T} +function init_alpha(norm_∇fk::T, ::r2) where{T} 1/2^round(log2(norm_∇fk + 1)) end @@ -293,12 +293,12 @@ function init_alpha(norm_∇fk::T, ::tr) where{T} end """ - step_mult(αk::T, norm_∇fk::T, ::qr) + step_mult(αk::T, norm_∇fk::T, ::r2) step_mult(αk::T, norm_∇fk::T, ::tr) -Compute step size multiplier: `αk` for quadratic regularization(`::qr`) and `αk/norm_∇fk` for trust region (`::tr`). +Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`). """ -function step_mult(αk::T, norm_∇fk::T, ::qr) where{T} +function step_mult(αk::T, norm_∇fk::T, ::r2) where{T} αk end From 69d1b4a040036b199eb418d9c87c5bfdf684b7ca Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 3 Feb 2024 14:54:29 -0500 Subject: [PATCH 096/171] fix null denominator in find_beta --- src/fomo.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6a7628a3..c983205b 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -272,9 +272,10 @@ with `m` memory of past gradient/ """ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) - diffnorm = norm(m .- ∇f) - β1 = dotprod < norm_∇f^2 ? (1-θ1)*norm_∇f^2/(norm_∇f^2 - dotprod) : β - β2 = diffnorm != 0 ? (1-θ2)*norm_∇f/(θ2*diffnorm) : β + n1 = norm_∇f^2 - dotprod + n2 = norm(m .- ∇f) + β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β + β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β return min(β,min(β1,β2)) end From 9d1997501b7106a218fd57b41bdf4bba3ad57308 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 02:41:30 -0500 Subject: [PATCH 097/171] Make R2 and R2Solver interfaces to fomo solver. Delete obsolete R2.jl file. --- src/JSOSolvers.jl | 1 - src/R2.jl | 231 ---------------------------------------------- src/fomo.jl | 150 ++++++++++++++++++++++++++---- test/allocs.jl | 2 +- test/restart.jl | 4 +- 5 files changed, 134 insertions(+), 254 deletions(-) delete mode 100644 src/R2.jl diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index 79abace3..85afc4fe 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -13,7 +13,6 @@ export solve! # Unconstrained solvers include("lbfgs.jl") include("trunk.jl") -include("R2.jl") include("fomo.jl") # Unconstrained solvers for NLS diff --git a/src/R2.jl b/src/R2.jl deleted file mode 100644 index b7304dc3..00000000 --- a/src/R2.jl +++ /dev/null @@ -1,231 +0,0 @@ -export R2, R2Solver - -""" - R2(nlp; kwargs...) - -A first-order quadratic regularization method for unconstrained optimization. - -For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`: - - solver = R2Solver(nlp) - solve!(solver, nlp; kwargs...) - -# Arguments -- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. - -# Keyword arguments -- `x::V = nlp.meta.x0`: the initial guess. -- `atol::T = √eps(T)`: absolute tolerance. -- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. -- `αmax = 1/eps(T)`: maximum value for step size parameter for R2 algorithm. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. -- `max_time::Float64 = 30.0`: maximum time limit in seconds. -- `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1]` is the constant in the momentum term. If `β == 0`, R2 does not use momentum. -- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - -# Output -The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. - -# Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.x`: current iterate; -- `solver.gx`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.dual_feas`: norm of current gradient; - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. - - `stats.elapsed_time`: elapsed time in seconds. - -# Examples -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -stats = R2(nlp) - -# output - -"Execution stats: first-order stationary" -``` - -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -solver = R2Solver(nlp); -stats = solve!(solver, nlp) - -# output - -"Execution stats: first-order stationary" -``` -""" -mutable struct R2Solver{T, V} <: AbstractOptimizationSolver - x::V - gx::V - cx::V - d::V # used for momentum term - α::T -end - -function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} - x = similar(nlp.meta.x0) - gx = similar(nlp.meta.x0) - cx = similar(nlp.meta.x0) - d = fill!(similar(nlp.meta.x0), 0) - α = zero(T) # init it to zero for now - return R2Solver{T, V}(x, gx, cx, d, α) -end - -@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} - solver = R2Solver(nlp) - return solve!(solver, nlp; kwargs...) -end - -function SolverCore.reset!(solver::R2Solver{T}) where {T} - solver.d .= zero(T) - solver -end -SolverCore.reset!(solver::R2Solver, ::AbstractNLPModel) = reset!(solver) - -function SolverCore.solve!( - solver::R2Solver{T, V}, - nlp::AbstractNLPModel{T, V}, - stats::GenericExecutionStats{T, V}; - callback = (args...) -> nothing, - x::V = nlp.meta.x0, - atol::T = √eps(T), - rtol::T = √eps(T), - η1 = eps(T)^(1 / 4), - η2 = T(0.95), - γ1 = T(1 / 2), - γ2 = 1 / γ1, - αmax = T(Inf), - max_time::Float64 = 30.0, - max_eval::Int = -1, - max_iter::Int = typemax(Int), - β::T = T(0), - verbose::Int = 0, -) where {T, V} - unconstrained(nlp) || error("R2 should only be called on unconstrained problems.") - - reset!(stats) - start_time = time() - set_time!(stats, 0.0) - - x = solver.x .= x - ∇fk = solver.gx - ck = solver.cx - d = solver.d - αk = solver.α - - set_iter!(stats, 0) - set_objective!(stats, obj(nlp, x)) - - grad!(nlp, x, ∇fk) - norm_∇fk = norm(∇fk) - set_dual_residual!(stats, norm_∇fk) - - αk = 1 / 2^round(log2(norm_∇fk + 1)) - # Stopping criterion: - ϵ = atol + rtol * norm_∇fk - optimal = norm_∇fk ≤ ϵ - if optimal - @info("Optimal point found at initial point") - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk - end - if verbose > 0 && mod(stats.iter, verbose) == 0 - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk - end - - set_status!( - stats, - get_status( - nlp, - elapsed_time = stats.elapsed_time, - optimal = optimal, - max_eval = max_eval, - iter = stats.iter, - max_iter = max_iter, - max_time = max_time, - ), - ) - - solver.α = αk - callback(nlp, solver, stats) - αk = solver.α - - done = stats.status != :unknown - - while !done - if β == 0 - ck .= x .- (∇fk .* αk) - else - d .= ∇fk .* (T(1) - β) .+ d .* β - ck .= x .- (d .* αk) - end - ΔTk = norm_∇fk^2 * αk - fck = obj(nlp, ck) - if fck == -Inf - set_status!(stats, :unbounded) - break - end - - ρk = (stats.objective - fck) / ΔTk - - # Update regularization parameters - if ρk >= η2 - αk = min(αmax, γ2 * αk) - elseif ρk < η1 - αk = αk * γ1 - end - - # Acceptance of the new candidate - if ρk >= η1 - x .= ck - set_objective!(stats, fck) - grad!(nlp, x, ∇fk) - norm_∇fk = norm(∇fk) - end - - set_iter!(stats, stats.iter + 1) - set_time!(stats, time() - start_time) - set_dual_residual!(stats, norm_∇fk) - optimal = norm_∇fk ≤ ϵ - - if verbose > 0 && mod(stats.iter, verbose) == 0 - @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk - end - - set_status!( - stats, - get_status( - nlp, - elapsed_time = stats.elapsed_time, - optimal = optimal, - max_eval = max_eval, - iter = stats.iter, - max_iter = max_iter, - max_time = max_time, - ), - ) - solver.α = αk - callback(nlp, solver, stats) - αk = solver.α - - done = stats.status != :unknown - end - - set_solution!(stats, x) - return stats -end diff --git a/src/fomo.jl b/src/fomo.jl index c983205b..78e151d5 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,9 +1,10 @@ -export fomo, FomoSolver, tr, r2 +export fomo, FomoSolver, tr, r2, R2 abstract type AbstractFomoMethod end -struct tr <: AbstractFomoMethod end -struct r2 <: AbstractFomoMethod end +struct tr <: AbstractFomoMethod end +struct r2 <: AbstractFomoMethod end +struct R2og <: AbstractFomoMethod end """ fomo(nlp; kwargs...) @@ -100,10 +101,97 @@ end return solve!(solver, nlp, stats; kwargs...) end +""" + R2(nlp; kwargs...) + +A first-order quadratic regularization method for unconstrained optimization. + +For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = R2Solver(nlp) + solve!(solver, nlp; kwargs...) +Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`. + +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. +- `σmin = eps(T)`: step parameter for R2 algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.gx`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = R2(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = R2Solver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} + x = similar(nlp.meta.x0) + g = similar(nlp.meta.x0) + c = similar(nlp.meta.x0) + m = Vector{T}() + d = g # similar without momentum + return FomoSolver{T, V}(x, g, c, m, d) +end + +@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + solver = R2Solver(nlp) + stats = GenericExecutionStats(nlp) + if haskey(kwargs,:σmax) + return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) + else + return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...) + end +end + function SolverCore.reset!(solver::FomoSolver{T}) where {T} fill!(solver.m,0) solver end + SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( @@ -127,10 +215,13 @@ function SolverCore.solve!( θ1::T = T(0.1), θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, - backend = r2() + backend = r2(), + σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2` ) where {T, V} - unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") - + r2mode = (backend == R2og()) + mthname = r2mode ? "R2" : "fomo" + unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.") + reset!(stats) start_time = time() set_time!(stats, 0.0) @@ -154,12 +245,24 @@ function SolverCore.solve!( optimal = norm_∇fk ≤ ϵ if optimal @info("Optimal point found at initial point") - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + if r2mode + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + else + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + end + end if verbose > 0 && mod(stats.iter, verbose) == 0 - @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" + if r2mode + @info @sprintf "%5s %9s %7s %7s" "iter" "f" "‖∇f‖" "σ" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + else + @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk 0 + end + end set_status!( @@ -201,8 +304,10 @@ function SolverCore.solve!( αk = min(αmax, γ2 * αk) elseif ρk < η1 αk = αk * γ1 - satβ *= γ3 - d .= ∇fk .* (oneT - satβ) .+ m .* satβ + if !r2mode + satβ *= γ3 + (d .= ∇fk .* (oneT - satβ) .+ m .* satβ) + end end # Acceptance of the new candidate @@ -222,8 +327,10 @@ function SolverCore.solve!( d .= ∇fk norm_d = norm_∇fk end - avgsatβ += satβ - siter += 1 + if !r2mode + (avgsatβ += satβ) + (siter += 1) + end end set_iter!(stats, stats.iter + 1) @@ -233,7 +340,11 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + if r2mode + infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + else + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + end end set_status!( @@ -254,9 +365,10 @@ function SolverCore.solve!( done = stats.status != :unknown end - - avgsatβ /= siter - stats.solver_specific[:avgsatβ] = avgsatβ + if !r2mode + avgsatβ /= siter + stats.solver_specific[:avgsatβ] = avgsatβ + end set_solution!(stats, x) return stats end @@ -285,7 +397,7 @@ end Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::r2) where{T} +function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T} 1/2^round(log2(norm_∇fk + 1)) end @@ -299,7 +411,7 @@ end Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`). """ -function step_mult(αk::T, norm_∇fk::T, ::r2) where{T} +function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} αk end diff --git a/test/allocs.jl b/test/allocs.jl index f029c5f1..ea17a8a2 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) diff --git a/test/restart.jl b/test/restart.jl index 98f82103..eb770739 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,5 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( - (:R2, :R2Solver), + (:R2, :FomoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), @@ -44,7 +44,7 @@ end end @testset "Test restart with a different problem: $fun" for (fun, s) in ( - (:R2, :R2Solver), + (:R2, :FomoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), From 6f2f01bfd2aa151db900c0a28e15b7c7f269f543 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 10:40:13 -0500 Subject: [PATCH 098/171] Rxport R2Solver (fix doc build issue) and backend for R2 classic --- src/fomo.jl | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 78e151d5..fcd60978 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,4 @@ -export fomo, FomoSolver, tr, r2, R2 +export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og abstract type AbstractFomoMethod end @@ -34,7 +34,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. - `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region +- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0). # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -161,7 +161,7 @@ stats = R2(nlp) using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) solver = R2Solver(nlp); -stats = solve!(solver, nlp) +stats = solve!(solver, nlp, backend = R2og()) # output @@ -181,9 +181,9 @@ end solver = R2Solver(nlp) stats = GenericExecutionStats(nlp) if haskey(kwargs,:σmax) - return solve!(solver, nlp, stats; β = T(0), backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) + return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) else - return solve!(solver, nlp, stats; β = T(0), backend = R2og(), kwargs...) + return solve!(solver, nlp, stats; backend = R2og(), kwargs...) end end @@ -292,7 +292,11 @@ function SolverCore.solve!( while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - ΔTk = dot(∇fk , d) * λk + if r2mode + ΔTk = norm_∇fk^2 * λk + else + ΔTk = dot(∇fk , d) * λk + end fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -306,20 +310,20 @@ function SolverCore.solve!( αk = αk * γ1 if !r2mode satβ *= γ3 - (d .= ∇fk .* (oneT - satβ) .+ m .* satβ) + d .= ∇fk .* (oneT - satβ) .+ m .* satβ end end # Acceptance of the new candidate if ρk >= η1 x .= c - if β!=0 + if !r2mode m .= ∇fk .* (oneT - β) .+ m .* β end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) - if β!= 0 + if !r2mode satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - satβ) .+ m .* satβ norm_d = norm(d) @@ -328,8 +332,8 @@ function SolverCore.solve!( norm_d = norm_∇fk end if !r2mode - (avgsatβ += satβ) - (siter += 1) + avgsatβ += satβ + siter += 1 end end From 772adabfa1a3cc18c3d9701d974ce53d924c0509 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 11:09:20 -0500 Subject: [PATCH 099/171] add :smallstep exception (step addition underflow) --- src/fomo.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index fcd60978..49b535c6 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -292,6 +292,7 @@ function SolverCore.solve!( while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d + x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0 if r2mode ΔTk = norm_∇fk^2 * λk else @@ -364,7 +365,8 @@ function SolverCore.solve!( ), ) - αk == 0 && set_status!(stats,:exception) + αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before + callback(nlp, solver, stats) done = stats.status != :unknown From 03e18bfdc8200dbcee3587a333b5d7b86668f2bb Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 4 Feb 2024 11:17:28 -0500 Subject: [PATCH 100/171] fix small_step exception --- src/fomo.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 49b535c6..95655ea4 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -292,7 +292,8 @@ function SolverCore.solve!( while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - x == c && set_status!(stats,:smallstep) # step addition underfow on every dimensions, should happen before αk == 0 + uf = x == c # step addition underfow on every dimensions, should happen before αk == 0 + @show stats.status if r2mode ΔTk = norm_∇fk^2 * λk else @@ -365,11 +366,13 @@ function SolverCore.solve!( ), ) - αk == 0 && set_status!(stats,:exception) # :smallstep exception should happen before - callback(nlp, solver, stats) + uf && set_status!(stats,:small_step) + αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + done = stats.status != :unknown + @show stats.status αk end if !r2mode avgsatβ /= siter From 81cb4432bb848ce3183cd9ab4a0c08b4bf05a3cd Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 5 Feb 2024 11:20:03 -0500 Subject: [PATCH 101/171] remove terminal ouput --- src/fomo.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 95655ea4..497bdfb3 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -293,7 +293,6 @@ function SolverCore.solve!( λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d uf = x == c # step addition underfow on every dimensions, should happen before αk == 0 - @show stats.status if r2mode ΔTk = norm_∇fk^2 * λk else @@ -372,7 +371,6 @@ function SolverCore.solve!( αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before done = stats.status != :unknown - @show stats.status αk end if !r2mode avgsatβ /= siter From 2048735fea1d42cc865ce8befb1a695d7311ff9d Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 5 Feb 2024 16:24:01 -0500 Subject: [PATCH 102/171] update docstring, add rhok to the output --- src/fomo.jl | 134 +++++++++++++++------------------------------------- 1 file changed, 37 insertions(+), 97 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 497bdfb3..30fc236f 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -8,6 +8,7 @@ struct R2og <: AbstractFomoMethod end """ fomo(nlp; kwargs...) + R2(nlp; kwargs...) A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. @@ -16,6 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) +*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method. +For advanced usage: + + solver = R2Solver(nlp) + solve!(solver, nlp; kwargs...) + # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -25,16 +32,16 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. -- `γ3 = T(1/2)` : momentum factor satβ update parameter in case of unsuccessful iteration. +- `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||², with mk memory of past gradient and satβ ∈ [0,β]. -- `θ2 = sqrt(T)^(1/3)` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)||, with mk memory of past gradient and satβ ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization ( no momentum, optimized for β = 0). +- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -96,78 +103,11 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) - solver_specific = Dict(:avgsatβ => T(0.)) + solver_specific = Dict(:avgβmax => T(0.)) stats = GenericExecutionStats(nlp;solver_specific=solver_specific) return solve!(solver, nlp, stats; kwargs...) end -""" - R2(nlp; kwargs...) - -A first-order quadratic regularization method for unconstrained optimization. - -For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`: - - solver = R2Solver(nlp) - solve!(solver, nlp; kwargs...) -Important: `R2` and `R2Solver` are only interfaces to `FomoSolver`, a first order solver that includes momentum strategy. The momentum strategy is ignore with `R2`. - -# Arguments -- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. - -# Keyword arguments -- `x::V = nlp.meta.x0`: the initial guess. -- `atol::T = √eps(T)`: absolute tolerance. -- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. -- `σmin = eps(T)`: step parameter for R2 algorithm. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. -- `max_time::Float64 = 30.0`: maximum time limit in seconds. -- `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - -# Output -The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. - -# Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.x`: current iterate; -- `solver.gx`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.dual_feas`: norm of current gradient; - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. - - `stats.elapsed_time`: elapsed time in seconds. - -# Examples -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -stats = R2(nlp) - -# output - -"Execution stats: first-order stationary" -``` - -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -solver = R2Solver(nlp); -stats = solve!(solver, nlp, backend = R2og()) - -# output - -"Execution stats: first-order stationary" -``` -""" function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} x = similar(nlp.meta.x0) g = similar(nlp.meta.x0) @@ -177,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} return FomoSolver{T, V}(x, g, c, m, d) end -@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = R2Solver(nlp) stats = GenericExecutionStats(nlp) if haskey(kwargs,:σmax) @@ -256,11 +196,11 @@ function SolverCore.solve!( end if verbose > 0 && mod(stats.iter, verbose) == 0 if r2mode - @info @sprintf "%5s %9s %7s %7s" "iter" "f" "‖∇f‖" "σ" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN else - @info @sprintf "%5s %9s %7s %7s %7s" "iter" "f" "‖∇f‖" "α" "staβ" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk 0 + @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 end end @@ -284,15 +224,15 @@ function SolverCore.solve!( d .= ∇fk norm_d = norm_∇fk - satβ = T(0) + βmax = T(0) ρk = T(0) - avgsatβ = T(0) + avgβmax = T(0) siter = 0 oneT = T(1) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d - uf = x == c # step addition underfow on every dimensions, should happen before αk == 0 + step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0 if r2mode ΔTk = norm_∇fk^2 * λk else @@ -310,8 +250,8 @@ function SolverCore.solve!( elseif ρk < η1 αk = αk * γ1 if !r2mode - satβ *= γ3 - d .= ∇fk .* (oneT - satβ) .+ m .* satβ + βmax *= γ3 + d .= ∇fk .* (oneT - βmax) .+ m .* βmax end end @@ -325,15 +265,15 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - satβ = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) - d .= ∇fk .* (oneT - satβ) .+ m .* satβ + βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) + d .= ∇fk .* (oneT - βmax) .+ m .* βmax norm_d = norm(d) else d .= ∇fk norm_d = norm_∇fk end if !r2mode - avgsatβ += satβ + avgβmax += βmax siter += 1 end end @@ -346,9 +286,9 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline if r2mode - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk else - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk satβ + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax end end @@ -367,14 +307,14 @@ function SolverCore.solve!( callback(nlp, solver, stats) - uf && set_status!(stats,:small_step) - αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + step_underflow && set_status!(stats,:small_step) + αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before done = stats.status != :unknown end if !r2mode - avgsatβ /= siter - stats.solver_specific[:avgsatβ] = avgsatβ + avgβmax /= siter + stats.solver_specific[:avgβmax] = avgβmax end set_solution!(stats, x) return stats @@ -383,11 +323,11 @@ end """ find_beta(m, ∇f, norm_∇f, β, θ1, θ2) -Compute satβ which saturates the contibution of the momentum term to the gradient. -`satβ` is computed such that the two gradient-related conditions are ensured: -1. [(1-satβ)∇f(xk) + satβ mk.∇f(xk)] ≥ θ1||∇f(xk)||² -2. ||∇f(xk)|| ≥ θ2||(1-satβ)∇f(xk) + satβ mk.∇f(xk)|| -with `m` memory of past gradient/ +Compute βmax which saturates the contibution of the momentum term to the gradient. +`βmax` is computed such that the two gradient-related conditions are ensured: +1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² +2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))|| +with `m` memory of past gradient """ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) @@ -395,7 +335,7 @@ function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n2 = norm(m .- ∇f) β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β - return min(β,min(β1,β2)) + return min(β,min(β1,β2)) end """ From 72b2456bcaa09a87f28a17eb95c92998bf8c17b3 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Mon, 5 Feb 2024 15:35:49 -0500 Subject: [PATCH 103/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 30fc236f..cf4697ae 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -329,7 +329,7 @@ Compute βmax which saturates the contibution of the momentum term to the gradie 2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))|| with `m` memory of past gradient """ -function find_beta(m::V,∇f::V,norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} +function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} dotprod = dot(m,∇f) n1 = norm_∇f^2 - dotprod n2 = norm(m .- ∇f) From 049e31ce575677683c9c457f5abbe36274aae109 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 6 Feb 2024 12:22:54 -0500 Subject: [PATCH 104/171] - create variable for dot(m,nabla f): avoid computation of dot(d, nabla f) in model decrease, is used in find_beta (interface updated) - update docstrings --- src/fomo.jl | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index cf4697ae..83fe6648 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -21,7 +21,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i For advanced usage: solver = R2Solver(nlp) - solve!(solver, nlp; kwargs...) + solve!(solver, nlp; backend = R2og(), kwargs...) # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -39,10 +39,12 @@ For advanced usage: - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))||, with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). +*Warning:* `R2og()` backend should be used only for advanced usage as described above. + # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -174,6 +176,7 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) + grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) @@ -229,15 +232,12 @@ function SolverCore.solve!( avgβmax = T(0) siter = 0 oneT = T(1) + mdot∇f = T(0) # dot(m,∇fk) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0 - if r2mode - ΔTk = norm_∇fk^2 * λk - else - ΔTk = dot(∇fk , d) * λk - end + ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -260,17 +260,15 @@ function SolverCore.solve!( x .= c if !r2mode m .= ∇fk .* (oneT - β) .+ m .* β + mdot∇f = dot(m,∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - βmax = find_beta(m, ∇fk, norm_∇fk, β, θ1, θ2) + βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ m .* βmax norm_d = norm(d) - else - d .= ∇fk - norm_d = norm_∇fk end if !r2mode avgβmax += βmax @@ -321,17 +319,16 @@ function SolverCore.solve!( end """ -find_beta(m, ∇f, norm_∇f, β, θ1, θ2) +find_beta(m, md∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² -2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * dot(m.∇f(xk))|| -with `m` memory of past gradient +2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m|| +with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` """ -function find_beta(m::V, ∇f::V, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} - dotprod = dot(m,∇f) - n1 = norm_∇f^2 - dotprod +function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} + n1 = norm_∇f^2 - mdot∇f n2 = norm(m .- ∇f) β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β @@ -354,9 +351,10 @@ end """ step_mult(αk::T, norm_∇fk::T, ::r2) + step_mult(αk::T, norm_∇fk::T, ::R2og) step_mult(αk::T, norm_∇fk::T, ::tr) -Compute step size multiplier: `αk` for quadratic regularization(`::r2`) and `αk/norm_∇fk` for trust region (`::tr`). +Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`). """ function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} αk From 2064b6b263ca04e6c48a1fe63c3de639535b6ec0 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 6 Feb 2024 12:54:53 -0500 Subject: [PATCH 105/171] rename `m` as `momentum` --- src/fomo.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 83fe6648..b234622e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -171,7 +171,7 @@ function SolverCore.solve!( x = solver.x .= x ∇fk = solver.g c = solver.c - m = solver.m + momentum = solver.m d = solver.d set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -232,7 +232,7 @@ function SolverCore.solve!( avgβmax = T(0) siter = 0 oneT = T(1) - mdot∇f = T(0) # dot(m,∇fk) + mdot∇f = T(0) # dot(momentum,∇fk) while !done λk = step_mult(αk,norm_d,backend) c .= x .- λk .* d @@ -251,7 +251,7 @@ function SolverCore.solve!( αk = αk * γ1 if !r2mode βmax *= γ3 - d .= ∇fk .* (oneT - βmax) .+ m .* βmax + d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax end end @@ -259,7 +259,7 @@ function SolverCore.solve!( if ρk >= η1 x .= c if !r2mode - m .= ∇fk .* (oneT - β) .+ m .* β + momentum .= ∇fk .* (oneT - β) .+ momentum .* β mdot∇f = dot(m,∇fk) end set_objective!(stats, fck) @@ -267,7 +267,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) if !r2mode βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) - d .= ∇fk .* (oneT - βmax) .+ m .* βmax + d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end if !r2mode @@ -325,7 +325,7 @@ Compute βmax which saturates the contibution of the momentum term to the gradie `βmax` is computed such that the two gradient-related conditions are ensured: 1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² 2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m|| -with `m` memory of past gradient and `mdot∇f = dot(m,∇f(xk))` +with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` """ function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n1 = norm_∇f^2 - mdot∇f From 16a164ab9cb449e0b202d036761ff97b7b81ad89 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Tue, 6 Feb 2024 12:47:50 -0500 Subject: [PATCH 106/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index b234622e..1f6a45cb 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -200,7 +200,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 if r2mode @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 From d521169e0f9039986f53fb6531dc06b840bcc8d6 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Tue, 6 Feb 2024 12:48:10 -0500 Subject: [PATCH 107/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 1f6a45cb..c1ef4d1f 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -203,7 +203,7 @@ function SolverCore.solve!( infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 end end From 56dac0dde8cd42d0b1cab27a7d4008c4db56a26b Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Tue, 6 Feb 2024 12:48:26 -0500 Subject: [PATCH 108/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index c1ef4d1f..0a202466 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -205,7 +205,6 @@ function SolverCore.solve!( @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 end - end set_status!( From 45bbd4fb9d46c9094e7c85b1e5c5dd3a96098287 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 6 Feb 2024 13:05:58 -0500 Subject: [PATCH 109/171] update docstring, fix `m` to `momentum` renaming. --- src/fomo.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 0a202466..93a5c11a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -38,8 +38,8 @@ For advanced usage: - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ||∇f(xk)||², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m||, with m memory of past gradient and βmax ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). @@ -259,13 +259,13 @@ function SolverCore.solve!( x .= c if !r2mode momentum .= ∇fk .* (oneT - β) .+ momentum .* β - mdot∇f = dot(m,∇fk) + mdot∇f = dot(momentum,∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - βmax = find_beta(m, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) + βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end @@ -322,8 +322,8 @@ find_beta(m, md∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: -1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ||∇f(xk)||² -2. ||∇f(xk)|| ≥ θ2 * ||(1-βmax) * ∇f(xk) + βmax * m|| +1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖² +2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖ with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` """ function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} @@ -336,6 +336,7 @@ end """ init_alpha(norm_∇fk::T, ::r2) + init_alpha(norm_∇fk::T, ::R2og) init_alpha(norm_∇fk::T, ::tr) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. From cb364960b75cd10a0997aede1fb5974f1fab0144 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:31:41 -0500 Subject: [PATCH 110/171] Update src/fomo.jl Co-authored-by: tmigot --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 93a5c11a..c04d5725 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -122,7 +122,7 @@ end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = R2Solver(nlp) stats = GenericExecutionStats(nlp) - if haskey(kwargs,:σmax) + if haskey(kwargs,:σmin) return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) else return solve!(solver, nlp, stats; backend = R2og(), kwargs...) From 98543b8a8da6b1fab7d5e359fba904c6c6257dab Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 17 Feb 2024 17:08:10 -0500 Subject: [PATCH 111/171] Update test/allocs.jl Co-authored-by: tmigot --- test/allocs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/allocs.jl b/test/allocs.jl index ea17a8a2..f029c5f1 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) From 6ff2cc1d87f0cba96d5443723612e2e26cbfd0b5 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 17 Feb 2024 17:09:55 -0500 Subject: [PATCH 112/171] Update src/fomo.jl Co-authored-by: tmigot --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index c04d5725..b84c8664 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,6 +1,6 @@ export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og -abstract type AbstractFomoMethod end +abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end struct tr <: AbstractFomoMethod end struct r2 <: AbstractFomoMethod end From 6b93fa5e8296e6b63658cb04eb9375f17bedaca0 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 17 Feb 2024 17:48:27 -0500 Subject: [PATCH 113/171] put alpha in solver structure, uncomment callback test --- src/fomo.jl | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index b84c8664..6e2c018a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,6 +1,6 @@ export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og -abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end +abstract type AbstractFomoMethod end struct tr <: AbstractFomoMethod end struct r2 <: AbstractFomoMethod end @@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver c::V m::V d::V + α::T end function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} @@ -100,7 +101,7 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = fill!(similar(nlp.meta.x0), 0) d = fill!(similar(nlp.meta.x0), 0) - return FomoSolver{T, V}(x, g, c, m, d) + return FomoSolver{T, V}(x, g, c, m, d, T(0)) end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -116,7 +117,7 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = Vector{T}() d = g # similar without momentum - return FomoSolver{T, V}(x, g, c, m, d) + return FomoSolver{T, V}(x, g, c, m, d, T(0)) end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -181,7 +182,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) - αk = init_alpha(norm_∇fk,backend) + solver.α = init_alpha(norm_∇fk,backend) # Stopping criterion: ϵ = atol + rtol * norm_∇fk @@ -190,20 +191,20 @@ function SolverCore.solve!( @info("Optimal point found at initial point") if r2mode @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α else @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end end if verbose > 0 && mod(stats.iter, verbose) == 0 if r2mode @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk NaN 0 + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0 end end @@ -233,9 +234,9 @@ function SolverCore.solve!( oneT = T(1) mdot∇f = T(0) # dot(momentum,∇fk) while !done - λk = step_mult(αk,norm_d,backend) + λk = step_mult(solver.α,norm_d,backend) c .= x .- λk .* d - step_underflow = x == c # step addition underfow on every dimensions, should happen before αk == 0 + step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0 ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk fck = obj(nlp, c) if fck == -Inf @@ -245,9 +246,9 @@ function SolverCore.solve!( ρk = (stats.objective - fck) / ΔTk # Update regularization parameters if ρk >= η2 - αk = min(αmax, γ2 * αk) + solver.α = min(αmax, γ2 * solver.α) elseif ρk < η1 - αk = αk * γ1 + solver.α = solver.α * γ1 if !r2mode βmax *= γ3 d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax @@ -283,9 +284,9 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline if r2mode - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/αk ρk + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk else - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk ρk βmax + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax end end @@ -305,7 +306,7 @@ function SolverCore.solve!( callback(nlp, solver, stats) step_underflow && set_status!(stats,:small_step) - αk == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + solver.α == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before done = stats.status != :unknown end @@ -350,16 +351,16 @@ function init_alpha(norm_∇fk::T, ::tr) where{T} end """ - step_mult(αk::T, norm_∇fk::T, ::r2) - step_mult(αk::T, norm_∇fk::T, ::R2og) - step_mult(αk::T, norm_∇fk::T, ::tr) + step_mult(α::T, norm_∇fk::T, ::r2) + step_mult(α::T, norm_∇fk::T, ::R2og) + step_mult(α::T, norm_∇fk::T, ::tr) -Compute step size multiplier: `αk` for quadratic regularization(`::r2` and `::R2og`) and `αk/norm_∇fk` for trust region (`::tr`). +Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ -function step_mult(αk::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} - αk +function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} + α end -function step_mult(αk::T, norm_∇fk::T, ::tr) where{T} - αk/norm_∇fk +function step_mult(α::T, norm_∇fk::T, ::tr) where{T} + α/norm_∇fk end \ No newline at end of file From 65959d1dfab6483fb663b1bcb1f458e4b5bd111d Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 17 Feb 2024 18:14:08 -0500 Subject: [PATCH 114/171] allocate memory for norm vector in find_beta --- src/fomo.jl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6e2c018a..f047169d 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -92,6 +92,7 @@ mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver c::V m::V d::V + p::V α::T end @@ -101,7 +102,8 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = fill!(similar(nlp.meta.x0), 0) d = fill!(similar(nlp.meta.x0), 0) - return FomoSolver{T, V}(x, g, c, m, d, T(0)) + p = similar(nlp.meta.x0) + return FomoSolver{T, V}(x, g, c, m, d, p, T(0)) end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -117,7 +119,8 @@ function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} c = similar(nlp.meta.x0) m = Vector{T}() d = g # similar without momentum - return FomoSolver{T, V}(x, g, c, m, d, T(0)) + p = Vector{T}() + return FomoSolver{T, V}(x, g, c, m, d, p, T(0)) end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -174,6 +177,7 @@ function SolverCore.solve!( c = solver.c momentum = solver.m d = solver.d + p = solver.p set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -266,7 +270,8 @@ function SolverCore.solve!( grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if !r2mode - βmax = find_beta(momentum, ∇fk, mdot∇f, norm_∇fk, β, θ1, θ2) + p .= momentum .- ∇fk + βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end @@ -327,9 +332,9 @@ Compute βmax which saturates the contibution of the momentum term to the gradie 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖ with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` """ -function find_beta(m::V, ∇f::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} +function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n1 = norm_∇f^2 - mdot∇f - n2 = norm(m .- ∇f) + n2 = norm(p) β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β return min(β,min(β1,β2)) From 937288838934af02ecb90fe309913e9ef3e022c8 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 17 Feb 2024 22:45:36 -0500 Subject: [PATCH 115/171] Add FoSolver structure for no-momentum case. Change backend name to step_backend to avoid confusion. Update docstrings and tests. --- src/fomo.jl | 123 +++++++++++++++++++++++++------------------ test/test_solvers.jl | 2 +- 2 files changed, 74 insertions(+), 51 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index f047169d..c672b7b7 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,10 +1,10 @@ -export fomo, FomoSolver, R2, R2Solver, tr, r2, R2og +export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step -abstract type AbstractFomoMethod end +abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end -struct tr <: AbstractFomoMethod end -struct r2 <: AbstractFomoMethod end -struct R2og <: AbstractFomoMethod end +abstract type AbstractFomoMethod end +struct tr_step <: AbstractFomoMethod end +struct r2_step <: AbstractFomoMethod end """ fomo(nlp; kwargs...) @@ -17,11 +17,12 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -*Quadratic Regularization (R2)*: if the user do not want to use momentum (β = 0), it is recommended to use the memory-optimized `R2` method. +**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method. For advanced usage: - solver = R2Solver(nlp) - solve!(solver, nlp; backend = R2og(), kwargs...) + solver = FoSolver(nlp) + solve!(solver, nlp; kwargs...) +Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -33,7 +34,7 @@ For advanced usage: - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. -- `αmax = 1/eps(T)`: step parameter for fomo algorithm. +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. @@ -41,9 +42,7 @@ For advanced usage: - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. - `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `backend = r2()`: model-based method employed. Options are `r2()` for quadratic regulation and `tr()` for trust-region, `R2og()` for classical quadratic regularization (no momentum, optimized for β = 0). - -*Warning:* `R2og()` backend should be used only for advanced usage as described above. +- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -65,6 +64,7 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. # Examples +## `fomo` ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -83,10 +83,31 @@ stats = solve!(solver, nlp) # output +"Execution stats: first-order stationary" +``` +## `R2` +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = R2(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FoSolver(nlp); +stats = solve!(solver, nlp) + +# output + "Execution stats: first-order stationary" ``` """ -mutable struct FomoSolver{T, V} <: AbstractOptimizationSolver +mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver x::V g::V c::V @@ -113,23 +134,28 @@ end return solve!(solver, nlp, stats; kwargs...) end -function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} + +mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver + x::V + g::V + c::V + α::T +end + +function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} x = similar(nlp.meta.x0) g = similar(nlp.meta.x0) c = similar(nlp.meta.x0) - m = Vector{T}() - d = g # similar without momentum - p = Vector{T}() - return FomoSolver{T, V}(x, g, c, m, d, p, T(0)) + return FoSolver{T, V}(x, g, c, T(0)) end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} - solver = R2Solver(nlp) + solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) if haskey(kwargs,:σmin) - return solve!(solver, nlp, stats; backend = R2og(), αmax = 1/kwargs[:σmin], kwargs...) + return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...) else - return solve!(solver, nlp, stats; backend = R2og(), kwargs...) + return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) end end @@ -141,7 +167,7 @@ end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( - solver::FomoSolver{T, V}, + solver::AbstractFirstOrderSolver, nlp::AbstractNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, @@ -161,12 +187,11 @@ function SolverCore.solve!( θ1::T = T(0.1), θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, - backend = r2(), + step_backend = r2_step(), σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2` ) where {T, V} - r2mode = (backend == R2og()) - mthname = r2mode ? "R2" : "fomo" - unconstrained(nlp) || error("$mthname should only be called on unconstrained problems.") + use_momentum = typeof(solver) <: FomoSolver + unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") reset!(stats) start_time = time() @@ -175,9 +200,9 @@ function SolverCore.solve!( x = solver.x .= x ∇fk = solver.g c = solver.c - momentum = solver.m - d = solver.d - p = solver.p + momentum = use_momentum ? solver.m : nothing # not used if no momentum + d = use_momentum ? solver.d : solver.g # g = d if no momentum + p = use_momentum ? solver.p : nothing # not used if no momentum set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) @@ -186,14 +211,14 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) - solver.α = init_alpha(norm_∇fk,backend) + solver.α = init_alpha(norm_∇fk,step_backend) # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ if optimal @info("Optimal point found at initial point") - if r2mode + if !use_momentum @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α else @@ -203,7 +228,7 @@ function SolverCore.solve!( end if verbose > 0 && mod(stats.iter, verbose) == 0 - if r2mode + if !use_momentum @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN else @@ -238,10 +263,10 @@ function SolverCore.solve!( oneT = T(1) mdot∇f = T(0) # dot(momentum,∇fk) while !done - λk = step_mult(solver.α,norm_d,backend) + λk = step_mult(solver.α,norm_d,step_backend) c .= x .- λk .* d step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0 - ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk + ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum fck = obj(nlp, c) if fck == -Inf set_status!(stats, :unbounded) @@ -253,7 +278,7 @@ function SolverCore.solve!( solver.α = min(αmax, γ2 * solver.α) elseif ρk < η1 solver.α = solver.α * γ1 - if !r2mode + if use_momentum βmax *= γ3 d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax end @@ -262,20 +287,20 @@ function SolverCore.solve!( # Acceptance of the new candidate if ρk >= η1 x .= c - if !r2mode + if use_momentum momentum .= ∇fk .* (oneT - β) .+ momentum .* β mdot∇f = dot(momentum,∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) - if !r2mode + if use_momentum p .= momentum .- ∇fk βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end - if !r2mode + if use_momentum avgβmax += βmax siter += 1 end @@ -288,7 +313,7 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline - if r2mode + if !use_momentum infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk else infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax @@ -315,7 +340,7 @@ function SolverCore.solve!( done = stats.status != :unknown end - if !r2mode + if use_momentum avgβmax /= siter stats.solver_specific[:avgβmax] = avgβmax end @@ -341,31 +366,29 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where end """ - init_alpha(norm_∇fk::T, ::r2) - init_alpha(norm_∇fk::T, ::R2og) - init_alpha(norm_∇fk::T, ::tr) + init_alpha(norm_∇fk::T, ::r2_step) + init_alpha(norm_∇fk::T, ::tr_step) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::Union{r2,R2og}) where{T} +function init_alpha(norm_∇fk::T, ::r2_step) where{T} 1/2^round(log2(norm_∇fk + 1)) end -function init_alpha(norm_∇fk::T, ::tr) where{T} +function init_alpha(norm_∇fk::T, ::tr_step) where{T} norm_∇fk/2^round(log2(norm_∇fk + 1)) end """ - step_mult(α::T, norm_∇fk::T, ::r2) - step_mult(α::T, norm_∇fk::T, ::R2og) - step_mult(α::T, norm_∇fk::T, ::tr) + step_mult(α::T, norm_∇fk::T, ::r2_step) + step_mult(α::T, norm_∇fk::T, ::tr_step) Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ -function step_mult(α::T, norm_∇fk::T, ::Union{r2,R2og}) where{T} +function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T} α end -function step_mult(α::T, norm_∇fk::T, ::tr) where{T} +function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T} α/norm_∇fk end \ No newline at end of file diff --git a/test/test_solvers.jl b/test/test_solvers.jl index ba182731..d9266d29 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -9,7 +9,7 @@ function tests() ("tron", tron), ("R2", R2), ("fomo_r2", fomo), - ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,backend = JSOSolvers.tr(); kwargs...)), + ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From dc911e36c2f5b01c20ea2f3aba9ff1a59edb4f3f Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 18 Feb 2024 12:56:34 -0500 Subject: [PATCH 116/171] fix allocs tests --- test/allocs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/allocs.jl b/test/allocs.jl index f029c5f1..1fe38f23 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,7 +30,7 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :FomoSolver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :FoSolver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) From 51c3445c5008967166e95d589d83fe292ed9ef8f Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 18 Feb 2024 14:39:24 -0500 Subject: [PATCH 117/171] add reset! function to FoSolver --- src/fomo.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/fomo.jl b/src/fomo.jl index c672b7b7..7a320173 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -166,6 +166,13 @@ end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) + +function SolverCore.reset!(solver::FoSolver{T}) where {T} + solver +end + +SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver) + function SolverCore.solve!( solver::AbstractFirstOrderSolver, nlp::AbstractNLPModel{T, V}, From ece54bc2cd1352fcbabb0a1e1f3c513fec48b1f5 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sun, 18 Feb 2024 15:22:43 -0500 Subject: [PATCH 118/171] remove `R2Solver`, replaced by `FoSolver` --- src/fomo.jl | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 7a320173..c9a95e81 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,4 @@ -export fomo, FomoSolver, FoSolver, R2, R2Solver, tr_step, r2_step +export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end @@ -134,6 +134,12 @@ end return solve!(solver, nlp, stats; kwargs...) end +function SolverCore.reset!(solver::FomoSolver{T}) where {T} + fill!(solver.m,0) + solver +end + +SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver x::V @@ -159,14 +165,6 @@ end end end -function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m,0) - solver -end - -SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) - - function SolverCore.reset!(solver::FoSolver{T}) where {T} solver end From 4d13504ffea8c69451b4a41c426c17ad0482bd7c Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:53:40 -0500 Subject: [PATCH 119/171] Update test/test_solvers.jl Co-authored-by: Dominique --- test/test_solvers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index d9266d29..eb9029e1 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -9,7 +9,7 @@ function tests() ("tron", tron), ("R2", R2), ("fomo_r2", fomo), - ("fomo_tr", (nlp; kwargs...) -> fomo(nlp,step_backend = JSOSolvers.tr_step(); kwargs...)), + ("fomo_tr", (nlp; kwargs...) -> fomo(nlp, step_backend = JSOSolvers.tr_step(); kwargs...)), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc) From 6a8af9c3496dfadd9cbfb361264dc2e989d1ae2a Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:54:14 -0500 Subject: [PATCH 120/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index c9a95e81..d28ff085 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -10,7 +10,7 @@ struct r2_step <: AbstractFomoMethod end fomo(nlp; kwargs...) R2(nlp; kwargs...) -A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps. For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: From d6d5dc9dbcac39998137dc82e0cc6441949417d4 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:56:37 -0500 Subject: [PATCH 121/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index d28ff085..c96a7aff 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -12,7 +12,7 @@ struct r2_step <: AbstractFomoMethod end A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps. -For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: +For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) From e9456ae879c18358e4c81566b93d8b79e83f4bfe Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:00:58 -0500 Subject: [PATCH 122/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index c96a7aff..1f2f4d4f 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -32,7 +32,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. From f39bba2848468c09d18218e681ca4fabd390ea08 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:06:07 -0500 Subject: [PATCH 123/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 1f2f4d4f..6c66ae4a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -34,7 +34,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. -- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `αmax = 1/eps(T)`: maximum step parameter for fomo solver. - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. From 5d9d12d9fd283e88df9b37a8c8d9bf9ad1fdb925 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:07:13 -0500 Subject: [PATCH 124/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6c66ae4a..0cc734ee 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -35,7 +35,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: maximum step parameter for fomo solver. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited). - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. From aa48abde49cb177bd98f6baf5eb0de49cef481df Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:08:03 -0500 Subject: [PATCH 125/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 0cc734ee..c59f9791 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -25,6 +25,7 @@ For advanced usage: Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). # Arguments + - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments From bd34530d8795d941555333913e8e3e2d6d2be7c1 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:10:04 -0500 Subject: [PATCH 126/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index c59f9791..40865fa2 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -29,6 +29,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments + - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. From dd324ef3bcef8cb9de56eb85103a944e9987314a Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:10:45 -0500 Subject: [PATCH 127/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 40865fa2..40bf8bec 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -47,6 +47,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output + The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback From 6f52bcf6903d825e8a70f4305aac66801304a883 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:11:28 -0500 Subject: [PATCH 128/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 40bf8bec..deac5778 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -51,6 +51,7 @@ Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback + The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. From 35ced21fff1db4815c7b0e3fd71e7c1af3496e1c Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:12:15 -0500 Subject: [PATCH 129/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index deac5778..645a0d68 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -68,6 +68,7 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. # Examples + ## `fomo` ```jldoctest using JSOSolvers, ADNLPModels From ed95a20c9369a174296f554792d343bf6ea4d4c5 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:12:59 -0500 Subject: [PATCH 130/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 645a0d68..b1574045 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -70,6 +70,7 @@ Notably, you can access, and modify, the following: # Examples ## `fomo` + ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) From dfd8068b367e6baf46bb3e45aa4ae1dc715aec31 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:15:43 -0500 Subject: [PATCH 131/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index b1574045..c955b365 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -136,7 +136,7 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) solver_specific = Dict(:avgβmax => T(0.)) - stats = GenericExecutionStats(nlp;solver_specific=solver_specific) + stats = GenericExecutionStats(nlp; solver_specific = solver_specific) return solve!(solver, nlp, stats; kwargs...) end From 3acfac02b4b46da7bc74b34b338ce6b86280e2ac Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:16:23 -0500 Subject: [PATCH 132/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index c955b365..425ee41d 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -92,6 +92,7 @@ stats = solve!(solver, nlp) "Execution stats: first-order stationary" ``` ## `R2` + ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) From b34ac990086c3227cc7babbe55b2729f9a6921f0 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:16:50 -0500 Subject: [PATCH 133/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 425ee41d..edf16b14 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -142,7 +142,7 @@ end end function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m,0) + fill!(solver.m, 0) solver end From b0a04928ae3923c820443697ea7ffc83fd5653fa Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:17:15 -0500 Subject: [PATCH 134/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index edf16b14..141fe4f0 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -165,7 +165,7 @@ end @doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) - if haskey(kwargs,:σmin) + if haskey(kwargs, :σmin) return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...) else return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) From 2d6055b9689f6a705628904d0797c9d70b5fe362 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:18:07 -0500 Subject: [PATCH 135/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 141fe4f0..69426ffe 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -311,8 +311,6 @@ function SolverCore.solve!( βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) - end - if use_momentum avgβmax += βmax siter += 1 end From 090be62973c2be85cdb00dfbf42a37ee6fa352bc Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:19:48 -0500 Subject: [PATCH 136/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 69426ffe..414bfb70 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -218,7 +218,6 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) - grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) From 29989a79dcffb08a9d2feff9947df127172794a8 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:20:43 -0500 Subject: [PATCH 137/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 414bfb70..79ba88b6 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -222,7 +222,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) - solver.α = init_alpha(norm_∇fk,step_backend) + solver.α = init_alpha(norm_∇fk, step_backend) # Stopping criterion: ϵ = atol + rtol * norm_∇fk From 83c85b120ba4f66d5d72932ca26152a8da7d3c0f Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 21 Feb 2024 23:21:29 -0500 Subject: [PATCH 138/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 79ba88b6..5c200a9e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -236,7 +236,6 @@ function SolverCore.solve!( @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end - end if verbose > 0 && mod(stats.iter, verbose) == 0 if !use_momentum From 5de664b9e2dcdbc9e1704f5434eba1f8e74b9271 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 22 Feb 2024 00:04:15 -0500 Subject: [PATCH 139/171] Add TR solver (trust region with linear model) fix spacing update docstrings --- src/fomo.jl | 92 ++++++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 5c200a9e..fa811924 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -2,75 +2,69 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end -abstract type AbstractFomoMethod end -struct tr_step <: AbstractFomoMethod end -struct r2_step <: AbstractFomoMethod end +abstract type AbstractFOMethod end +struct tr_step <: AbstractFOMethod end +struct r2_step <: AbstractFOMethod end """ fomo(nlp; kwargs...) R2(nlp; kwargs...) -A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region steps. +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. -For advanced usage, first define a `FomoSolver` or `FoSolver` to preallocate the memory used in the solver, and then call `solve!`: +For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -**Quadratic Regularization (R2)**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` method. +**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. For advanced usage: solver = FoSolver(nlp) - solve!(solver, nlp; kwargs...) -Extra keyword arguments `σmin` is accepted (`αmax` will be set to `1/σmin`). - + solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x) + solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ + # Arguments - - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments - - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization/trust region update parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. -- `αmax = 1/eps(T)`: maximum step parameter for fomo solver. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function (-1 means unlimited). +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk)) ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖, with m memory of past gradient and βmax ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output - The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback - The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. +In particular, setting `stats.status = :user || stats.stats = :unknown` will stop the algorithm. All relevant information should be available in `nlp` and `solver`. Notably, you can access, and modify, the following: - `solver.x`: current iterate; - `solver.gx`: current gradient; - `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.dual_feas`: norm of current gradient; - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. - - `stats.elapsed_time`: elapsed time in seconds. + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. # Examples - ## `fomo` - ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -92,7 +86,6 @@ stats = solve!(solver, nlp) "Execution stats: first-order stationary" ``` ## `R2` - ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -137,12 +130,12 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) solver_specific = Dict(:avgβmax => T(0.)) - stats = GenericExecutionStats(nlp; solver_specific = solver_specific) + stats = GenericExecutionStats(nlp;solver_specific=solver_specific) return solve!(solver, nlp, stats; kwargs...) end function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m, 0) + fill!(solver.m,0) solver end @@ -162,14 +155,18 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end -@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) - if haskey(kwargs, :σmin) - return solve!(solver, nlp, stats; step_backend = r2_step(), αmax = 1/kwargs[:σmin], kwargs...) - else - return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) - end + return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) +end + +@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + fo(nlp; step_backend = r2_step(), kwargs...) +end + +@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + fo(nlp; step_backend = tr_step(), kwargs...) end function SolverCore.reset!(solver::FoSolver{T}) where {T} @@ -200,7 +197,6 @@ function SolverCore.solve!( θ2::T = T(eps(T)^(1/3)), verbose::Int = 0, step_backend = r2_step(), - σmin = nothing # keep consistency with R2 interface. kwargs immutable, can't delete it in `R2` ) where {T, V} use_momentum = typeof(solver) <: FomoSolver unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") @@ -218,6 +214,7 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) + grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) @@ -236,14 +233,15 @@ function SolverCore.solve!( @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end + end if verbose > 0 && mod(stats.iter, verbose) == 0 if !use_momentum @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α NaN + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0 else @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α NaN 0 + infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0 end end @@ -273,7 +271,7 @@ function SolverCore.solve!( oneT = T(1) mdot∇f = T(0) # dot(momentum,∇fk) while !done - λk = step_mult(solver.α,norm_d,step_backend) + λk = step_mult(solver.α, norm_d, step_backend) c .= x .- λk .* d step_underflow = x == c # step addition underfow on every dimensions, should happen before solver.α == 0 ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum @@ -299,7 +297,7 @@ function SolverCore.solve!( x .= c if use_momentum momentum .= ∇fk .* (oneT - β) .+ momentum .* β - mdot∇f = dot(momentum,∇fk) + mdot∇f = dot(momentum, ∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) @@ -309,6 +307,8 @@ function SolverCore.solve!( βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) + end + if use_momentum avgβmax += βmax siter += 1 end @@ -343,8 +343,8 @@ function SolverCore.solve!( callback(nlp, solver, stats) - step_underflow && set_status!(stats,:small_step) - solver.α == 0 && set_status!(stats,:exception) # :small_nlstep exception should happen before + step_underflow && set_status!(stats, :small_step) + solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before done = stats.status != :unknown end @@ -357,13 +357,13 @@ function SolverCore.solve!( end """ -find_beta(m, md∇f, norm_∇f, β, θ1, θ2) +find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: -1. [(1-βmax) * ∇f(xk) + βmax * dot(m,∇f(xk))] ≥ θ1 * ‖∇f(xk)‖² -2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) + βmax * m‖ -with `m` the momentum term and `mdot∇f = dot(m,∇f(xk))` +1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² +2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ +with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` """ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} n1 = norm_∇f^2 - mdot∇f From 5bc9befcb112968eeaa2529f813457fe3bba8ed8 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Mon, 4 Mar 2024 14:33:19 -0500 Subject: [PATCH 140/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index fa811924..d1795391 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -18,6 +18,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i solve!(solver, nlp; kwargs...) **No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. + For advanced usage: solver = FoSolver(nlp) From 192944ec22e15cbc0ee4ab5d492a3cb4e21235a6 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Mon, 4 Mar 2024 15:36:16 -0500 Subject: [PATCH 141/171] fix docstrings and verbose display --- src/fomo.jl | 90 +++++++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index d1795391..40b14e8c 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -3,8 +3,8 @@ export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end abstract type AbstractFOMethod end -struct tr_step <: AbstractFOMethod end -struct r2_step <: AbstractFOMethod end +struct tr_step <: AbstractFOMethod end +struct r2_step <: AbstractFOMethod end """ fomo(nlp; kwargs...) @@ -130,19 +130,19 @@ end @doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FomoSolver(nlp) - solver_specific = Dict(:avgβmax => T(0.)) - stats = GenericExecutionStats(nlp;solver_specific=solver_specific) + solver_specific = Dict(:avgβmax => T(0.0)) + stats = GenericExecutionStats(nlp; solver_specific = solver_specific) return solve!(solver, nlp, stats; kwargs...) end function SolverCore.reset!(solver::FomoSolver{T}) where {T} - fill!(solver.m,0) + fill!(solver.m, 0) solver end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) -mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver +@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver x::V g::V c::V @@ -186,22 +186,23 @@ function SolverCore.solve!( rtol::T = √eps(T), η1::T = T(eps(T)^(1 / 4)), η2::T = T(0.95), - γ1::T = T(1/2), + γ1::T = T(1 / 2), γ2::T = T(2), - γ3::T = T(1/2), - αmax::T = 1/eps(T), + γ3::T = T(1 / 2), + αmax::T = 1 / eps(T), max_time::Float64 = 30.0, max_eval::Int = -1, max_iter::Int = typemax(Int), β::T = T(0.9), θ1::T = T(0.1), - θ2::T = T(eps(T)^(1/3)), + θ2::T = T(eps(T)^(1 / 3)), verbose::Int = 0, step_backend = r2_step(), ) where {T, V} use_momentum = typeof(solver) <: FomoSolver + is_r2 = typeof(step_backend) <: r2_step unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") - + reset!(stats) start_time = time() set_time!(stats, 0.0) @@ -215,34 +216,38 @@ function SolverCore.solve!( set_iter!(stats, 0) set_objective!(stats, obj(nlp, x)) - grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) set_dual_residual!(stats, norm_∇fk) solver.α = init_alpha(norm_∇fk, step_backend) - + # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ + header = ["iter", "f", "‖∇f‖", "α"] if optimal @info("Optimal point found at initial point") - if !use_momentum - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "σ" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α + if is_r2 + @info @sprintf "%5s %9s %7s %7s " header... + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α else - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" + @info @sprintf "%5s %9s %7s %7s " header... @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end - end if verbose > 0 && mod(stats.iter, verbose) == 0 + push!(header, "ρk") + step_param = is_r2 ? 1 / solver.α : solver.α if !use_momentum - @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" "σ" "ρk" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α 0 + @info @sprintf "%5s %9s %7s %7s %7s " header... + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param else - @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" "α" "ρk" "βmax" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α 0 0 + push!(header, "βmax") + @info @sprintf "%5s %9s %7s %7s %7s %7s " header... + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0 end end @@ -305,7 +310,7 @@ function SolverCore.solve!( norm_∇fk = norm(∇fk) if use_momentum p .= momentum .- ∇fk - βmax = find_beta(p , mdot∇f, norm_∇fk, β, θ1, θ2) + βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end @@ -322,10 +327,13 @@ function SolverCore.solve!( if verbose > 0 && mod(stats.iter, verbose) == 0 @info infoline + step_param = is_r2 ? 1 / solver.α : solver.α if !use_momentum - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1/solver.α ρk + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ρk else - infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α ρk βmax + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ρk βmax end end @@ -341,11 +349,11 @@ function SolverCore.solve!( max_time = max_time, ), ) - + callback(nlp, solver, stats) - step_underflow && set_status!(stats, :small_step) - solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before + step_underflow && set_status!(stats, :small_step) + solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before done = stats.status != :unknown end @@ -362,16 +370,16 @@ find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: -1. [(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² +1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` -""" -function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T,V} +""" +function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V} n1 = norm_∇f^2 - mdot∇f n2 = norm(p) - β1 = n1 > 0 ? (1-θ1)*norm_∇f^2/(n1) : β - β2 = n2 != 0 ? (1-θ2)*norm_∇f/(θ2*n2) : β - return min(β,min(β1,β2)) + β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β + β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β + return min(β, min(β1, β2)) end """ @@ -380,12 +388,12 @@ end Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ -function init_alpha(norm_∇fk::T, ::r2_step) where{T} - 1/2^round(log2(norm_∇fk + 1)) +function init_alpha(norm_∇fk::T, ::r2_step) where {T} + 1 / 2^round(log2(norm_∇fk + 1)) end -function init_alpha(norm_∇fk::T, ::tr_step) where{T} - norm_∇fk/2^round(log2(norm_∇fk + 1)) +function init_alpha(norm_∇fk::T, ::tr_step) where {T} + norm_∇fk / 2^round(log2(norm_∇fk + 1)) end """ @@ -394,10 +402,10 @@ end Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ -function step_mult(α::T, norm_∇fk::T, ::r2_step) where{T} +function step_mult(α::T, norm_∇fk::T, ::r2_step) where {T} α end -function step_mult(α::T, norm_∇fk::T, ::tr_step) where{T} - α/norm_∇fk +function step_mult(α::T, norm_∇fk::T, ::tr_step) where {T} + α / norm_∇fk end \ No newline at end of file From ad65f30fc77035d93e23c98d81d5e1e27f6937d4 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 5 Mar 2024 14:18:18 -0500 Subject: [PATCH 142/171] update docstring, update info display, fix solver arg type, export TR --- src/fomo.jl | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 40b14e8c..e51e5b8a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,4 @@ -export fomo, FomoSolver, FoSolver, R2, tr_step, r2_step +export fomo, FomoSolver, FoSolver, R2, TR, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end @@ -10,14 +10,21 @@ struct r2_step <: AbstractFOMethod end fomo(nlp; kwargs...) R2(nlp; kwargs...) -A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region methods. +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. +The step is perform along d with +d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1) +with mk the memory of past gradients updated at each successful iteration as +mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2) +and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: +(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3) +‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (4) For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -**No momentum**: if the user do not want to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. +**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. For advanced usage: @@ -40,8 +47,8 @@ For advanced usage: - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition #1. (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖², with m memory of past gradient and βmax ∈ [0,β]. -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition #2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* m‖, with m memory of past gradient and βmax ∈ [0,β]. +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3). +- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. @@ -177,7 +184,7 @@ end SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( - solver::AbstractFirstOrderSolver, + solver::Union{FoSolver,FomoSolver}, nlp::AbstractNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, @@ -225,7 +232,8 @@ function SolverCore.solve!( # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ - header = ["iter", "f", "‖∇f‖", "α"] + header = ["iter", "f", "‖∇f‖"] + is_r2 ? push!(header,"σ") : push!(header,"Δ") if optimal @info("Optimal point found at initial point") if is_r2 From d0de9dac9d7535174f378d219a4d8b575e5dff5f Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 5 Mar 2024 15:14:32 -0500 Subject: [PATCH 143/171] update docstring --- src/fomo.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index e51e5b8a..ed07d668 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -13,8 +13,8 @@ struct r2_step <: AbstractFOMethod end A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. The step is perform along d with d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1) -with mk the memory of past gradients updated at each successful iteration as -mk .= ∇f(xk) .* (1 - βmax) .+ momentum .* βmax (2) +with mk the memory of past gradients (initiated with 0) updated at each successful iteration as +mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2) and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3) ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (4) From a7077c9f941677bd02a50adc93791504e8de0e2a Mon Sep 17 00:00:00 2001 From: d-monnet Date: Wed, 6 Mar 2024 12:02:58 -0500 Subject: [PATCH 144/171] fix grad and momentum dot product --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index ed07d668..d31c6b0e 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -311,12 +311,12 @@ function SolverCore.solve!( x .= c if use_momentum momentum .= ∇fk .* (oneT - β) .+ momentum .* β - mdot∇f = dot(momentum, ∇fk) end set_objective!(stats, fck) grad!(nlp, x, ∇fk) norm_∇fk = norm(∇fk) if use_momentum + mdot∇f = dot(momentum, ∇fk) p .= momentum .- ∇fk βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax From 67b6da27a21be74f7ac5a63f0950d0243734b27f Mon Sep 17 00:00:00 2001 From: d-monnet Date: Wed, 6 Mar 2024 12:19:05 -0500 Subject: [PATCH 145/171] cosmetics --- src/fomo.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index d31c6b0e..0d15d1cd 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -48,7 +48,7 @@ For advanced usage: - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. - `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3). -- `θ2::T = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). +- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. @@ -385,8 +385,8 @@ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V} n1 = norm_∇f^2 - mdot∇f n2 = norm(p) - β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / (n1) : β - β2 = n2 != 0 ? (1 - θ2) * norm_∇f / (n2) : β + β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β + β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β return min(β, min(β1, β2)) end From 62995a4978b1779357f0cabf3e14ef12a4652371 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Thu, 7 Mar 2024 11:18:24 -0500 Subject: [PATCH 146/171] Create FoSolver docstring --- src/fomo.jl | 130 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 41 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 0d15d1cd..2e2527e1 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -8,29 +8,26 @@ struct r2_step <: AbstractFOMethod end """ fomo(nlp; kwargs...) - R2(nlp; kwargs...) A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. + +# Algorithm description + The step is perform along d with -d = - (1-βmax) .* ∇f(xk) - βmax .* mk (1) +d = - (1-βmax) .* ∇f(xk) - βmax .* mk with mk the memory of past gradients (initiated with 0) updated at each successful iteration as -mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax (2) +mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: -(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (3) -‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (4) +(1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1) +‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (2) +# Advanced usage For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) solve!(solver, nlp; kwargs...) -**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `R2` or `TR` methods. - -For advanced usage: - - solver = FoSolver(nlp) - solve!(solver, nlp; step_bakckend = r2_step(),kwargs...) # for Quadratic Regularization (R2) step: s = - α .* ∇f(x) - solve!(solver, nlp; step_bakckend = tr_step(),kwargs...) # for linear model Trust Region (TR) step: s = - α .* ∇f(x) ./ ‖∇f(x)‖ +**No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method. # Arguments - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. @@ -47,8 +44,8 @@ For advanced usage: - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (3). -- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (4). +- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1). +- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. @@ -91,27 +88,6 @@ stats = solve!(solver, nlp) # output -"Execution stats: first-order stationary" -``` -## `R2` -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -stats = R2(nlp) - -# output - -"Execution stats: first-order stationary" -``` - -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -solver = FoSolver(nlp); -stats = solve!(solver, nlp) - -# output - "Execution stats: first-order stationary" ``` """ @@ -149,7 +125,79 @@ end SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) -@doc (@doc FomoSolver) mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver +""" + fo(nlp; kwargs...) + R2(nlp; kwargs...) + TR(nlp; kwargs...) + +A First-Order (FO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. + +For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = FoSolver(nlp) + solve!(solver, nlp; kwargs...) + +`R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument. + +# Arguments +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. +- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. + +# Output +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user || stats.stats = :unknown` will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.gx`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = fo(nlp) # run with step_backend = r2_step(), equivalent to R2(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FoSolver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver x::V g::V c::V @@ -163,17 +211,17 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end -@doc (@doc FomoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) end -@doc (@doc FomoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} fo(nlp; step_backend = r2_step(), kwargs...) end -@doc (@doc FomoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} +@doc (@doc FoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} fo(nlp; step_backend = tr_step(), kwargs...) end @@ -184,7 +232,7 @@ end SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver) function SolverCore.solve!( - solver::Union{FoSolver,FomoSolver}, + solver::Union{FoSolver, FomoSolver}, nlp::AbstractNLPModel{T, V}, stats::GenericExecutionStats{T, V}; callback = (args...) -> nothing, @@ -233,7 +281,7 @@ function SolverCore.solve!( ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ header = ["iter", "f", "‖∇f‖"] - is_r2 ? push!(header,"σ") : push!(header,"Δ") + is_r2 ? push!(header, "σ") : push!(header, "Δ") if optimal @info("Optimal point found at initial point") if is_r2 From e6f7a229fa5c5c9e9d75e63606615bba6beaf665 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:37:12 -0500 Subject: [PATCH 147/171] Update test/restart.jl Co-authored-by: Tangi Migot --- test/restart.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/restart.jl b/test/restart.jl index eb770739..e6b75cc1 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -44,7 +44,7 @@ end end @testset "Test restart with a different problem: $fun" for (fun, s) in ( - (:R2, :FomoSolver), + (:R2, :FoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), From 9126c0847f57cdef7409a4947898f6474bf0f58a Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Thu, 7 Mar 2024 10:37:22 -0500 Subject: [PATCH 148/171] Update test/restart.jl Co-authored-by: Tangi Migot --- test/restart.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/restart.jl b/test/restart.jl index e6b75cc1..38765465 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,5 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( - (:R2, :FomoSolver), + (:R2, :FoSolver), (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), From 06eb8f83b925f2237e434bed60024ddb3aa4b009 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Fri, 8 Mar 2024 11:31:07 -0500 Subject: [PATCH 149/171] deprecate R2Solver --- src/fomo.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fomo.jl b/src/fomo.jl index 2e2527e1..002ba712 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -211,6 +211,8 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end +Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...) + @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} solver = FoSolver(nlp) stats = GenericExecutionStats(nlp) From d6750c354a7b457cd86ba1ed03cd298fe5a5e8fc Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:13:41 -0500 Subject: [PATCH 150/171] Update src/fomo.jl Co-authored-by: Tangi Migot --- src/fomo.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/fomo.jl b/src/fomo.jl index 002ba712..9009c36a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -211,6 +211,11 @@ function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} return FoSolver{T, V}(x, g, c, T(0)) end +""" + `R2Solver` is deprecated, please check the documentation of `R2`. +""" +mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end + Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...) @doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} From 39c30532f6dc1abd59d6f90147e1705bb45d5e79 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:14:27 -0500 Subject: [PATCH 151/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 9009c36a..ba22964b 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -13,7 +13,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza # Algorithm description -The step is perform along d with +The step is computed along d = - (1-βmax) .* ∇f(xk) - βmax .* mk with mk the memory of past gradients (initiated with 0) updated at each successful iteration as mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax From 96c82d231b443064b49649e5cfc6f4653cd37798 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:14:45 -0500 Subject: [PATCH 152/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index ba22964b..20b5cce1 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -15,7 +15,7 @@ A First-Order with MOmentum (FOMO) model-based method for unconstrained optimiza The step is computed along d = - (1-βmax) .* ∇f(xk) - βmax .* mk -with mk the memory of past gradients (initiated with 0) updated at each successful iteration as +with mk the memory of past gradients (initialized at 0), and updated at each successful iteration as mk .= ∇f(xk) .* (1 - βmax) .+ mk .* βmax and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: (1-βmax) .* ∇f(xk) + βmax .* ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1) From 0899b59e0b6fdfbd50d6027cccabf8ece2aa7d98 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:15:14 -0500 Subject: [PATCH 153/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 20b5cce1..35d497a4 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -22,6 +22,7 @@ and βmax ∈ [0,β] chosen as to ensure d is gradient-related, i.e., the follow ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) *. ∇f(xk) + βmax .* mk‖ (2) # Advanced usage + For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: solver = FomoSolver(nlp) From dbb1bbc2e9fab1d2a8783703e308a6e83bc8245b Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:15:36 -0500 Subject: [PATCH 154/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index 35d497a4..ac78ecbc 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -31,6 +31,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i **No momentum**: if the user does not whish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method. # Arguments + - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments From c00423df0d5be0a34bd4c67c65efaeb18f0de2eb Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:15:57 -0500 Subject: [PATCH 155/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/fomo.jl b/src/fomo.jl index ac78ecbc..88f42570 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -35,6 +35,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments + - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. From bfec5ddc668a2e1e16df2f8945fd2ec7f1fa2e31 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:16:14 -0500 Subject: [PATCH 156/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 88f42570..edc5608a 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -432,7 +432,7 @@ function SolverCore.solve!( end """ -find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) + find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) Compute βmax which saturates the contibution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: From 825a28ab3720fab230e9dbedb963e151154e38a9 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:16:30 -0500 Subject: [PATCH 157/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index edc5608a..6cf1e221 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -434,7 +434,7 @@ end """ find_beta(m, mdot∇f, norm_∇f, β, θ1, θ2) -Compute βmax which saturates the contibution of the momentum term to the gradient. +Compute value `βmax` that saturates the contribution of the momentum term to the gradient. `βmax` is computed such that the two gradient-related conditions are ensured: 1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ From 79e758d15640e2a96c72db8d1343099c0e5cf45a Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:16:48 -0500 Subject: [PATCH 158/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 6cf1e221..2043fa07 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -449,8 +449,8 @@ function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where end """ - init_alpha(norm_∇fk::T, ::r2_step) - init_alpha(norm_∇fk::T, ::tr_step) + init_alpha(norm_∇fk::T, ::r2_step) + init_alpha(norm_∇fk::T, ::tr_step) Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. """ From 3d43e3d5fe2349cbeede31bc142bc9c57aa7238d Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:17:11 -0500 Subject: [PATCH 159/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 2043fa07..b5ebcdfc 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -452,7 +452,8 @@ end init_alpha(norm_∇fk::T, ::r2_step) init_alpha(norm_∇fk::T, ::tr_step) -Initialize α step size parameter. Ensure first step is the same for quadratic regularization and trust region methods. +Initialize `α` step size parameter. +Ensure first step is the same for quadratic regularization and trust region methods. """ function init_alpha(norm_∇fk::T, ::r2_step) where {T} 1 / 2^round(log2(norm_∇fk + 1)) From 40de1e47e53b425c23c1514894401a196d7b3298 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:17:25 -0500 Subject: [PATCH 160/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index b5ebcdfc..96c994c3 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -464,8 +464,8 @@ function init_alpha(norm_∇fk::T, ::tr_step) where {T} end """ - step_mult(α::T, norm_∇fk::T, ::r2_step) - step_mult(α::T, norm_∇fk::T, ::tr_step) + step_mult(α::T, norm_∇fk::T, ::r2_step) + step_mult(α::T, norm_∇fk::T, ::tr_step) Compute step size multiplier: `α` for quadratic regularization(`::r2` and `::R2og`) and `α/norm_∇fk` for trust region (`::tr`). """ From fac145eda2f74d5293b1e65d9ae22198ddabfe89 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:17:48 -0500 Subject: [PATCH 161/171] Update src/fomo.jl Co-authored-by: Dominique --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 96c994c3..917f1fff 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -43,7 +43,7 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. - `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. - `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. -- `max_eval::Int = -1`: maximum number of evaluation of the objective function. +- `max_eval::Int = -1`: maximum number of objective evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. - `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. From bbc97a017da79181a861f5a2587493d8b47c8780 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 9 Mar 2024 16:19:42 -0500 Subject: [PATCH 162/171] Apply suggestions from code review Co-authored-by: Dominique --- src/fomo.jl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 917f1fff..560d7505 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -46,16 +46,18 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `max_eval::Int = -1`: maximum number of objective evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0.9) ∈ [0,1)` : target decay rate for the momentum. -- `θ1 = T(0.1)` : momentum contribution parameter for convergence condition (1). -- `θ2 = T(eps(T)^(1/3))` : momentum contribution parameter for convergence condition (2). +- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum. +- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1). +- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output + The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback + The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. @@ -72,7 +74,9 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. # Examples + ## `fomo` + ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)) @@ -143,9 +147,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i `R2` and `TR` runs `fo` with the dedicated `step_backend` keyword argument. # Arguments + - `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. # Keyword arguments + - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. @@ -159,9 +165,11 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output + The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. # Callback + The callback is called at each iteration. The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. Changing any of the input arguments will affect the subsequent iterations. From 9035f4004c878674cd1024973dc02e11293b04cb Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 9 Mar 2024 17:34:27 -0500 Subject: [PATCH 163/171] exports fo --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 560d7505..30acd96b 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,4 @@ -export fomo, FomoSolver, FoSolver, R2, TR, tr_step, r2_step +export fomo, FomoSolver, FoSolver, fo, R2, TR, tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end From ea3fcf7313ca1215e03131ef2e9592cf92a7ad79 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 9 Mar 2024 17:58:21 -0500 Subject: [PATCH 164/171] remove header to test allocation --- src/fomo.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 30acd96b..1d098ba9 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -298,15 +298,15 @@ function SolverCore.solve!( # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ - header = ["iter", "f", "‖∇f‖"] - is_r2 ? push!(header, "σ") : push!(header, "Δ") + #header = ["iter", "f", "‖∇f‖"] + #is_r2 ? push!(header, "σ") : push!(header, "Δ") if optimal @info("Optimal point found at initial point") if is_r2 - @info @sprintf "%5s %9s %7s %7s " header... + @info @sprintf "%5s %9s %7s %7s " #header... @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α else - @info @sprintf "%5s %9s %7s %7s " header... + @info @sprintf "%5s %9s %7s %7s " #header... @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end end @@ -314,12 +314,12 @@ function SolverCore.solve!( push!(header, "ρk") step_param = is_r2 ? 1 / solver.α : solver.α if !use_momentum - @info @sprintf "%5s %9s %7s %7s %7s " header... + @info @sprintf "%5s %9s %7s %7s %7s " #header... infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param else push!(header, "βmax") - @info @sprintf "%5s %9s %7s %7s %7s %7s " header... + @info @sprintf "%5s %9s %7s %7s %7s %7s " #header... infoline = @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0 end From 3984fe15eaa9d1d2a3e75512d667e76ffed8a9f7 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 9 Mar 2024 18:00:50 -0500 Subject: [PATCH 165/171] update readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d7556a3e..cd21ff94 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,8 @@ This package provides an implementation of four classic algorithms for unconstra > high-order regularized models. *Mathematical Programming*, 163(1), 359-368. > DOI: [10.1007/s10107-016-1065-8](https://doi.org/10.1007/s10107-016-1065-8) - +- `fomo`: a first-order method with momentum for unconstrained optimization; + - `tron`: a pure Julia implementation of TRON, a trust-region solver for bound-constrained optimization described in > Chih-Jen Lin and Jorge J. Moré, *Newton's Method for Large Bound-Constrained From fb2525ed51bb20b79864c28409edf4973381757b Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 9 Mar 2024 18:21:15 -0500 Subject: [PATCH 166/171] fix header allocation --- src/fomo.jl | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 1d098ba9..d3f3132c 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -298,30 +298,28 @@ function SolverCore.solve!( # Stopping criterion: ϵ = atol + rtol * norm_∇fk optimal = norm_∇fk ≤ ϵ - #header = ["iter", "f", "‖∇f‖"] - #is_r2 ? push!(header, "σ") : push!(header, "Δ") + step_param_name = is_r2 ? "σ" : "Δ" if optimal @info("Optimal point found at initial point") if is_r2 - @info @sprintf "%5s %9s %7s %7s " #header... + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" step_param_name @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α else - @info @sprintf "%5s %9s %7s %7s " #header... + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" step_param_name @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α end - end - if verbose > 0 && mod(stats.iter, verbose) == 0 - push!(header, "ρk") - step_param = is_r2 ? 1 / solver.α : solver.α - if !use_momentum - @info @sprintf "%5s %9s %7s %7s %7s " #header... - infoline = - @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param - else - push!(header, "βmax") - @info @sprintf "%5s %9s %7s %7s %7s %7s " #header... - infoline = - @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0 + else + if verbose > 0 && mod(stats.iter, verbose) == 0 + step_param = is_r2 ? 1 / solver.α : solver.α + if !use_momentum + @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" step_param_name "ρk" + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' + else + @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" step_param_name "ρk" "βmax" + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0 + end end end From 04f7709de726bb7b1cb5ef8dd7efac8bcc3694dc Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 9 Mar 2024 18:36:14 -0500 Subject: [PATCH 167/171] try to fix fomo allocation --- src/fomo.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index d3f3132c..9c9e39cc 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -382,7 +382,8 @@ function SolverCore.solve!( if use_momentum mdot∇f = dot(momentum, ∇fk) p .= momentum .- ∇fk - βmax = find_beta(p, mdot∇f, norm_∇fk, β, θ1, θ2) + diff_norm = norm(p) + βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) end @@ -446,11 +447,10 @@ Compute value `βmax` that saturates the contribution of the momentum term to th 2. ‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` """ -function find_beta(p::V, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V} +function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V} n1 = norm_∇f^2 - mdot∇f - n2 = norm(p) β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β - β2 = n2 != 0 ? (1 - θ2) * norm_∇f / n2 : β + β2 = n2 != 0 ? (1 - θ2) * norm_∇f / diff_norm : β return min(β, min(β1, β2)) end From a0324fdd18fde8f111ec221f8d1a839114f6b0f3 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Sat, 9 Mar 2024 18:45:46 -0500 Subject: [PATCH 168/171] fix find_beta --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 9c9e39cc..331c7214 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -450,7 +450,7 @@ with `m` the momentum term and `mdot∇f = ∇f(xk)ᵀm` function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T, V} n1 = norm_∇f^2 - mdot∇f β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β - β2 = n2 != 0 ? (1 - θ2) * norm_∇f / diff_norm : β + β2 = diff_norm != 0 ? (1 - θ2) * norm_∇f / diff_norm : β return min(β, min(β1, β2)) end From a5c8c7d4d6de4f84805a72d462aea9633734e0dd Mon Sep 17 00:00:00 2001 From: d-monnet Date: Tue, 12 Mar 2024 16:29:35 -0400 Subject: [PATCH 169/171] remove unecessary if condition, comment out line causing possible allocation --- src/fomo.jl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/fomo.jl b/src/fomo.jl index 331c7214..72f68209 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -345,7 +345,7 @@ function SolverCore.solve!( βmax = T(0) ρk = T(0) avgβmax = T(0) - siter = 0 + siter::Int = 0 oneT = T(1) mdot∇f = T(0) # dot(momentum,∇fk) while !done @@ -386,8 +386,6 @@ function SolverCore.solve!( βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2) d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax norm_d = norm(d) - end - if use_momentum avgβmax += βmax siter += 1 end @@ -432,7 +430,7 @@ function SolverCore.solve!( end if use_momentum avgβmax /= siter - stats.solver_specific[:avgβmax] = avgβmax + # stats.solver_specific[:avgβmax] = avgβmax end set_solution!(stats, x) return stats From eee3823ab9cd3ec2bfee29813a922164995d27f5 Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:51:09 -0400 Subject: [PATCH 170/171] Update src/fomo.jl Co-authored-by: Tangi Migot --- src/fomo.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fomo.jl b/src/fomo.jl index 72f68209..d8e206bb 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -430,7 +430,7 @@ function SolverCore.solve!( end if use_momentum avgβmax /= siter - # stats.solver_specific[:avgβmax] = avgβmax + set_solver_specific!(stats, :avgβmax, avgβmax) end set_solution!(stats, x) return stats From d5f409dc5bc34dd6f52911b7826511f8977dd767 Mon Sep 17 00:00:00 2001 From: d-monnet Date: Wed, 13 Mar 2024 11:57:19 -0400 Subject: [PATCH 171/171] fix allocation tests: pre-allocate solver_specific field in stats. --- test/allocs.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/allocs.jl b/test/allocs.jl index 1fe38f23..f5768c4a 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -35,7 +35,12 @@ if Sys.isunix() nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) solver = eval(symsolver)(nlp) - stats = GenericExecutionStats(nlp) + if symsolver == :FomoSolver + T = eltype(nlp.meta.x0) + stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0))) + else + stats = GenericExecutionStats(nlp) + end with_logger(NullLogger()) do SolverCore.solve!(solver, nlp, stats) reset!(solver)