From 3ed9f921850a2ccefc0e9a74c00b6a2fd0c0daaa Mon Sep 17 00:00:00 2001 From: d-monnet <70266099+d-monnet@users.noreply.github.com> Date: Sat, 16 Mar 2024 12:29:42 -0400 Subject: [PATCH] First-order with momentum (#250) Co-authored-by: Dominique Co-authored-by: tmigot --- README.md | 3 +- docs/src/solvers.md | 4 +- src/JSOSolvers.jl | 2 +- src/R2.jl | 231 --------------------- src/fomo.jl | 482 +++++++++++++++++++++++++++++++++++++++++++ test/allocs.jl | 9 +- test/callback.jl | 5 + test/consistency.jl | 5 +- test/restart.jl | 6 +- test/runtests.jl | 2 +- test/test_solvers.jl | 2 + 11 files changed, 510 insertions(+), 241 deletions(-) delete mode 100644 src/R2.jl create mode 100644 src/fomo.jl diff --git a/README.md b/README.md index d7556a3e..cd21ff94 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,8 @@ This package provides an implementation of four classic algorithms for unconstra > high-order regularized models. *Mathematical Programming*, 163(1), 359-368. > DOI: [10.1007/s10107-016-1065-8](https://doi.org/10.1007/s10107-016-1065-8) - +- `fomo`: a first-order method with momentum for unconstrained optimization; + - `tron`: a pure Julia implementation of TRON, a trust-region solver for bound-constrained optimization described in > Chih-Jen Lin and Jorge J. 
Moré, *Newton's Method for Large Bound-Constrained diff --git a/docs/src/solvers.md b/docs/src/solvers.md index 06fe0eed..322f7c2e 100644 --- a/docs/src/solvers.md +++ b/docs/src/solvers.md @@ -6,10 +6,11 @@ - [`tron`](@ref) - [`trunk`](@ref) - [`R2`](@ref) +- [`fomo`](@ref) | Problem type | Solvers | | --------------------- | -------- | -| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref)| +| Unconstrained NLP | [`lbfgs`](@ref), [`tron`](@ref), [`trunk`](@ref), [`R2`](@ref), [`fomo`](@ref)| | Unconstrained NLS | [`trunk`](@ref), [`tron`](@ref) | | Bound-constrained NLP | [`tron`](@ref) | | Bound-constrained NLS | [`tron`](@ref) | @@ -21,4 +22,5 @@ lbfgs tron trunk R2 +fomo ``` diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index cd65c9b2..85afc4fe 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -13,7 +13,7 @@ export solve! # Unconstrained solvers include("lbfgs.jl") include("trunk.jl") -include("R2.jl") +include("fomo.jl") # Unconstrained solvers for NLS include("trunkls.jl") diff --git a/src/R2.jl b/src/R2.jl deleted file mode 100644 index b7304dc3..00000000 --- a/src/R2.jl +++ /dev/null @@ -1,231 +0,0 @@ -export R2, R2Solver - -""" - R2(nlp; kwargs...) - -A first-order quadratic regularization method for unconstrained optimization. - -For advanced usage, first define a `R2Solver` to preallocate the memory used in the algorithm, and then call `solve!`: - - solver = R2Solver(nlp) - solve!(solver, nlp; kwargs...) - -# Arguments -- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. - -# Keyword arguments -- `x::V = nlp.meta.x0`: the initial guess. -- `atol::T = √eps(T)`: absolute tolerance. -- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = 1/γ1`: regularization update parameters. -- `αmax = 1/eps(T)`: maximum value for step size parameter for R2 algorithm. 
-- `max_eval::Int = -1`: maximum number of evaluation of the objective function. -- `max_time::Float64 = 30.0`: maximum time limit in seconds. -- `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0) ∈ [0,1]` is the constant in the momentum term. If `β == 0`, R2 does not use momentum. -- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - -# Output -The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. - -# Callback -The callback is called at each iteration. -The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. -Changing any of the input arguments will affect the subsequent iterations. -In particular, setting `stats.status = :user` will stop the algorithm. -All relevant information should be available in `nlp` and `solver`. -Notably, you can access, and modify, the following: -- `solver.x`: current iterate; -- `solver.gx`: current gradient; -- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: - - `stats.dual_feas`: norm of current gradient; - - `stats.iter`: current iteration counter; - - `stats.objective`: current objective function value; - - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. - - `stats.elapsed_time`: elapsed time in seconds. 
- -# Examples -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -stats = R2(nlp) - -# output - -"Execution stats: first-order stationary" -``` - -```jldoctest -using JSOSolvers, ADNLPModels -nlp = ADNLPModel(x -> sum(x.^2), ones(3)) -solver = R2Solver(nlp); -stats = solve!(solver, nlp) - -# output - -"Execution stats: first-order stationary" -``` -""" -mutable struct R2Solver{T, V} <: AbstractOptimizationSolver - x::V - gx::V - cx::V - d::V # used for momentum term - α::T -end - -function R2Solver(nlp::AbstractNLPModel{T, V}) where {T, V} - x = similar(nlp.meta.x0) - gx = similar(nlp.meta.x0) - cx = similar(nlp.meta.x0) - d = fill!(similar(nlp.meta.x0), 0) - α = zero(T) # init it to zero for now - return R2Solver{T, V}(x, gx, cx, d, α) -end - -@doc (@doc R2Solver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} - solver = R2Solver(nlp) - return solve!(solver, nlp; kwargs...) -end - -function SolverCore.reset!(solver::R2Solver{T}) where {T} - solver.d .= zero(T) - solver -end -SolverCore.reset!(solver::R2Solver, ::AbstractNLPModel) = reset!(solver) - -function SolverCore.solve!( - solver::R2Solver{T, V}, - nlp::AbstractNLPModel{T, V}, - stats::GenericExecutionStats{T, V}; - callback = (args...) 
-> nothing, - x::V = nlp.meta.x0, - atol::T = √eps(T), - rtol::T = √eps(T), - η1 = eps(T)^(1 / 4), - η2 = T(0.95), - γ1 = T(1 / 2), - γ2 = 1 / γ1, - αmax = T(Inf), - max_time::Float64 = 30.0, - max_eval::Int = -1, - max_iter::Int = typemax(Int), - β::T = T(0), - verbose::Int = 0, -) where {T, V} - unconstrained(nlp) || error("R2 should only be called on unconstrained problems.") - - reset!(stats) - start_time = time() - set_time!(stats, 0.0) - - x = solver.x .= x - ∇fk = solver.gx - ck = solver.cx - d = solver.d - αk = solver.α - - set_iter!(stats, 0) - set_objective!(stats, obj(nlp, x)) - - grad!(nlp, x, ∇fk) - norm_∇fk = norm(∇fk) - set_dual_residual!(stats, norm_∇fk) - - αk = 1 / 2^round(log2(norm_∇fk + 1)) - # Stopping criterion: - ϵ = atol + rtol * norm_∇fk - optimal = norm_∇fk ≤ ϵ - if optimal - @info("Optimal point found at initial point") - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk - end - if verbose > 0 && mod(stats.iter, verbose) == 0 - @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" "α" - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk - end - - set_status!( - stats, - get_status( - nlp, - elapsed_time = stats.elapsed_time, - optimal = optimal, - max_eval = max_eval, - iter = stats.iter, - max_iter = max_iter, - max_time = max_time, - ), - ) - - solver.α = αk - callback(nlp, solver, stats) - αk = solver.α - - done = stats.status != :unknown - - while !done - if β == 0 - ck .= x .- (∇fk .* αk) - else - d .= ∇fk .* (T(1) - β) .+ d .* β - ck .= x .- (d .* αk) - end - ΔTk = norm_∇fk^2 * αk - fck = obj(nlp, ck) - if fck == -Inf - set_status!(stats, :unbounded) - break - end - - ρk = (stats.objective - fck) / ΔTk - - # Update regularization parameters - if ρk >= η2 - αk = min(αmax, γ2 * αk) - elseif ρk < η1 - αk = αk * γ1 - end - - # Acceptance of the new candidate - if ρk >= η1 - x .= ck - set_objective!(stats, fck) - grad!(nlp, 
x, ∇fk) - norm_∇fk = norm(∇fk) - end - - set_iter!(stats, stats.iter + 1) - set_time!(stats, time() - start_time) - set_dual_residual!(stats, norm_∇fk) - optimal = norm_∇fk ≤ ϵ - - if verbose > 0 && mod(stats.iter, verbose) == 0 - @info infoline - infoline = @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk αk - end - - set_status!( - stats, - get_status( - nlp, - elapsed_time = stats.elapsed_time, - optimal = optimal, - max_eval = max_eval, - iter = stats.iter, - max_iter = max_iter, - max_time = max_time, - ), - ) - solver.α = αk - callback(nlp, solver, stats) - αk = solver.α - - done = stats.status != :unknown - end - - set_solution!(stats, x) - return stats -end diff --git a/src/fomo.jl b/src/fomo.jl new file mode 100644 index 00000000..d8e206bb --- /dev/null +++ b/src/fomo.jl @@ -0,0 +1,482 @@ +export fomo, FomoSolver, FoSolver, fo, R2, TR, tr_step, r2_step + +abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end + +abstract type AbstractFOMethod end +struct tr_step <: AbstractFOMethod end +struct r2_step <: AbstractFOMethod end + +""" + fomo(nlp; kwargs...) + +A First-Order with MOmentum (FOMO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. + +# Algorithm description + +The step is computed along +d = - (1-βmax) .* ∇f(xk) - βmax .* mk +with mk the memory of past gradients (initialized at 0), and updated at each successful iteration as +mk .= ∇f(xk) .* (1 - β) .+ mk .* β +and βmax ∈ [0,β] chosen so as to ensure d is gradient-related, i.e., the following 2 conditions are satisfied: +(1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀmk ≥ θ1 * ‖∇f(xk)‖² (1) +‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) .* ∇f(xk) + βmax .* mk‖ (2) + +# Advanced usage + +For advanced usage, first define a `FomoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = FomoSolver(nlp) + solve!(solver, nlp; kwargs...) 
+ +**No momentum**: if the user does not wish to use momentum (`β` = 0), it is recommended to use the memory-optimized `fo` method. + +# Arguments + +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments + +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `γ3 = T(1/2)`: momentum factor βmax update parameter in case of unsuccessful iteration. +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of objective evaluations. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum. +- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1). +- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2). +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iterations. +- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regularization step and `tr_step()` for first-order trust-region. + +# Output + +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback + +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. +In particular, setting `stats.status = :user` (or any value other than `:unknown`) will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. 
+Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.g`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples + +## `fomo` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = fomo(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FomoSolver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver + x::V + g::V + c::V + m::V + d::V + p::V + α::T +end + +function FomoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} + x = similar(nlp.meta.x0) + g = similar(nlp.meta.x0) + c = similar(nlp.meta.x0) + m = fill!(similar(nlp.meta.x0), 0) + d = fill!(similar(nlp.meta.x0), 0) + p = similar(nlp.meta.x0) + return FomoSolver{T, V}(x, g, c, m, d, p, T(0)) +end + +@doc (@doc FomoSolver) function fomo(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + solver = FomoSolver(nlp) + solver_specific = Dict(:avgβmax => T(0.0)) + stats = GenericExecutionStats(nlp; solver_specific = solver_specific) + return solve!(solver, nlp, stats; kwargs...) 
+end + +function SolverCore.reset!(solver::FomoSolver{T}) where {T} + fill!(solver.m, 0) + solver +end + +SolverCore.reset!(solver::FomoSolver, ::AbstractNLPModel) = reset!(solver) + +""" + fo(nlp; kwargs...) + R2(nlp; kwargs...) + TR(nlp; kwargs...) + +A First-Order (FO) model-based method for unconstrained optimization. Supports quadratic regularization and trust region method with linear model. + +For advanced usage, first define a `FoSolver` to preallocate the memory used in the algorithm, and then call `solve!`: + + solver = FoSolver(nlp) + solve!(solver, nlp; kwargs...) + +`R2` and `TR` run `fo` with the dedicated `step_backend` keyword argument. + +# Arguments + +- `nlp::AbstractNLPModel{T, V}` is the model to solve, see `NLPModels.jl`. + +# Keyword arguments + +- `x::V = nlp.meta.x0`: the initial guess. +- `atol::T = √eps(T)`: absolute tolerance. +- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. +- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. +- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. +- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `max_eval::Int = -1`: maximum number of evaluations of the objective function. +- `max_time::Float64 = 30.0`: maximum time limit in seconds. +- `max_iter::Int = typemax(Int)`: maximum number of iterations. +- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iterations. +- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regularization step and `tr_step()` for first-order trust-region. + +# Output + +The value returned is a `GenericExecutionStats`, see `SolverCore.jl`. + +# Callback + +The callback is called at each iteration. +The expected signature of the callback is `callback(nlp, solver, stats)`, and its output is ignored. +Changing any of the input arguments will affect the subsequent iterations. 
+In particular, setting `stats.status = :user` (or any value other than `:unknown`) will stop the algorithm. +All relevant information should be available in `nlp` and `solver`. +Notably, you can access, and modify, the following: +- `solver.x`: current iterate; +- `solver.g`: current gradient; +- `stats`: structure holding the output of the algorithm (`GenericExecutionStats`), which contains, among other things: + - `stats.dual_feas`: norm of current gradient; + - `stats.iter`: current iteration counter; + - `stats.objective`: current objective function value; + - `stats.status`: current status of the algorithm. Should be `:unknown` unless the algorithm has attained a stopping criterion. Changing this to anything will stop the algorithm, but you should use `:user` to properly indicate the intention. + - `stats.elapsed_time`: elapsed time in seconds. + +# Examples + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +stats = fo(nlp) # run with step_backend = r2_step(), equivalent to R2(nlp) + +# output + +"Execution stats: first-order stationary" +``` + +```jldoctest +using JSOSolvers, ADNLPModels +nlp = ADNLPModel(x -> sum(x.^2), ones(3)) +solver = FoSolver(nlp); +stats = solve!(solver, nlp) + +# output + +"Execution stats: first-order stationary" +``` +""" +mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver + x::V + g::V + c::V + α::T +end + +function FoSolver(nlp::AbstractNLPModel{T, V}) where {T, V} + x = similar(nlp.meta.x0) + g = similar(nlp.meta.x0) + c = similar(nlp.meta.x0) + return FoSolver{T, V}(x, g, c, T(0)) +end + +""" + `R2Solver` is deprecated, please check the documentation of `R2`. +""" +mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end + +Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(nlp::AbstractNLPModel; kwargs...) + +@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; kwargs...) 
where {T, V} + solver = FoSolver(nlp) + stats = GenericExecutionStats(nlp) + return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) +end + +@doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + fo(nlp; step_backend = r2_step(), kwargs...) +end + +@doc (@doc FoSolver) function TR(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} + fo(nlp; step_backend = tr_step(), kwargs...) +end + +function SolverCore.reset!(solver::FoSolver{T}) where {T} + solver +end + +SolverCore.reset!(solver::FoSolver, ::AbstractNLPModel) = reset!(solver) + +function SolverCore.solve!( + solver::Union{FoSolver, FomoSolver}, + nlp::AbstractNLPModel{T, V}, + stats::GenericExecutionStats{T, V}; + callback = (args...) -> nothing, + x::V = nlp.meta.x0, + atol::T = √eps(T), + rtol::T = √eps(T), + η1::T = T(eps(T)^(1 / 4)), + η2::T = T(0.95), + γ1::T = T(1 / 2), + γ2::T = T(2), + γ3::T = T(1 / 2), + αmax::T = 1 / eps(T), + max_time::Float64 = 30.0, + max_eval::Int = -1, + max_iter::Int = typemax(Int), + β::T = T(0.9), + θ1::T = T(0.1), + θ2::T = T(eps(T)^(1 / 3)), + verbose::Int = 0, + step_backend = r2_step(), +) where {T, V} + use_momentum = typeof(solver) <: FomoSolver + is_r2 = typeof(step_backend) <: r2_step + unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") + + reset!(stats) + start_time = time() + set_time!(stats, 0.0) + + x = solver.x .= x + ∇fk = solver.g + c = solver.c + momentum = use_momentum ? solver.m : nothing # not used if no momentum + d = use_momentum ? solver.d : solver.g # g = d if no momentum + p = use_momentum ? solver.p : nothing # not used if no momentum + set_iter!(stats, 0) + set_objective!(stats, obj(nlp, x)) + + grad!(nlp, x, ∇fk) + norm_∇fk = norm(∇fk) + set_dual_residual!(stats, norm_∇fk) + + solver.α = init_alpha(norm_∇fk, step_backend) + + # Stopping criterion: + ϵ = atol + rtol * norm_∇fk + optimal = norm_∇fk ≤ ϵ + step_param_name = is_r2 ? 
"σ" : "Δ" + if optimal + @info("Optimal point found at initial point") + if is_r2 + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" step_param_name + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk 1 / solver.α + else + @info @sprintf "%5s %9s %7s %7s " "iter" "f" "‖∇f‖" step_param_name + @info @sprintf "%5d %9.2e %7.1e %7.1e" stats.iter stats.objective norm_∇fk solver.α + end + else + if verbose > 0 && mod(stats.iter, verbose) == 0 + step_param = is_r2 ? 1 / solver.α : solver.α + if !use_momentum + @info @sprintf "%5s %9s %7s %7s %7s " "iter" "f" "‖∇f‖" step_param_name "ρk" + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7s" stats.iter stats.objective norm_∇fk step_param ' ' + else + @info @sprintf "%5s %9s %7s %7s %7s %7s " "iter" "f" "‖∇f‖" step_param_name "ρk" "βmax" + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7s %7.1e" stats.iter stats.objective norm_∇fk step_param ' ' 0 + end + end + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + callback(nlp, solver, stats) + + done = stats.status != :unknown + + d .= ∇fk + norm_d = norm_∇fk + βmax = T(0) + ρk = T(0) + avgβmax = T(0) + siter::Int = 0 + oneT = T(1) + mdot∇f = T(0) # dot(momentum,∇fk) + while !done + λk = step_mult(solver.α, norm_d, step_backend) + c .= x .- λk .* d + step_underflow = x == c # step addition underflow in every dimension, should happen before solver.α == 0 + ΔTk = ((oneT - βmax) * norm_∇fk^2 + βmax * mdot∇f) * λk # = dot(d,∇fk) * λk with momentum, ‖∇fk‖²λk without momentum + fck = obj(nlp, c) + if fck == -Inf + set_status!(stats, :unbounded) + break + end + ρk = (stats.objective - fck) / ΔTk + # Update regularization parameters + if ρk >= η2 + solver.α = min(αmax, γ2 * solver.α) + elseif ρk < η1 + solver.α = solver.α * γ1 + if use_momentum + βmax *= γ3 + norm_d = norm(d .= ∇fk .* (oneT - βmax) .+ momentum .* 
βmax) # d changed: refresh ‖d‖ so the trust-region multiplier α/‖d‖ stays exact + end + end + + # Acceptance of the new candidate + if ρk >= η1 + x .= c + if use_momentum + momentum .= ∇fk .* (oneT - β) .+ momentum .* β + end + set_objective!(stats, fck) + grad!(nlp, x, ∇fk) + norm_d = norm_∇fk = norm(∇fk) # without momentum d aliases ∇fk, so ‖d‖ must follow ‖∇fk‖ (overwritten below with momentum) + if use_momentum + mdot∇f = dot(momentum, ∇fk) + p .= momentum .- ∇fk + diff_norm = norm(p) + βmax = find_beta(diff_norm, mdot∇f, norm_∇fk, β, θ1, θ2) + d .= ∇fk .* (oneT - βmax) .+ momentum .* βmax + norm_d = norm(d) + avgβmax += βmax + siter += 1 + end + end + + set_iter!(stats, stats.iter + 1) + set_time!(stats, time() - start_time) + set_dual_residual!(stats, norm_∇fk) + optimal = norm_∇fk ≤ ϵ + + if verbose > 0 && mod(stats.iter, verbose) == 0 + @info infoline + step_param = is_r2 ? 1 / solver.α : solver.α + if !use_momentum + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ρk + else + infoline = + @sprintf "%5d %9.2e %7.1e %7.1e %7.1e %7.1e" stats.iter stats.objective norm_∇fk step_param ρk βmax + end + end + + set_status!( + stats, + get_status( + nlp, + elapsed_time = stats.elapsed_time, + optimal = optimal, + max_eval = max_eval, + iter = stats.iter, + max_iter = max_iter, + max_time = max_time, + ), + ) + + callback(nlp, solver, stats) + + step_underflow && set_status!(stats, :small_step) + solver.α == 0 && set_status!(stats, :exception) # :small_nlstep exception should happen before + + done = stats.status != :unknown + end + if use_momentum + siter > 0 && (avgβmax /= siter) # no successful iteration: keep 0 instead of 0/0 = NaN + set_solver_specific!(stats, :avgβmax, avgβmax) + end + set_solution!(stats, x) + return stats +end + +""" + find_beta(diff_norm, mdot∇f, norm_∇f, β, θ1, θ2) + +Compute value `βmax` that saturates the contribution of the momentum term to the gradient. 
+`βmax` is computed such that the two gradient-related conditions are ensured: +1. (1-βmax) * ‖∇f(xk)‖² + βmax * ∇f(xk)ᵀm ≥ θ1 * ‖∇f(xk)‖² +2. 
‖∇f(xk)‖ ≥ θ2 * ‖(1-βmax) * ∇f(xk) .+ βmax .* m‖ +with `m` the momentum term, `mdot∇f = ∇f(xk)ᵀm` and `diff_norm = ‖m - ∇f(xk)‖` +""" +function find_beta(diff_norm::T, mdot∇f::T, norm_∇f::T, β::T, θ1::T, θ2::T) where {T} + n1 = norm_∇f^2 - mdot∇f + β1 = n1 > 0 ? (1 - θ1) * norm_∇f^2 / n1 : β + β2 = diff_norm != 0 ? (1 - θ2) * norm_∇f / diff_norm : β + return min(β, min(β1, β2)) +end + +""" + init_alpha(norm_∇fk::T, ::r2_step) + init_alpha(norm_∇fk::T, ::tr_step) + +Initialize `α` step size parameter. +Ensure first step is the same for quadratic regularization and trust region methods. +""" +function init_alpha(norm_∇fk::T, ::r2_step) where {T} + 1 / 2^round(log2(norm_∇fk + 1)) +end + +function init_alpha(norm_∇fk::T, ::tr_step) where {T} + norm_∇fk / 2^round(log2(norm_∇fk + 1)) +end + +""" + step_mult(α::T, norm_∇fk::T, ::r2_step) + step_mult(α::T, norm_∇fk::T, ::tr_step) + +Compute step size multiplier: `α` for quadratic regularization (`::r2_step`) and `α/norm_∇fk` for trust region (`::tr_step`). +""" +function step_mult(α::T, norm_∇fk::T, ::r2_step) where {T} + α +end + +function step_mult(α::T, norm_∇fk::T, ::tr_step) where {T} + α / norm_∇fk +end \ No newline at end of file diff --git a/test/allocs.jl b/test/allocs.jl index b02b6621..f5768c4a 100644 --- a/test/allocs.jl +++ b/test/allocs.jl @@ -30,12 +30,17 @@ end if Sys.isunix() @testset "Allocation tests" begin - @testset "$symsolver" for symsolver in (:LBFGSSolver, :R2Solver, :TrunkSolver, :TronSolver) + @testset "$symsolver" for symsolver in (:LBFGSSolver, :FoSolver, :FomoSolver, :TrunkSolver, :TronSolver) for model in NLPModelsTest.nlp_problems nlp = eval(Meta.parse(model))() if unconstrained(nlp) || (bound_constrained(nlp) && (symsolver == :TronSolver)) solver = eval(symsolver)(nlp) - stats = GenericExecutionStats(nlp) + if symsolver == :FomoSolver + T = eltype(nlp.meta.x0) + stats = GenericExecutionStats(nlp, solver_specific = Dict(:avgβmax => T(0))) + else + stats = GenericExecutionStats(nlp) + end with_logger(NullLogger()) 
do SolverCore.solve!(solver, nlp, stats) reset!(solver) diff --git a/test/callback.jl b/test/callback.jl index f43796fd..ddadc799 100644 --- a/test/callback.jl +++ b/test/callback.jl @@ -31,6 +31,11 @@ using ADNLPModels, JSOSolvers, LinearAlgebra, Logging #, Plots tron(nlp, callback = cb) end @test stats.iter == 8 + + stats = with_logger(NullLogger()) do + fomo(nlp, callback = cb) + end + @test stats.iter == 8 end @testset "Test callback for NLS" begin diff --git a/test/consistency.jl b/test/consistency.jl index 94569dca..fb725b5b 100644 --- a/test/consistency.jl +++ b/test/consistency.jl @@ -10,8 +10,9 @@ function consistency() @testset "Consistency" begin args = Pair{Symbol, Number}[:atol => 1e-6, :rtol => 1e-6, :max_eval => 20000, :max_time => 60.0] - @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2] + @testset "NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] with_logger(NullLogger()) do + reset!(unlp) stats = mtd(unlp; args...) @test stats isa GenericExecutionStats @test stats.status == :first_order @@ -27,7 +28,7 @@ function consistency() end end - @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2] + @testset "Quasi-Newton NLP with $mtd" for mtd in [trunk, lbfgs, tron, R2, fomo] with_logger(NullLogger()) do reset!(qnlp) stats = mtd(qnlp; args...) 
diff --git a/test/restart.jl b/test/restart.jl index 02d90902..38765465 100644 --- a/test/restart.jl +++ b/test/restart.jl @@ -1,5 +1,6 @@ @testset "Test restart with a different initial guess: $fun" for (fun, s) in ( - (:R2, :R2Solver), + (:R2, :FoSolver), + (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), (:trunk, :TrunkSolver), @@ -43,7 +44,8 @@ end end @testset "Test restart with a different problem: $fun" for (fun, s) in ( - (:R2, :R2Solver), + (:R2, :FoSolver), + (:fomo, :FomoSolver), (:lbfgs, :LBFGSSolver), (:tron, :TronSolver), (:trunk, :TrunkSolver), diff --git a/test/runtests.jl b/test/runtests.jl index de0295ed..bb41eeba 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,7 +18,7 @@ using JSOSolvers end @testset "Test iteration limit" begin - @testset "$fun" for fun in (R2, lbfgs, tron, trunk) + @testset "$fun" for fun in (R2, fomo, lbfgs, tron, trunk) f(x) = (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2 nlp = ADNLPModel(f, [-1.2; 1.0]) diff --git a/test/test_solvers.jl b/test/test_solvers.jl index cb41e83e..eb9029e1 100644 --- a/test/test_solvers.jl +++ b/test/test_solvers.jl @@ -8,6 +8,8 @@ function tests() ("lbfgs", lbfgs), ("tron", tron), ("R2", R2), + ("fomo_r2", fomo), + ("fomo_tr", (nlp; kwargs...) -> fomo(nlp, step_backend = JSOSolvers.tr_step(); kwargs...)), ] unconstrained_nlp(solver) multiprecision_nlp(solver, :unc)