Use SolverParameters (#254)
* Use SolverParameters

Co-authored-by: Dominique <[email protected]>
Co-authored-by: d-monnet <[email protected]>
3 people authored Nov 11, 2024
1 parent bcfa138 commit 6842496
Showing 8 changed files with 537 additions and 91 deletions.
2 changes: 2 additions & 0 deletions Project.toml
@@ -11,6 +11,7 @@ NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843"
SolverParameters = "d076d87d-d1f9-4ea3-a44b-64b4cdd1e470"
SolverTools = "b5612192-2639-5dc1-abfe-fbedd65fab29"

[compat]
@@ -19,6 +20,7 @@ LinearOperators = "2.0"
NLPModels = "0.21"
NLPModelsModifiers = "0.7"
SolverCore = "0.3"
SolverParameters = "0.1"
SolverTools = "0.9"
julia = "1.6"

3 changes: 2 additions & 1 deletion src/JSOSolvers.jl
@@ -4,7 +4,8 @@ module JSOSolvers
using LinearAlgebra, Logging, Printf

# JSO packages
using Krylov, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverTools
using Krylov,
LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverParameters, SolverTools

import SolverCore.solve!
export solve!
228 changes: 192 additions & 36 deletions src/fomo.jl
@@ -1,11 +1,107 @@
export fomo, FomoSolver, FoSolver, fo, R2, TR, tr_step, r2_step
export fomo, FomoSolver, FOMOParameterSet, FoSolver, fo, R2, TR
export tr_step, r2_step

abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end

abstract type AbstractFOMethod end
struct tr_step <: AbstractFOMethod end
struct r2_step <: AbstractFOMethod end

# Default algorithm parameter values
const FOMO_η1 =
DefaultParameter((nlp::AbstractNLPModel) -> eps(eltype(nlp.meta.x0))^(1 // 4), "eps(T)^(1 // 4)")
const FOMO_η2 =
DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(95 // 100), "T(95/100)")
const FOMO_γ1 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 2), "T(1/2)")
const FOMO_γ2 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(2), "T(2)")
const FOMO_γ3 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 2), "T(1/2)")
const FOMO_αmax =
DefaultParameter((nlp::AbstractNLPModel) -> 1 / eps(eltype(nlp.meta.x0)), "1/eps(T)")
const FOMO_β = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(9 // 10), "T(9/10)")
const FOMO_θ1 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 10), "T(1/10)")
const FOMO_θ2 =
DefaultParameter((nlp::AbstractNLPModel) -> eps(eltype(nlp.meta.x0))^(1 // 3), "eps(T)^(1/3)")
const FOMO_M = DefaultParameter(1)
const FOMO_step_backend = DefaultParameter(nlp -> r2_step(), "r2_step()")
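To make the pattern above concrete, here is a minimal usage sketch; the `ADNLPModel` test problem is an illustration only (not part of this diff), and the calls mirror the `get(FOMO_*, nlp)` resolution used by the constructor further down.

```julia
# Hypothetical example: each default resolves against a concrete model, so numeric
# values follow the element type of nlp.meta.x0 (here Float64).
using ADNLPModels, JSOSolvers

nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2, 1.0])

get(JSOSolvers.FOMO_η1, nlp)            # eps(Float64)^(1 // 4) ≈ 1.2e-4
get(JSOSolvers.FOMO_M, nlp)             # 1, independent of the element type
get(JSOSolvers.FOMO_step_backend, nlp)  # r2_step()
```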

"""
FOMOParameterSet{T} <: AbstractParameterSet
This structure, designed for `fomo`, gathers the following parameters:
- `η1`, `η2`: step acceptance parameters.
- `γ1`, `γ2`: regularization update parameters.
- `γ3`: update parameter of the momentum factor βmax in the case of an unsuccessful iteration.
- `αmax`: maximum step parameter of the fomo algorithm.
- `β ∈ [0,1)`: target decay rate for the momentum.
- `θ1`: momentum contribution parameter for convergence condition (1).
- `θ2`: momentum contribution parameter for convergence condition (2).
- `M`: requires objective decrease over the `M` last iterates (nonmonotone context). `M = 1` implies monotone behaviour.
- `step_backend`: step computation mode. Options are `r2_step()` for the quadratic regularization step and `tr_step()` for the first-order trust-region step.
An additional constructor is
FOMOParameterSet(nlp; kwargs...)
where the kwargs are the parameters above.
Default values are:
- `η1::T = $(FOMO_η1)`
- `η2::T = $(FOMO_η2)`
- `γ1::T = $(FOMO_γ1)`
- `γ2::T = $(FOMO_γ2)`
- `γ3::T = $(FOMO_γ3)`
- `αmax::T = $(FOMO_αmax)`
- `β = $(FOMO_β) ∈ [0,1)`
- `θ1 = $(FOMO_θ1)`
- `θ2 = $(FOMO_θ2)`
- `M = $(FOMO_M)`
- `step_backend = $(FOMO_step_backend)`
"""
struct FOMOParameterSet{T} <: AbstractParameterSet
η1::Parameter{T, RealInterval{T}}
η2::Parameter{T, RealInterval{T}}
γ1::Parameter{T, RealInterval{T}}
γ2::Parameter{T, RealInterval{T}}
γ3::Parameter{T, RealInterval{T}}
αmax::Parameter{T, RealInterval{T}}
β::Parameter{T, RealInterval{T}}
θ1::Parameter{T, RealInterval{T}}
θ2::Parameter{T, RealInterval{T}}
M::Parameter{Int, IntegerRange{Int}}
step_backend::Parameter{Union{r2_step, tr_step}, CategoricalSet{Union{r2_step, tr_step}}}
end

# add a default constructor
function FOMOParameterSet(
nlp::AbstractNLPModel{T};
η1::T = get(FOMO_η1, nlp),
η2::T = get(FOMO_η2, nlp),
γ1::T = get(FOMO_γ1, nlp),
γ2::T = get(FOMO_γ2, nlp),
γ3::T = get(FOMO_γ3, nlp),
αmax::T = get(FOMO_αmax, nlp),
β::T = get(FOMO_β, nlp),
θ1::T = get(FOMO_θ1, nlp),
θ2::T = get(FOMO_θ2, nlp),
M::Int = get(FOMO_M, nlp),
step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp),
) where {T}
@assert η1 <= η2
FOMOParameterSet(
Parameter(η1, RealInterval(T(0), T(1), lower_open = true, upper_open = true)),
Parameter(η2, RealInterval(T(0), T(1), lower_open = true, upper_open = true)),
Parameter(γ1, RealInterval(T(0), T(1), lower_open = true, upper_open = true)),
Parameter(γ2, RealInterval(T(1), T(Inf), lower_open = true, upper_open = true)),
Parameter(γ3, RealInterval(T(0), T(1))),
Parameter(αmax, RealInterval(T(1), T(Inf), upper_open = true)),
Parameter(β, RealInterval(T(0), T(1), upper_open = true)),
Parameter(θ1, RealInterval(T(0), T(1))),
Parameter(θ2, RealInterval(T(0), T(1), upper_open = true)),
Parameter(M, IntegerRange(Int(1), typemax(Int))),
Parameter(step_backend, CategoricalSet{Union{tr_step, r2_step}}([r2_step(); tr_step()])),
)
end
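A usage sketch for this constructor (the `ADNLPModel` problem and the explicit `using SolverParameters` are assumptions for illustration): each keyword is wrapped in a `Parameter` over the interval shown above, and `value` reads it back, as `solve!` does further down.

```julia
# Hypothetical example: override a couple of parameters, keep the rest at their defaults.
using ADNLPModels, JSOSolvers, SolverParameters

nlp = ADNLPModel(x -> sum((x .- 1) .^ 2), zeros(2))

params = FOMOParameterSet(nlp; η2 = 0.99, M = 5)
value(params.η2)            # 0.99
value(params.M)             # 5
value(params.step_backend)  # r2_step()
```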

"""
fomo(nlp; kwargs...)
@@ -42,19 +138,19 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
- `x::V = nlp.meta.x0`: the initial guess.
- `atol::T = √eps(T)`: absolute tolerance.
- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
- `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
- `η1 = $(FOMO_η1)`, `η2 = $(FOMO_η2)`: step acceptance parameters.
- `γ1 = $(FOMO_γ1)`, `γ2 = $(FOMO_γ2)`: regularization update parameters.
- `γ3 = $(FOMO_γ3)`: update parameter of the momentum factor βmax in the case of an unsuccessful iteration.
- `αmax = $(FOMO_αmax)`: maximum step parameter for fomo algorithm.
- `max_eval::Int = -1`: maximum number of objective evaluations.
- `max_time::Float64 = 30.0`: maximum time limit in seconds.
- `max_iter::Int = typemax(Int)`: maximum number of iterations.
- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum.
- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1).
- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2).
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
- `β = $(FOMO_β) ∈ [0,1)`: target decay rate for the momentum.
- `θ1 = $(FOMO_θ1)`: momentum contribution parameter for convergence condition (1).
- `θ2 = $(FOMO_θ2)`: momentum contribution parameter for convergence condition (2).
- `M = $(FOMO_M)`: requires objective decrease over the `M` last iterates (nonmonotone context). `M = 1` implies monotone behaviour.
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
- `step_backend = $(FOMO_step_backend)`: step computation mode. Options are `r2_step()` for the quadratic regularization step and `tr_step()` for the first-order trust-region step.
# Output
@@ -113,25 +209,50 @@ mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver
p::V
o::V
α::T
params::FOMOParameterSet{T}
end

function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = get(FOMO_M, nlp), kwargs...) where {T, V}
params = FOMOParameterSet(nlp; M = M, kwargs...)
x = similar(nlp.meta.x0)
g = similar(nlp.meta.x0)
c = similar(nlp.meta.x0)
m = fill!(similar(nlp.meta.x0), 0)
d = fill!(similar(nlp.meta.x0), 0)
p = similar(nlp.meta.x0)
o = fill!(Vector{T}(undef, M), -Inf)
return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0))
return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0), params)
end
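A hedged sketch of the preallocated workflow this constructor supports (the `ADNLPModel` problem is an assumption): algorithmic parameters are fixed once in `solver.params`, while `solve!` keeps only stopping and display options.

```julia
# Hypothetical example: preallocate the solver with custom parameters, then solve in place.
using ADNLPModels, JSOSolvers, SolverCore

nlp = ADNLPModel(x -> (x[1] - 1)^2 + 4 * (x[2] - x[1]^2)^2, [-1.2, 1.0])

solver = FomoSolver(nlp; M = 2, β = 0.8)
stats = GenericExecutionStats(nlp; solver_specific = Dict(:avgβmax => 0.0))
solve!(solver, nlp, stats; atol = 1e-6, verbose = 10)
stats.status   # expected :first_order on this small problem
```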

@doc (@doc FomoSolver) function fomo(
nlp::AbstractNLPModel{T, V};
M::Int = 1,
η1::T = get(FOMO_η1, nlp),
η2::T = get(FOMO_η2, nlp),
γ1::T = get(FOMO_γ1, nlp),
γ2::T = get(FOMO_γ2, nlp),
γ3::T = get(FOMO_γ3, nlp),
αmax::T = get(FOMO_αmax, nlp),
β::T = get(FOMO_β, nlp),
θ1::T = get(FOMO_θ1, nlp),
θ2::T = get(FOMO_θ2, nlp),
M::Int = get(FOMO_M, nlp),
step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp),
kwargs...,
) where {T, V}
solver = FomoSolver(nlp; M)
solver = FomoSolver(
nlp;
η1 = η1,
η2 = η2,
γ1 = γ1,
γ2 = γ2,
γ3 = γ3,
αmax = αmax,
β = β,
θ1 = θ1,
θ2 = θ2,
M = M,
step_backend = step_backend,
)
solver_specific = Dict(:avgβmax => T(0.0))
stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
return solve!(solver, nlp, stats; kwargs...)
@@ -168,15 +289,17 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i
- `x::V = nlp.meta.x0`: the initial guess.
- `atol::T = √eps(T)`: absolute tolerance.
- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
- `η1 = $(FOMO_η1)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `η2 = $(FOMO_η2)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `γ1 = $(FOMO_γ1)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `γ2 = $(FOMO_γ2)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `αmax = $(FOMO_αmax)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `max_eval::Int = -1`: maximum number of evaluations of the objective function.
- `max_time::Float64 = 30.0`: maximum time limit in seconds.
- `max_iter::Int = typemax(Int)`: maximum number of iterations.
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
- `M = $(FOMO_M)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
- `step_backend = $(FOMO_step_backend)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
# Output
@@ -215,14 +338,16 @@ mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
c::V
o::V
α::T
params::FOMOParameterSet{T}
end

function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = get(FOMO_M, nlp), kwargs...) where {T, V}
params = FOMOParameterSet(nlp; M = M, kwargs...)
x = similar(nlp.meta.x0)
g = similar(nlp.meta.x0)
c = similar(nlp.meta.x0)
o = fill!(Vector{T}(undef, M), -Inf)
return FoSolver{T, V}(x, g, c, o, T(0))
return FoSolver{T, V}(x, g, c, o, T(0), params)
end

"""
@@ -232,14 +357,41 @@ mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end

Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(
nlp::AbstractNLPModel;
M = 1,
M = FOMO_M,
kwargs...,
)

@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; M::Int = 1, kwargs...) where {T, V}
solver = FoSolver(nlp; M)
@doc (@doc FoSolver) function fo(
nlp::AbstractNLPModel{T, V};
η1::T = get(FOMO_η1, nlp),
η2::T = get(FOMO_η2, nlp),
γ1::T = get(FOMO_γ1, nlp),
γ2::T = get(FOMO_γ2, nlp),
γ3::T = get(FOMO_γ3, nlp),
αmax::T = get(FOMO_αmax, nlp),
β::T = get(FOMO_β, nlp),
θ1::T = get(FOMO_θ1, nlp),
θ2::T = get(FOMO_θ2, nlp),
M::Int = get(FOMO_M, nlp),
step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp),
kwargs...,
) where {T, V}
solver = FoSolver(
nlp;
η1 = η1,
η2 = η2,
γ1 = γ1,
γ2 = γ2,
γ3 = γ3,
αmax = αmax,
β = β,
θ1 = θ1,
θ2 = θ2,
M = M,
step_backend = step_backend,
)
stats = GenericExecutionStats(nlp)
return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
return solve!(solver, nlp, stats; kwargs...)
end
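A usage sketch for the convenience wrappers (again with an assumed `ADNLPModel` problem): keyword arguments now flow into the `FOMOParameterSet` via `FoSolver`, while stopping options are forwarded to `solve!`.

```julia
# Hypothetical example: `fo` defaults to the quadratic regularization backend r2_step().
using ADNLPModels, JSOSolvers

nlp = ADNLPModel(x -> sum((x .- 1) .^ 2), zeros(5))

stats    = fo(nlp; η1 = 1e-3, γ2 = 1.5, max_iter = 100)
stats_tr = fo(nlp; step_backend = tr_step())  # first-order trust-region variant
stats_r2 = R2(nlp; atol = 1e-6)               # R2 is kept as a backward-compatible alias
```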

@doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
Expand All @@ -265,24 +417,28 @@ function SolverCore.solve!(
x::V = nlp.meta.x0,
atol::T = eps(T),
rtol::T = eps(T),
η1::T = T(eps(T)^(1 / 4)),
η2::T = T(0.95),
γ1::T = T(1 / 2),
γ2::T = T(2),
γ3::T = T(1 / 2),
αmax::T = 1 / eps(T),
max_time::Float64 = 30.0,
max_eval::Int = -1,
max_iter::Int = typemax(Int),
β::T = T(0.9),
θ1::T = T(0.1),
θ2::T = T(eps(T)^(1 / 3)),
verbose::Int = 0,
step_backend = r2_step(),
) where {T, V}
unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")

# parameters
η1 = value(solver.params.η1)
η2 = value(solver.params.η2)
γ1 = value(solver.params.γ1)
γ2 = value(solver.params.γ2)
γ3 = value(solver.params.γ3)
αmax = value(solver.params.αmax)
β = value(solver.params.β)
θ1 = value(solver.params.θ1)
θ2 = value(solver.params.θ2)
M = value(solver.params.M)
step_backend = value(solver.params.step_backend)

use_momentum = typeof(solver) <: FomoSolver
is_r2 = typeof(step_backend) <: r2_step
unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")

reset!(stats)
start_time = time()