Use SolverParameters #254

Merged · 6 commits · Nov 11, 2024
2 changes: 2 additions & 0 deletions Project.toml
@@ -11,6 +11,7 @@ NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843"
SolverParameters = "d076d87d-d1f9-4ea3-a44b-64b4cdd1e470"
SolverTools = "b5612192-2639-5dc1-abfe-fbedd65fab29"

[compat]
@@ -19,6 +20,7 @@ LinearOperators = "2.0"
NLPModels = "0.21"
NLPModelsModifiers = "0.7"
SolverCore = "0.3"
SolverParameters = "0.1"
SolverTools = "0.9"
julia = "1.6"

3 changes: 2 additions & 1 deletion src/JSOSolvers.jl
@@ -4,7 +4,8 @@ module JSOSolvers
using LinearAlgebra, Logging, Printf

# JSO packages
using Krylov, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverTools
using Krylov,
LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverParameters, SolverTools

import SolverCore.solve!
export solve!
228 changes: 192 additions & 36 deletions src/fomo.jl
@@ -1,11 +1,107 @@
export fomo, FomoSolver, FoSolver, fo, R2, TR, tr_step, r2_step
export fomo, FomoSolver, FOMOParameterSet, FoSolver, fo, R2, TR
export tr_step, r2_step

abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end

abstract type AbstractFOMethod end
struct tr_step <: AbstractFOMethod end
struct r2_step <: AbstractFOMethod end

# Default algorithm parameter values
const FOMO_η1 =
DefaultParameter((nlp::AbstractNLPModel) -> eps(eltype(nlp.meta.x0))^(1 // 4), "eps(T)^(1 // 4)")
const FOMO_η2 =
DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(95 // 100), "T(95/100)")
const FOMO_γ1 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 2), "T(1/2)")
const FOMO_γ2 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(2), "T(2)")
const FOMO_γ3 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 2), "T(1/2)")
const FOMO_αmax =
DefaultParameter((nlp::AbstractNLPModel) -> 1 / eps(eltype(nlp.meta.x0)), "1/eps(T)")
const FOMO_β = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(9 // 10), "T(9/10)")
const FOMO_θ1 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 10), "T(1/10)")
const FOMO_θ2 =
DefaultParameter((nlp::AbstractNLPModel) -> eps(eltype(nlp.meta.x0))^(1 // 3), "eps(T)^(1/3)")
const FOMO_M = DefaultParameter(1)
const FOMO_step_backend = DefaultParameter(nlp -> r2_step(), "r2_step()")
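
To make the `T`-dependent defaults above tangible, here is how a few of them evaluate for a concrete element type. This is plain Julia arithmetic added for illustration, not part of the diff; the floating-point values are approximate.

# With a model whose nlp.meta.x0 has eltype Float32, T = Float32 and the defaults give roughly:
T = Float32
eps(T)^(1 // 4)   # FOMO_η1   ≈ 0.0186
T(95 // 100)      # FOMO_η2   = 0.95
1 / eps(T)        # FOMO_αmax ≈ 8.39e6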

"""
FOMOParameterSet{T} <: AbstractParameterSet

This structure, designed for `fomo`, gathers the following algorithm parameters:
- `η1`, `η2`: step acceptance parameters.
- `γ1`, `γ2`: regularization update parameters.
- `γ3`: update parameter of the momentum factor βmax in the case of an unsuccessful iteration.
- `αmax`: maximum step parameter of the fomo algorithm.
- `β ∈ [0,1)`: target decay rate for the momentum.
- `θ1`: momentum contribution parameter for convergence condition (1).
- `θ2`: momentum contribution parameter for convergence condition (2).
- `M`: requires objective decrease over the `M` last iterates (nonmonotone context). `M = 1` implies monotone behaviour.
- `step_backend`: step computation mode. Options are `r2_step()` for a quadratic regularization step and `tr_step()` for a first-order trust-region step.

An additional constructor is

FOMOParameterSet(nlp; kwargs...)

where the kwargs are the parameters above.

Default values are:
- `η1::T = $(FOMO_η1)`
- `η2::T = $(FOMO_η2)`
- `γ1::T = $(FOMO_γ1)`
- `γ2::T = $(FOMO_γ2)`
- `γ3::T = $(FOMO_γ3)`
- `αmax::T = $(FOMO_αmax)`
- `β = $(FOMO_β) ∈ [0,1)`
- `θ1 = $(FOMO_θ1)`
- `θ2 = $(FOMO_θ2)`
- `M = $(FOMO_M)`
- `step_backend = $(FOMO_step_backend)`
"""
struct FOMOParameterSet{T} <: AbstractParameterSet
η1::Parameter{T, RealInterval{T}}
η2::Parameter{T, RealInterval{T}}
γ1::Parameter{T, RealInterval{T}}
γ2::Parameter{T, RealInterval{T}}
γ3::Parameter{T, RealInterval{T}}
αmax::Parameter{T, RealInterval{T}}
β::Parameter{T, RealInterval{T}}
θ1::Parameter{T, RealInterval{T}}
θ2::Parameter{T, RealInterval{T}}
M::Parameter{Int, IntegerRange{Int}}
step_backend::Parameter{Union{r2_step, tr_step}, CategoricalSet{Union{r2_step, tr_step}}}
end

# add a default constructor
function FOMOParameterSet(
nlp::AbstractNLPModel{T};
η1::T = get(FOMO_η1, nlp),
η2::T = get(FOMO_η2, nlp),
γ1::T = get(FOMO_γ1, nlp),
γ2::T = get(FOMO_γ2, nlp),
γ3::T = get(FOMO_γ3, nlp),
αmax::T = get(FOMO_αmax, nlp),
β::T = get(FOMO_β, nlp),
θ1::T = get(FOMO_θ1, nlp),
θ2::T = get(FOMO_θ2, nlp),
M::Int = get(FOMO_M, nlp),
step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp),
) where {T}
@assert η1 <= η2
FOMOParameterSet(
Parameter(η1, RealInterval(T(0), T(1), lower_open = true, upper_open = true)),
Parameter(η2, RealInterval(T(0), T(1), lower_open = true, upper_open = true)),
Parameter(γ1, RealInterval(T(0), T(1), lower_open = true, upper_open = true)),
Parameter(γ2, RealInterval(T(1), T(Inf), lower_open = true, upper_open = true)),
Parameter(γ3, RealInterval(T(0), T(1))),
Parameter(αmax, RealInterval(T(1), T(Inf), upper_open = true)),
Parameter(β, RealInterval(T(0), T(1), upper_open = true)),
Parameter(θ1, RealInterval(T(0), T(1))),
Parameter(θ2, RealInterval(T(0), T(1), upper_open = true)),
Parameter(M, IntegerRange(Int(1), typemax(Int))),
Parameter(step_backend, CategoricalSet{Union{tr_step, r2_step}}([r2_step(); tr_step()])),
)
end
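
As a quick illustration of the new constructor (not part of the diff): `ADNLPModel` below is an assumed test model from ADNLPModels.jl, and `value` is the SolverParameters accessor used in `solve!` further down.

using ADNLPModels, SolverParameters, JSOSolvers

# Rosenbrock-type test problem in Float64, so T = Float64.
nlp = ADNLPModel(x -> (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2, [-1.2; 1.0])

# Override two defaults, keep the rest; bounds are checked at construction.
params = FOMOParameterSet(nlp; β = 0.8, M = 5)

value(params.β)             # 0.8
value(params.M)             # 5
value(params.step_backend)  # r2_step(), the default backend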

"""
fomo(nlp; kwargs...)

@@ -42,19 +138,19 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used in …
- `x::V = nlp.meta.x0`: the initial guess.
- `atol::T = √eps(T)`: absolute tolerance.
- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
- `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration.
- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
- `η1 = $(FOMO_η1)`, `η2 = $(FOMO_η2)`: step acceptance parameters.
- `γ1 = $(FOMO_γ1)`, `γ2 = $(FOMO_γ2)`: regularization update parameters.
- `γ3 = $(FOMO_γ3)`: update parameter of the momentum factor βmax in the case of an unsuccessful iteration.
- `αmax = $(FOMO_αmax)`: maximum step parameter for fomo algorithm.
- `max_eval::Int = -1`: maximum number of objective evaluations.
- `max_time::Float64 = 30.0`: maximum time limit in seconds.
- `max_iter::Int = typemax(Int)`: maximum number of iterations.
- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum.
- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1).
- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2).
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
- `β = $(FOMO_β) ∈ [0,1)`: target decay rate for the momentum.
- `θ1 = $(FOMO_θ1)`: momentum contribution parameter for convergence condition (1).
- `θ2 = $(FOMO_θ2)`: momentum contribution parameter for convergence condition (2).
- `M = $(FOMO_M)`: requires objective decrease over the `M` last iterates (nonmonotone context). `M = 1` implies monotone behaviour.
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
- `step_backend = $(FOMO_step_backend)`: step computation mode. Options are `r2_step()` for a quadratic regularization step and `tr_step()` for a first-order trust-region step.

# Output

@@ -113,25 +209,50 @@ mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver
p::V
o::V
α::T
params::FOMOParameterSet{T}
end

function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = get(FOMO_M, nlp), kwargs...) where {T, V}
params = FOMOParameterSet(nlp; M = M, kwargs...)
x = similar(nlp.meta.x0)
g = similar(nlp.meta.x0)
c = similar(nlp.meta.x0)
m = fill!(similar(nlp.meta.x0), 0)
d = fill!(similar(nlp.meta.x0), 0)
p = similar(nlp.meta.x0)
o = fill!(Vector{T}(undef, M), -Inf)
return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0))
return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0), params)
end
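
For example, the preallocated workflow that the `FomoSolver` docstring describes could be exercised as sketched below; the ADNLPModels.jl test problem is an assumption, the rest follows the constructor above and the `fomo` wrapper below.

using ADNLPModels, SolverCore, JSOSolvers

nlp = ADNLPModel(x -> sum((x .- 1) .^ 2), zeros(5))

# Algorithm parameters are validated and frozen when the solver is built.
solver = FomoSolver(nlp; M = 3, β = 0.85)
stats = GenericExecutionStats(nlp; solver_specific = Dict(:avgβmax => 0.0))
solve!(solver, nlp, stats; atol = 1e-6)
stats.status  # expected :first_order on this convex problem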

@doc (@doc FomoSolver) function fomo(
nlp::AbstractNLPModel{T, V};
M::Int = 1,
η1::T = get(FOMO_η1, nlp),
η2::T = get(FOMO_η2, nlp),
γ1::T = get(FOMO_γ1, nlp),
γ2::T = get(FOMO_γ2, nlp),
γ3::T = get(FOMO_γ3, nlp),
αmax::T = get(FOMO_αmax, nlp),
β::T = get(FOMO_β, nlp),
θ1::T = get(FOMO_θ1, nlp),
θ2::T = get(FOMO_θ2, nlp),
M::Int = get(FOMO_M, nlp),
step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp),
kwargs...,
) where {T, V}
solver = FomoSolver(nlp; M)
solver = FomoSolver(
nlp;
η1 = η1,
η2 = η2,
γ1 = γ1,
γ2 = γ2,
γ3 = γ3,
αmax = αmax,
β = β,
θ1 = θ1,
θ2 = θ2,
M = M,
step_backend = step_backend,
)
solver_specific = Dict(:avgβmax => T(0.0))
stats = GenericExecutionStats(nlp; solver_specific = solver_specific)
return solve!(solver, nlp, stats; kwargs...)
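
The one-shot interface defined here would then be used as follows (sketch, same hypothetical ADNLPModels.jl problem as above).

# Nonmonotone run over a window of 5 iterates with the first-order trust-region backend.
stats = fomo(nlp; M = 5, step_backend = tr_step(), verbose = 10)
stats.solver_specific[:avgβmax]  # average momentum contribution over the run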
@@ -168,15 +289,17 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used in …
- `x::V = nlp.meta.x0`: the initial guess.
- `atol::T = √eps(T)`: absolute tolerance.
- `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖.
- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters.
- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters.
- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm.
- `η1 = $(FOMO_η1)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `η2 = $(FOMO_η2)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `γ1 = $(FOMO_γ1)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `γ2 = $(FOMO_γ2)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `αmax = $(FOMO_αmax)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `max_eval::Int = -1`: maximum number of evaluation of the objective function.
- `max_time::Float64 = 30.0`: maximum time limit in seconds.
- `max_iter::Int = typemax(Int)`: maximum number of iterations.
- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour.
- `M = $(FOMO_M)`: algorithm parameter, see [`FOMOParameterSet`](@ref).
- `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration.
- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region.
- `step_backend = $(FOMO_step_backend)`: algorithm parameter, see [`FOMOParameterSet`](@ref).

# Output

@@ -215,14 +338,16 @@ mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver
c::V
o::V
α::T
params::FOMOParameterSet{T}
end

function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V}
function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = get(FOMO_M, nlp), kwargs...) where {T, V}
params = FOMOParameterSet(nlp; M = M, kwargs...)
x = similar(nlp.meta.x0)
g = similar(nlp.meta.x0)
c = similar(nlp.meta.x0)
o = fill!(Vector{T}(undef, M), -Inf)
return FoSolver{T, V}(x, g, c, o, T(0))
return FoSolver{T, V}(x, g, c, o, T(0), params)
end

"""
@@ -232,14 +357,41 @@ mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end

Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver(
nlp::AbstractNLPModel;
M = 1,
M = FOMO_M,
kwargs...,
)

@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; M::Int = 1, kwargs...) where {T, V}
solver = FoSolver(nlp; M)
@doc (@doc FoSolver) function fo(
nlp::AbstractNLPModel{T, V};
η1::T = get(FOMO_η1, nlp),
η2::T = get(FOMO_η2, nlp),
γ1::T = get(FOMO_γ1, nlp),
γ2::T = get(FOMO_γ2, nlp),
γ3::T = get(FOMO_γ3, nlp),
αmax::T = get(FOMO_αmax, nlp),
β::T = get(FOMO_β, nlp),
θ1::T = get(FOMO_θ1, nlp),
θ2::T = get(FOMO_θ2, nlp),
M::Int = get(FOMO_M, nlp),
step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp),
kwargs...,
) where {T, V}
solver = FoSolver(
nlp;
η1 = η1,
η2 = η2,
γ1 = γ1,
γ2 = γ2,
γ3 = γ3,
αmax = αmax,
β = β,
θ1 = θ1,
θ2 = θ2,
M = M,
step_backend = step_backend,
)
stats = GenericExecutionStats(nlp)
return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...)
return solve!(solver, nlp, stats; kwargs...)
end
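
A corresponding sketch for the momentum-free interface (again assuming an ADNLPModels.jl test problem named `nlp`):

# R2 keeps the quadratic regularization backend; fo lets the caller choose the backend
# and forwards the remaining keyword arguments to solve!.
stats_r2 = R2(nlp)
stats_tr = fo(nlp; step_backend = tr_step())
(stats_r2.status, stats_tr.status)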

@doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V}
@@ -265,24 +417,28 @@ function SolverCore.solve!(
x::V = nlp.meta.x0,
atol::T = √eps(T),
rtol::T = √eps(T),
η1::T = T(eps(T)^(1 / 4)),
η2::T = T(0.95),
γ1::T = T(1 / 2),
γ2::T = T(2),
γ3::T = T(1 / 2),
αmax::T = 1 / eps(T),
max_time::Float64 = 30.0,
max_eval::Int = -1,
max_iter::Int = typemax(Int),
β::T = T(0.9),
θ1::T = T(0.1),
θ2::T = T(eps(T)^(1 / 3)),
verbose::Int = 0,
step_backend = r2_step(),
) where {T, V}
unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")

# parameters
η1 = value(solver.params.η1)
η2 = value(solver.params.η2)
γ1 = value(solver.params.γ1)
γ2 = value(solver.params.γ2)
γ3 = value(solver.params.γ3)
αmax = value(solver.params.αmax)
β = value(solver.params.β)
θ1 = value(solver.params.θ1)
θ2 = value(solver.params.θ2)
M = value(solver.params.M)
step_backend = value(solver.params.step_backend)

use_momentum = typeof(solver) <: FomoSolver
is_r2 = typeof(step_backend) <: r2_step
unconstrained(nlp) || error("fomo should only be called on unconstrained problems.")

reset!(stats)
start_time = time()