diff --git a/Project.toml b/Project.toml index 02024ba9..dbdb67d4 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" NLPModelsModifiers = "e01155f1-5c6f-4375-a9d8-616dd036575f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" SolverCore = "ff4d7338-4cf1-434d-91df-b86cb86fb843" +SolverParameters = "d076d87d-d1f9-4ea3-a44b-64b4cdd1e470" SolverTools = "b5612192-2639-5dc1-abfe-fbedd65fab29" [compat] @@ -19,6 +20,7 @@ LinearOperators = "2.0" NLPModels = "0.21" NLPModelsModifiers = "0.7" SolverCore = "0.3" +SolverParameters = "0.1" SolverTools = "0.9" julia = "1.6" diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index 2265fa1e..0e86dfd6 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -4,7 +4,8 @@ module JSOSolvers using LinearAlgebra, Logging, Printf # JSO packages -using Krylov, LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverTools +using Krylov, + LinearOperators, NLPModels, NLPModelsModifiers, SolverCore, SolverParameters, SolverTools import SolverCore.solve! export solve! 
diff --git a/src/fomo.jl b/src/fomo.jl index 51c876b5..a6327fb5 100644 --- a/src/fomo.jl +++ b/src/fomo.jl @@ -1,4 +1,5 @@ -export fomo, FomoSolver, FoSolver, fo, R2, TR, tr_step, r2_step +export fomo, FomoSolver, FOMOParameterSet, FoSolver, fo, R2, TR +export tr_step, r2_step abstract type AbstractFirstOrderSolver <: AbstractOptimizationSolver end @@ -6,6 +7,101 @@ abstract type AbstractFOMethod end struct tr_step <: AbstractFOMethod end struct r2_step <: AbstractFOMethod end +# Default algorithm parameter values +const FOMO_η1 = + DefaultParameter((nlp::AbstractNLPModel) -> eps(eltype(nlp.meta.x0))^(1 // 4), "eps(T)^(1 // 4)") +const FOMO_η2 = + DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(95 // 100), "T(95/100)") +const FOMO_γ1 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 2), "T(1/2)") +const FOMO_γ2 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(2), "T(2)") +const FOMO_γ3 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 2), "T(1/2)") +const FOMO_αmax = + DefaultParameter((nlp::AbstractNLPModel) -> 1 / eps(eltype(nlp.meta.x0)), "1/eps(T)") +const FOMO_β = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(9 // 10), "T(9/10)") +const FOMO_θ1 = DefaultParameter((nlp::AbstractNLPModel) -> eltype(nlp.meta.x0)(1 // 10), "T(1/10)") +const FOMO_θ2 = + DefaultParameter((nlp::AbstractNLPModel) -> eps(eltype(nlp.meta.x0))^(1 // 3), "eps(T)^(1/3)") +const FOMO_M = DefaultParameter(1) +const FOMO_step_backend = DefaultParameter(nlp -> r2_step(), "r2_step()") + +""" + FOMOParameterSet{T} <: AbstractParameterSet + +This structure designed for `fomo` regroups the following parameters: + - `η1`, `η2`: step acceptance parameters. + - `γ1`, `γ2`: regularization update parameters. + - `γ3` : momentum factor βmax update parameter in case of unsuccessful iteration. + - `αmax`: maximum step parameter for fomo algorithm. + - `β ∈ [0,1)`: target decay rate for the momentum. 
+ - `θ1`: momentum contribution parameter for convergence condition (1). + - `θ2`: momentum contribution parameter for convergence condition (2). + - `M` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour. + - `step_backend`: step computation mode. Options are `r2_step()` for quadratic regularization step and `tr_step()` for first-order trust-region. + +An additional constructor is + + FOMOParameterSet(nlp; kwargs...) + +where the kwargs are the parameters above. + +Default values are: + - `η1::T = $(FOMO_η1)` + - `η2::T = $(FOMO_η2)` + - `γ1::T = $(FOMO_γ1)` + - `γ2::T = $(FOMO_γ2)` + - `γ3::T = $(FOMO_γ3)` + - `αmax::T = $(FOMO_αmax)` + - `β = $(FOMO_β) ∈ [0,1)` + - `θ1 = $(FOMO_θ1)` + - `θ2 = $(FOMO_θ2)` + - `M = $(FOMO_M)` + - `step_backend = $(FOMO_step_backend)` +""" +struct FOMOParameterSet{T} <: AbstractParameterSet + η1::Parameter{T, RealInterval{T}} + η2::Parameter{T, RealInterval{T}} + γ1::Parameter{T, RealInterval{T}} + γ2::Parameter{T, RealInterval{T}} + γ3::Parameter{T, RealInterval{T}} + αmax::Parameter{T, RealInterval{T}} + β::Parameter{T, RealInterval{T}} + θ1::Parameter{T, RealInterval{T}} + θ2::Parameter{T, RealInterval{T}} + M::Parameter{Int, IntegerRange{Int}} + step_backend::Parameter{Union{r2_step, tr_step}, CategoricalSet{Union{r2_step, tr_step}}} +end + +# add a default constructor +function FOMOParameterSet( + nlp::AbstractNLPModel{T}; + η1::T = get(FOMO_η1, nlp), + η2::T = get(FOMO_η2, nlp), + γ1::T = get(FOMO_γ1, nlp), + γ2::T = get(FOMO_γ2, nlp), + γ3::T = get(FOMO_γ3, nlp), + αmax::T = get(FOMO_αmax, nlp), + β::T = get(FOMO_β, nlp), + θ1::T = get(FOMO_θ1, nlp), + θ2::T = get(FOMO_θ2, nlp), + M::Int = get(FOMO_M, nlp), + step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp), +) where {T} + @assert η1 <= η2 + FOMOParameterSet( + Parameter(η1, RealInterval(T(0), T(1), lower_open = true, upper_open = true)), + Parameter(η2, RealInterval(T(0), T(1), lower_open = true, 
upper_open = true)), + Parameter(γ1, RealInterval(T(0), T(1), lower_open = true, upper_open = true)), + Parameter(γ2, RealInterval(T(1), T(Inf), lower_open = true, upper_open = true)), + Parameter(γ3, RealInterval(T(0), T(1))), + Parameter(αmax, RealInterval(T(1), T(Inf), upper_open = true)), + Parameter(β, RealInterval(T(0), T(1), upper_open = true)), + Parameter(θ1, RealInterval(T(0), T(1))), + Parameter(θ2, RealInterval(T(0), T(1), upper_open = true)), + Parameter(M, IntegerRange(Int(1), typemax(Int))), + Parameter(step_backend, CategoricalSet{Union{tr_step, r2_step}}([r2_step(); tr_step()])), + ) +end + """ fomo(nlp; kwargs...) @@ -42,19 +138,19 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. -- `γ3 = T(1/2)` : momentum factor βmax update parameter in case of unsuccessful iteration. -- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `η1 = $(FOMO_η1)`, `η2 = $(FOMO_η2)`: step acceptance parameters. +- `γ1 = $(FOMO_γ1)`, `γ2 = $(FOMO_γ2)`: regularization update parameters. +- `γ3 = $(FOMO_γ3)` : momentum factor βmax update parameter in case of unsuccessful iteration. +- `αmax = $(FOMO_αmax)`: maximum step parameter for fomo algorithm. - `max_eval::Int = -1`: maximum number of objective evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `β = T(0.9) ∈ [0,1)`: target decay rate for the momentum. -- `θ1 = T(0.1)`: momentum contribution parameter for convergence condition (1). -- `θ2 = T(eps(T)^(1/3))`: momentum contribution parameter for convergence condition (2). 
-- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour. +- `β = $(FOMO_β) ∈ [0,1)`: target decay rate for the momentum. +- `θ1 = $(FOMO_θ1)`: momentum contribution parameter for convergence condition (1). +- `θ2 = $(FOMO_θ2)`: momentum contribution parameter for convergence condition (2). +- `M = $(FOMO_M)` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour. - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. +- `step_backend = $(FOMO_step_backend)`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. # Output @@ -113,9 +209,11 @@ mutable struct FomoSolver{T, V} <: AbstractFirstOrderSolver p::V o::V α::T + params::FOMOParameterSet{T} end -function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V} +function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = get(FOMO_M, nlp), kwargs...) where {T, V} + params = FOMOParameterSet(nlp; M = M, kwargs...) 
x = similar(nlp.meta.x0) g = similar(nlp.meta.x0) c = similar(nlp.meta.x0) @@ -123,15 +221,38 @@ function FomoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V} d = fill!(similar(nlp.meta.x0), 0) p = similar(nlp.meta.x0) o = fill!(Vector{T}(undef, M), -Inf) - return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0)) + return FomoSolver{T, V}(x, g, c, m, d, p, o, T(0), params) end @doc (@doc FomoSolver) function fomo( nlp::AbstractNLPModel{T, V}; - M::Int = 1, + η1::T = get(FOMO_η1, nlp), + η2::T = get(FOMO_η2, nlp), + γ1::T = get(FOMO_γ1, nlp), + γ2::T = get(FOMO_γ2, nlp), + γ3::T = get(FOMO_γ3, nlp), + αmax::T = get(FOMO_αmax, nlp), + β::T = get(FOMO_β, nlp), + θ1::T = get(FOMO_θ1, nlp), + θ2::T = get(FOMO_θ2, nlp), + M::Int = get(FOMO_M, nlp), + step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp), kwargs..., ) where {T, V} - solver = FomoSolver(nlp; M) + solver = FomoSolver( + nlp; + η1 = η1, + η2 = η2, + γ1 = γ1, + γ2 = γ2, + γ3 = γ3, + αmax = αmax, + β = β, + θ1 = θ1, + θ2 = θ2, + M = M, + step_backend = step_backend, + ) solver_specific = Dict(:avgβmax => T(0.0)) stats = GenericExecutionStats(nlp; solver_specific = solver_specific) return solve!(solver, nlp, stats; kwargs...) @@ -168,15 +289,17 @@ For advanced usage, first define a `FomoSolver` to preallocate the memory used i - `x::V = nlp.meta.x0`: the initial guess. - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance: algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. -- `η1 = eps(T)^(1/4)`, `η2 = T(0.95)`: step acceptance parameters. -- `γ1 = T(1/2)`, `γ2 = T(2)`: regularization update parameters. -- `αmax = 1/eps(T)`: maximum step parameter for fomo algorithm. +- `η1 = $(FOMO_η1)`: algorithm parameter, see [`FOMOParameterSet`](@ref). +- `η2 = $(FOMO_η2)`: algorithm parameter, see [`FOMOParameterSet`](@ref). +- `γ1 = $(FOMO_γ1)`: algorithm parameter, see [`FOMOParameterSet`](@ref). +- `γ2 = $(FOMO_γ2)`: algorithm parameter, see [`FOMOParameterSet`](@ref). 
+- `αmax = $(FOMO_αmax)`: algorithm parameter, see [`FOMOParameterSet`](@ref). - `max_eval::Int = -1`: maximum number of evaluation of the objective function. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `M = 1` : requires objective decrease over the `M` last iterates (nonmonotone context). `M=1` implies monotone behaviour. +- `M = $(FOMO_M)` : algorithm parameter, see [`FOMOParameterSet`](@ref). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. -- `step_backend = r2_step()`: step computation mode. Options are `r2_step()` for quadratic regulation step and `tr_step()` for first-order trust-region. +- `step_backend = $(FOMO_step_backend)`: algorithm parameter, see [`FOMOParameterSet`](@ref). # Output @@ -215,14 +338,16 @@ mutable struct FoSolver{T, V} <: AbstractFirstOrderSolver c::V o::V α::T + params::FOMOParameterSet{T} end -function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = 1) where {T, V} +function FoSolver(nlp::AbstractNLPModel{T, V}; M::Int = get(FOMO_M, nlp), kwargs...) where {T, V} + params = FOMOParameterSet(nlp; M = M, kwargs...) x = similar(nlp.meta.x0) g = similar(nlp.meta.x0) c = similar(nlp.meta.x0) o = fill!(Vector{T}(undef, M), -Inf) - return FoSolver{T, V}(x, g, c, o, T(0)) + return FoSolver{T, V}(x, g, c, o, T(0), params) end """ @@ -232,14 +357,41 @@ mutable struct R2Solver{T, V} <: AbstractOptimizationSolver end Base.@deprecate R2Solver(nlp::AbstractNLPModel; kwargs...) FoSolver( nlp::AbstractNLPModel; - M = 1, + M = get(FOMO_M, nlp), kwargs..., ) -@doc (@doc FoSolver) function fo(nlp::AbstractNLPModel{T, V}; M::Int = 1, kwargs...) 
where {T, V} - solver = FoSolver(nlp; M) +@doc (@doc FoSolver) function fo( + nlp::AbstractNLPModel{T, V}; + η1::T = get(FOMO_η1, nlp), + η2::T = get(FOMO_η2, nlp), + γ1::T = get(FOMO_γ1, nlp), + γ2::T = get(FOMO_γ2, nlp), + γ3::T = get(FOMO_γ3, nlp), + αmax::T = get(FOMO_αmax, nlp), + β::T = get(FOMO_β, nlp), + θ1::T = get(FOMO_θ1, nlp), + θ2::T = get(FOMO_θ2, nlp), + M::Int = get(FOMO_M, nlp), + step_backend::AbstractFOMethod = get(FOMO_step_backend, nlp), + kwargs..., +) where {T, V} + solver = FoSolver( + nlp; + η1 = η1, + η2 = η2, + γ1 = γ1, + γ2 = γ2, + γ3 = γ3, + αmax = αmax, + β = β, + θ1 = θ1, + θ2 = θ2, + M = M, + step_backend = step_backend, + ) stats = GenericExecutionStats(nlp) - return solve!(solver, nlp, stats; step_backend = r2_step(), kwargs...) + return solve!(solver, nlp, stats; kwargs...) end @doc (@doc FoSolver) function R2(nlp::AbstractNLPModel{T, V}; kwargs...) where {T, V} @@ -265,24 +417,28 @@ function SolverCore.solve!( x::V = nlp.meta.x0, atol::T = √eps(T), rtol::T = √eps(T), - η1::T = T(eps(T)^(1 / 4)), - η2::T = T(0.95), - γ1::T = T(1 / 2), - γ2::T = T(2), - γ3::T = T(1 / 2), - αmax::T = 1 / eps(T), max_time::Float64 = 30.0, max_eval::Int = -1, max_iter::Int = typemax(Int), - β::T = T(0.9), - θ1::T = T(0.1), - θ2::T = T(eps(T)^(1 / 3)), verbose::Int = 0, - step_backend = r2_step(), ) where {T, V} + unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") + + # parameters + η1 = value(solver.params.η1) + η2 = value(solver.params.η2) + γ1 = value(solver.params.γ1) + γ2 = value(solver.params.γ2) + γ3 = value(solver.params.γ3) + αmax = value(solver.params.αmax) + β = value(solver.params.β) + θ1 = value(solver.params.θ1) + θ2 = value(solver.params.θ2) + M = value(solver.params.M) + step_backend = value(solver.params.step_backend) + use_momentum = typeof(solver) <: FomoSolver is_r2 = typeof(step_backend) <: r2_step - unconstrained(nlp) || error("fomo should only be called on unconstrained problems.") reset!(stats) 
start_time = time() diff --git a/src/lbfgs.jl b/src/lbfgs.jl index c5211d9a..6a183032 100644 --- a/src/lbfgs.jl +++ b/src/lbfgs.jl @@ -1,4 +1,48 @@ -export lbfgs, LBFGSSolver +export lbfgs, LBFGSSolver, LBFGSParameterSet + +# Default algorithm parameter values +const LBFGS_mem = DefaultParameter(5) +const LBFGS_τ₁ = DefaultParameter(nlp -> eltype(nlp.meta.x0)(0.9999), "T(0.9999)") +const LBFGS_bk_max = DefaultParameter(25) + +""" + LBFGSParameterSet{T} <: AbstractParameterSet + +This structure designed for `lbfgs` regroups the following parameters: + - `mem`: memory parameter of the `lbfgs` algorithm. + - `τ₁`: slope factor in the Wolfe condition when performing the line search. + - `bk_max`: maximum number of backtracks when performing the line search. + +An additional constructor is + + LBFGSParameterSet(nlp; kwargs...) + +where the kwargs are the parameters above. + +Default values are: + - `mem::Int = $(LBFGS_mem)` + - `τ₁::T = $(LBFGS_τ₁)` + - `bk_max::Int = $(LBFGS_bk_max)` +""" +struct LBFGSParameterSet{T} <: AbstractParameterSet + mem::Parameter{Int, IntegerRange{Int}} + τ₁::Parameter{T, RealInterval{T}} + bk_max::Parameter{Int, IntegerRange{Int}} +end + +# add a default constructor +function LBFGSParameterSet( + nlp::AbstractNLPModel{T}; + mem::Int = get(LBFGS_mem, nlp), + τ₁::T = get(LBFGS_τ₁, nlp), + bk_max::Int = get(LBFGS_bk_max, nlp), +) where {T} + LBFGSParameterSet( + Parameter(mem, IntegerRange(Int(5), Int(20))), + Parameter(τ₁, RealInterval(T(0), T(1), lower_open = true)), + Parameter(bk_max, IntegerRange(Int(1), Int(100))), + ) +end """ lbfgs(nlp; kwargs...) @@ -7,21 +51,21 @@ An implementation of a limited memory BFGS line-search method for unconstrained For advanced usage, first define a `LBFGSSolver` to preallocate the memory used in the algorithm, and then call `solve!`. - solver = LBFGSSolver(nlp; mem::Int = 5) + solver = LBFGSSolver(nlp; mem::Int = $(LBFGS_mem)) solve!(solver, nlp; kwargs...) 
# Arguments - `nlp::AbstractNLPModel{T, V}` represents the model to solve, see `NLPModels.jl`. The keyword arguments may include - `x::V = nlp.meta.x0`: the initial guess. -- `mem::Int = 5`: memory parameter of the `lbfgs` algorithm. +- `mem::Int = $(LBFGS_mem)`: algorithm parameter, see [`LBFGSParameterSet`](@ref). - `atol::T = √eps(T)`: absolute tolerance. - `rtol::T = √eps(T)`: relative tolerance, the algorithm stops when ‖∇f(xᵏ)‖ ≤ atol + rtol * ‖∇f(x⁰)‖. - `max_eval::Int = -1`: maximum number of objective function evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `τ₁::T = T(0.9999)`: slope factor in the Wolfe condition when performing the line search. -- `bk_max:: Int = 25`: maximum number of backtracks when performing the line search. +- `τ₁::T = $(LBFGS_τ₁)`: algorithm parameter, see [`LBFGSParameterSet`](@ref). +- `bk_max:: Int = $(LBFGS_bk_max)`: algorithm parameter, see [`LBFGSParameterSet`](@ref). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `verbose_subsolver::Int = 0`: if > 0, display iteration information every `verbose_subsolver` iteration of the subsolver. @@ -45,7 +89,7 @@ stats = lbfgs(nlp) ```jldoctest using JSOSolvers, ADNLPModels nlp = ADNLPModel(x -> sum(x.^2), ones(3)); -solver = LBFGSSolver(nlp; mem = 5); +solver = LBFGSSolver(nlp; mem = $(LBFGS_mem)); stats = solve!(solver, nlp) # output @@ -62,10 +106,15 @@ mutable struct LBFGSSolver{T, V, Op <: AbstractLinearOperator{T}, M <: AbstractN d::V H::Op h::LineModel{T, V, M} + params::LBFGSParameterSet{T} end -function LBFGSSolver(nlp::M; mem::Int = 5) where {T, V, M <: AbstractNLPModel{T, V}} +function LBFGSSolver(nlp::M; kwargs...) where {T, V, M <: AbstractNLPModel{T, V}} nvar = nlp.meta.nvar + + params = LBFGSParameterSet(nlp; kwargs...) 
+ mem = value(params.mem) + x = V(undef, nvar) d = V(undef, nvar) xt = V(undef, nvar) @@ -74,7 +123,7 @@ function LBFGSSolver(nlp::M; mem::Int = 5) where {T, V, M <: AbstractNLPModel{T, H = InverseLBFGSOperator(T, nvar, mem = mem, scaling = true) h = LineModel(nlp, x, d) Op = typeof(H) - return LBFGSSolver{T, V, Op, M}(x, xt, gx, gt, d, H, h) + return LBFGSSolver{T, V, Op, M}(x, xt, gx, gt, d, H, h, params) end function SolverCore.reset!(solver::LBFGSSolver) @@ -88,12 +137,14 @@ function SolverCore.reset!(solver::LBFGSSolver, nlp::AbstractNLPModel) end @doc (@doc LBFGSSolver) function lbfgs( - nlp::AbstractNLPModel; + nlp::AbstractNLPModel{T, V}; x::V = nlp.meta.x0, - mem::Int = 5, + mem::Int = get(LBFGS_mem, nlp), + τ₁::T = get(LBFGS_τ₁, nlp), + bk_max::Int = get(LBFGS_bk_max, nlp), kwargs..., -) where {V} - solver = LBFGSSolver(nlp; mem = mem) +) where {T, V} + solver = LBFGSSolver(nlp; mem = mem, τ₁ = τ₁, bk_max = bk_max) return solve!(solver, nlp; x = x, kwargs...) end @@ -108,8 +159,6 @@ function SolverCore.solve!( max_eval::Int = -1, max_iter::Int = typemax(Int), max_time::Float64 = 30.0, - τ₁::T = T(0.9999), - bk_max::Int = 25, verbose::Int = 0, verbose_subsolver::Int = 0, ) where {T, V} @@ -124,6 +173,10 @@ function SolverCore.solve!( start_time = time() set_time!(stats, 0.0) + # parameters + τ₁ = value(solver.params.τ₁) + bk_max = value(solver.params.bk_max) + n = nlp.meta.nvar solver.x .= x diff --git a/src/tron.jl b/src/tron.jl index 7dd9f546..7b41c1c1 100644 --- a/src/tron.jl +++ b/src/tron.jl @@ -1,10 +1,54 @@ # Some parts of this code were adapted from # https://github.com/PythonOptimizers/NLP.py/blob/develop/nlp/optimize/tron.py -export tron, TronSolver +export tron, TronSolver, TRONParameterSet tron(nlp::AbstractNLPModel; variant = :Newton, kwargs...) = tron(Val(variant), nlp; kwargs...) 
+# Default algorithm parameter values +const TRON_μ₀ = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1 // 100), "T(1 / 100)") +const TRON_μ₁ = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1), "T(1)") +const TRON_σ = DefaultParameter(nlp -> eltype(nlp.meta.x0)(10), "T(10)") + +""" + TRONParameterSet{T} <: AbstractParameterSet + +This structure designed for `tron` regroups the following parameters: + - `μ₀::T`: algorithm parameter in (0, 0.5). + - `μ₁::T`: algorithm parameter in (0, +∞). + - `σ::T`: algorithm parameter in (1, +∞). + +An additional constructor is + + TRONParameterSet(nlp; kwargs...) + +where the kwargs are the parameters above. + +Default values are: + - `μ₀::T = $(TRON_μ₀)` + - `μ₁::T = $(TRON_μ₁)` + - `σ::T = $(TRON_σ)` +""" +struct TRONParameterSet{T} <: AbstractParameterSet + μ₀::Parameter{T, RealInterval{T}} + μ₁::Parameter{T, RealInterval{T}} + σ::Parameter{T, RealInterval{T}} +end + +# add a default constructor +function TRONParameterSet( + nlp::AbstractNLPModel{T}; + μ₀::T = get(TRON_μ₀, nlp), + μ₁::T = get(TRON_μ₁, nlp), + σ::T = get(TRON_σ, nlp), +) where {T} + TRONParameterSet( + Parameter(μ₀, RealInterval(T(0), T(1 // 2), lower_open = true)), + Parameter(μ₁, RealInterval(T(0), T(Inf), lower_open = true)), + Parameter(σ, RealInterval(T(1), T(Inf), lower_open = true)), + ) +end + """ tron(nlp; kwargs...) @@ -21,9 +65,9 @@ For advanced usage, first define a `TronSolver` to preallocate the memory used i - `nlp::AbstractNLPModel{T, V}` represents the model to solve, see `NLPModels.jl`. The keyword arguments may include - `x::V = nlp.meta.x0`: the initial guess. -- `μ₀::T = T(1e-2)`: algorithm parameter in (0, 0.5). -- `μ₁::T = one(T)`: algorithm parameter in (0, +∞). -- `σ::T = T(10)`: algorithm parameter in (1, +∞). +- `μ₀::T = $(TRON_μ₀)`: algorithm parameter, see [`TRONParameterSet`](@ref). +- `μ₁::T = $(TRON_μ₁)`: algorithm parameter, see [`TRONParameterSet`](@ref). +- `σ::T = $(TRON_σ)`: algorithm parameter, see [`TRONParameterSet`](@ref). 
- `max_eval::Int = -1`: maximum number of objective function evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. @@ -98,13 +142,18 @@ mutable struct TronSolver{ cg_op::LinearOperator{T} ZHZ::Aop + params::TRONParameterSet{T} end function TronSolver( nlp::AbstractNLPModel{T, V}; + μ₀::T = get(TRON_μ₀, nlp), + μ₁::T = get(TRON_μ₁, nlp), + σ::T = get(TRON_σ, nlp), max_radius::T = min(one(T) / sqrt(2 * eps(T)), T(100)), kwargs..., ) where {T, V <: AbstractVector{T}} + params = TRONParameterSet(nlp; μ₀ = μ₀, μ₁ = μ₁, σ = σ) nvar = nlp.meta.nvar x = V(undef, nvar) xc = V(undef, nvar) @@ -145,6 +194,7 @@ function TronSolver( cg_op_diag, cg_op, ZHZ, + params, ) end @@ -165,12 +215,15 @@ end ::Val{:Newton}, nlp::AbstractNLPModel{T, V}; x::V = nlp.meta.x0, + μ₀::T = get(TRON_μ₀, nlp), + μ₁::T = get(TRON_μ₁, nlp), + σ::T = get(TRON_σ, nlp), kwargs..., ) where {T, V} dict = Dict(kwargs) subsolver_keys = intersect(keys(dict), tron_keys) subsolver_kwargs = Dict(k => dict[k] for k in subsolver_keys) - solver = TronSolver(nlp; subsolver_kwargs...) + solver = TronSolver(nlp; μ₀ = μ₀, μ₁ = μ₁, σ = σ, subsolver_kwargs...) for k in subsolver_keys pop!(dict, k) end @@ -183,9 +236,6 @@ function SolverCore.solve!( stats::GenericExecutionStats{T, V}; callback = (args...) 
-> nothing, x::V = nlp.meta.x0, - μ₀::T = T(1e-2), - μ₁::T = one(T), - σ::T = T(10), max_eval::Int = -1, max_iter::Int = typemax(Int), max_time::Float64 = 30.0, @@ -204,6 +254,11 @@ function SolverCore.solve!( error("tron should only be called for unconstrained or bound-constrained problems") end + # parameters + μ₀ = value(solver.params.μ₀) + μ₁ = value(solver.params.μ₁) + σ = value(solver.params.σ) + reset!(stats) ℓ = nlp.meta.lvar u = nlp.meta.uvar @@ -390,7 +445,8 @@ function SolverCore.solve!( stats end -"""`s = projected_line_search!(x, H, g, d, ℓ, u, Hs; μ₀ = 1e-2)` +""" + s = projected_line_search!(x, H, g, d, ℓ, u, Hs, s, μ₀) Performs a projected line search, searching for a step size `t` such that @@ -407,8 +463,8 @@ function projected_line_search!( ℓ::AbstractVector{T}, u::AbstractVector{T}, Hs::AbstractVector{T}, - s::AbstractVector{T}; - μ₀::Real = T(1e-2), + s::AbstractVector{T}, + μ₀::Real, ) where {T <: Real} α = one(T) _, brkmin, _ = breakpoints(x, d, ℓ, u) @@ -460,9 +516,9 @@ function cauchy!( u::AbstractVector{T}, s::AbstractVector{T}, Hs::AbstractVector{T}; - μ₀::Real = T(1e-2), - μ₁::Real = one(T), - σ::Real = T(10), + μ₀::Real = T(1 // 100), + μ₁::Real = T(1), + σ::Real = T(10), ) where {T <: Real} # TODO: Use brkmin to care for g direction s .= .-g @@ -598,7 +654,7 @@ function projected_newton!( # Projected line search cgs_rhs .*= -1 - projected_line_search!(x, ZHZ, cgs_rhs, st, ℓ, u, Hs, w) + projected_line_search!(x, ZHZ, cgs_rhs, st, ℓ, u, Hs, w, value(solver.params.μ₀)) s .+= w mul!(Hs, H, s) diff --git a/src/tronls.jl b/src/tronls.jl index 622d202e..0f838bcb 100644 --- a/src/tronls.jl +++ b/src/tronls.jl @@ -1,9 +1,53 @@ -export TronSolverNLS +export TronSolverNLS, TRONLSParameterSet const tronls_allowed_subsolvers = [CglsSolver, CrlsSolver, LsqrSolver, LsmrSolver] tron(nls::AbstractNLSModel; variant = :GaussNewton, kwargs...) = tron(Val(variant), nls; kwargs...) 
+# Default algorithm parameter values +const TRONLS_μ₀ = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1 // 100), "T(1 / 100)") +const TRONLS_μ₁ = DefaultParameter(nlp -> eltype(nlp.meta.x0)(1), "T(1)") +const TRONLS_σ = DefaultParameter(nlp -> eltype(nlp.meta.x0)(10), "T(10)") + +""" + TRONLSParameterSet{T} <: AbstractParameterSet + +This structure designed for `tron` regroups the following parameters: + - `μ₀`: algorithm parameter in (0, 0.5). + - `μ₁`: algorithm parameter in (0, +∞). + - `σ`: algorithm parameter in (1, +∞). + +An additional constructor is + + TRONLSParameterSet(nlp; kwargs...) + +where the kwargs are the parameters above. + +Default values are: + - `μ₀::T = $(TRONLS_μ₀)` + - `μ₁::T = $(TRONLS_μ₁)` + - `σ::T = $(TRONLS_σ)` +""" +struct TRONLSParameterSet{T} <: AbstractParameterSet + μ₀::Parameter{T, RealInterval{T}} + μ₁::Parameter{T, RealInterval{T}} + σ::Parameter{T, RealInterval{T}} +end + +# add a default constructor +function TRONLSParameterSet( + nlp::AbstractNLPModel{T}; + μ₀::T = get(TRONLS_μ₀, nlp), + μ₁::T = get(TRONLS_μ₁, nlp), + σ::T = get(TRONLS_σ, nlp), +) where {T} + TRONLSParameterSet( + Parameter(μ₀, RealInterval(T(0), T(1 // 2), lower_open = true)), + Parameter(μ₁, RealInterval(T(0), T(Inf), lower_open = true)), + Parameter(σ, RealInterval(T(1), T(Inf), lower_open = true)), + ) +end + """ tron(nls; kwargs...) @@ -21,9 +65,9 @@ For advanced usage, first define a `TronSolverNLS` to preallocate the memory use The keyword arguments may include - `x::V = nlp.meta.x0`: the initial guess. - `subsolver_type::Symbol = LsmrSolver`: `Krylov.jl` method used as subproblem solver, see `JSOSolvers.tronls_allowed_subsolvers` for a list. -- `μ₀::T = T(1e-2)`: algorithm parameter in (0, 0.5). -- `μ₁::T = one(T)`: algorithm parameter in (0, +∞). -- `σ::T = T(10)`: algorithm parameter in (1, +∞). +- `μ₀::T = $(TRONLS_μ₀)`: algorithm parameter, see [`TRONLSParameterSet`](@ref). 
+- `μ₁::T = $(TRONLS_μ₁)`: algorithm parameter, see [`TRONLSParameterSet`](@ref). +- `σ::T = $(TRONLS_σ)`: algorithm parameter, see [`TRONLSParameterSet`](@ref). - `max_eval::Int = -1`: maximum number of objective function evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. @@ -103,10 +147,14 @@ mutable struct TronSolverNLS{ AZ::Aop ls_subsolver::Sub + params::TRONLSParameterSet{T} end function TronSolverNLS( nlp::AbstractNLSModel{T, V}; + μ₀::T = get(TRONLS_μ₀, nlp), + μ₁::T = get(TRONLS_μ₁, nlp), + σ::T = get(TRONLS_σ, nlp), subsolver_type::Type{<:KrylovSolver} = LsmrSolver, max_radius::T = min(one(T) / sqrt(2 * eps(T)), T(100)), kwargs..., @@ -114,6 +162,7 @@ function TronSolverNLS( subsolver_type in tronls_allowed_subsolvers || error("subproblem solver must be one of $(tronls_allowed_subsolvers)") + params = TRONLSParameterSet(nlp; μ₀ = μ₀, μ₁ = μ₁, σ = σ) nvar = nlp.meta.nvar nequ = nlp.nls_meta.nequ x = V(undef, nvar) @@ -164,6 +213,7 @@ function TronSolverNLS( ls_op, AZ, ls_subsolver, + params, ) end @@ -182,13 +232,23 @@ end ::Val{:GaussNewton}, nlp::AbstractNLSModel{T, V}; x::V = nlp.meta.x0, + μ₀::Real = get(TRONLS_μ₀, nlp), + μ₁::Real = get(TRONLS_μ₁, nlp), + σ::Real = get(TRONLS_σ, nlp), subsolver_type::Type{<:KrylovSolver} = LsmrSolver, kwargs..., ) where {T, V} dict = Dict(kwargs) subsolver_keys = intersect(keys(dict), tron_keys) subsolver_kwargs = Dict(k => dict[k] for k in subsolver_keys) - solver = TronSolverNLS(nlp, subsolver_type = subsolver_type; subsolver_kwargs...) + solver = TronSolverNLS( + nlp, + μ₀ = μ₀, + μ₁ = μ₁, + σ = σ, + subsolver_type = subsolver_type; + subsolver_kwargs..., + ) for k in subsolver_keys pop!(dict, k) end @@ -201,9 +261,6 @@ function SolverCore.solve!( stats::GenericExecutionStats{T, V}; callback = (args...) 
-> nothing, x::V = nlp.meta.x0, - μ₀::Real = T(1e-2), - μ₁::Real = one(T), - σ::Real = T(10), max_eval::Int = -1, max_iter::Int = typemax(Int), max_time::Real = 30.0, @@ -223,6 +280,11 @@ function SolverCore.solve!( error("tron should only be called for unconstrained or bound-constrained problems") end + # parameters + μ₀ = value(solver.params.μ₀) + μ₁ = value(solver.params.μ₁) + σ = value(solver.params.σ) + reset!(stats) ℓ = nlp.meta.lvar u = nlp.meta.uvar diff --git a/src/trunk.jl b/src/trunk.jl index 37754e08..8161855e 100644 --- a/src/trunk.jl +++ b/src/trunk.jl @@ -1,7 +1,51 @@ -export trunk, TrunkSolver +export trunk, TrunkSolver, TRUNKParameterSet trunk(nlp::AbstractNLPModel; variant = :Newton, kwargs...) = trunk(Val(variant), nlp; kwargs...) +# Default algorithm parameter values +const TRUNK_bk_max = DefaultParameter(10) +const TRUNK_monotone = DefaultParameter(true) +const TRUNK_nm_itmax = DefaultParameter(25) + +""" + TRUNKParameterSet <: AbstractParameterSet + +This structure designed for `trunk` regroups the following parameters: + - `bk_max`: algorithm parameter. + - `monotone`: algorithm parameter. + - `nm_itmax`: algorithm parameter. + +An additional constructor is + + TRUNKParameterSet(nlp; kwargs...) + +where the kwargs are the parameters above. 
+ +Default values are: + - `bk_max::Int = $(TRUNK_bk_max)` + - `monotone::Bool = $(TRUNK_monotone)` + - `nm_itmax::Int = $(TRUNK_nm_itmax)` +""" +struct TRUNKParameterSet <: AbstractParameterSet + bk_max::Parameter{Int, IntegerRange{Int}} + monotone::Parameter{Bool, BinaryRange{Bool}} + nm_itmax::Parameter{Int, IntegerRange{Int}} +end + +# add a default constructor +function TRUNKParameterSet( + nlp::AbstractNLPModel; + bk_max::Int = get(TRUNK_bk_max, nlp), + monotone::Bool = get(TRUNK_monotone, nlp), + nm_itmax::Int = get(TRUNK_nm_itmax, nlp), +) + TRUNKParameterSet( + Parameter(bk_max, IntegerRange(1, typemax(Int))), + Parameter(monotone, BinaryRange()), + Parameter(nm_itmax, IntegerRange(1, typemax(Int))), + ) +end + """ trunk(nlp; kwargs...) @@ -22,9 +66,9 @@ The keyword arguments may include - `max_eval::Int = -1`: maximum number of objective function evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `bk_max::Int = 10`: algorithm parameter. -- `monotone::Bool = true`: algorithm parameter. -- `nm_itmax::Int = 25`: algorithm parameter. +- `bk_max::Int = $(TRUNK_bk_max)`: algorithm parameter, see [`TRUNKParameterSet`](@ref). +- `monotone::Bool = $(TRUNK_monotone)`: algorithm parameter, see [`TRUNKParameterSet`](@ref). +- `nm_itmax::Int = $(TRUNK_nm_itmax)`: algorithm parameter, see [`TRUNKParameterSet`](@ref). - `verbose::Int = 0`: if > 0, display iteration information every `verbose` iteration. - `subsolver_verbose::Int = 0`: if > 0, display iteration information every `subsolver_verbose` iteration of the subsolver. - `M`: linear operator that models a Hermitian positive-definite matrix of size `n`; passed to Krylov subsolvers. 
@@ -84,12 +128,17 @@ mutable struct TrunkSolver{ subsolver::Sub H::Op tr::TrustRegion{T, V} + params::TRUNKParameterSet end function TrunkSolver( nlp::AbstractNLPModel{T, V}; + bk_max::Int = get(TRUNK_bk_max, nlp), + monotone::Bool = get(TRUNK_monotone, nlp), + nm_itmax::Int = get(TRUNK_nm_itmax, nlp), subsolver_type::Type{<:KrylovSolver} = CgSolver, ) where {T, V <: AbstractVector{T}} + params = TRUNKParameterSet(nlp; bk_max = bk_max, monotone = monotone, nm_itmax = nm_itmax) nvar = nlp.meta.nvar x = V(undef, nvar) xt = V(undef, nvar) @@ -102,7 +151,7 @@ function TrunkSolver( H = hess_op!(nlp, x, Hs) Op = typeof(H) tr = TrustRegion(gt, one(T)) - return TrunkSolver{T, V, Sub, Op}(x, xt, gx, gt, gn, Hs, subsolver, H, tr) + return TrunkSolver{T, V, Sub, Op}(x, xt, gx, gt, gn, Hs, subsolver, H, tr, params) end function SolverCore.reset!(solver::TrunkSolver) @@ -126,7 +175,7 @@ end subsolver_type::Type{<:KrylovSolver} = CgSolver, kwargs..., ) where {V} - solver = TrunkSolver(nlp, subsolver_type = subsolver_type) + solver = TrunkSolver(nlp; subsolver_type = subsolver_type) return solve!(solver, nlp; x = x, kwargs...) end @@ -142,9 +191,6 @@ function SolverCore.solve!( max_eval::Int = -1, max_iter::Int = typemax(Int), max_time::Float64 = 30.0, - bk_max::Int = 10, - monotone::Bool = true, - nm_itmax::Int = 25, verbose::Int = 0, subsolver_verbose::Int = 0, M = I, @@ -156,6 +202,11 @@ function SolverCore.solve!( error("trunk should only be called for unconstrained problems. 
Try tron instead") end + # parameters + bk_max = value(solver.params.bk_max) + monotone = value(solver.params.monotone) + nm_itmax = value(solver.params.nm_itmax) + reset!(stats) start_time = time() set_time!(stats, 0.0) diff --git a/src/trunkls.jl b/src/trunkls.jl index 79ef752c..389df2a7 100644 --- a/src/trunkls.jl +++ b/src/trunkls.jl @@ -1,10 +1,54 @@ -export TrunkSolverNLS +export TrunkSolverNLS, TRUNKLSParameterSet const trunkls_allowed_subsolvers = [CglsSolver, CrlsSolver, LsqrSolver, LsmrSolver] trunk(nlp::AbstractNLSModel; variant = :GaussNewton, kwargs...) = trunk(Val(variant), nlp; kwargs...) +# Default algorithm parameter values +const TRUNKLS_bk_max = DefaultParameter(10) +const TRUNKLS_monotone = DefaultParameter(true) +const TRUNKLS_nm_itmax = DefaultParameter(25) + +""" + TRUNKLSParameterSet <: AbstractParameterSet + +This structure designed for `trunk` regroups the following parameters: + - `bk_max`: algorithm parameter. + - `monotone`: algorithm parameter. + - `nm_itmax`: algorithm parameter. + +An additional constructor is + + TRUNKLSParameterSet(nlp; kwargs...) + +where the kwargs are the parameters above. + +Default values are: + - `bk_max::Int = $(TRUNKLS_bk_max)` + - `monotone::Bool = $(TRUNKLS_monotone)` + - `nm_itmax::Int = $(TRUNKLS_nm_itmax)` +""" +struct TRUNKLSParameterSet <: AbstractParameterSet + bk_max::Parameter{Int, IntegerRange{Int}} + monotone::Parameter{Bool, BinaryRange{Bool}} + nm_itmax::Parameter{Int, IntegerRange{Int}} +end + +# add a default constructor +function TRUNKLSParameterSet( + nlp::AbstractNLPModel; + bk_max::Int = get(TRUNKLS_bk_max, nlp), + monotone::Bool = get(TRUNKLS_monotone, nlp), + nm_itmax::Int = get(TRUNKLS_nm_itmax, nlp), +) + TRUNKLSParameterSet( + Parameter(bk_max, IntegerRange(1, typemax(Int))), + Parameter(monotone, BinaryRange()), + Parameter(nm_itmax, IntegerRange(1, typemax(Int))), + ) +end + """ trunk(nls; kwargs...) 
@@ -28,9 +72,9 @@ The keyword arguments may include - `max_eval::Int = -1`: maximum number of objective function evaluations. - `max_time::Float64 = 30.0`: maximum time limit in seconds. - `max_iter::Int = typemax(Int)`: maximum number of iterations. -- `bk_max::Int = 10`: algorithm parameter. -- `monotone::Bool = true`: algorithm parameter. -- `nm_itmax::Int = 25`: algorithm parameter. +- `bk_max::Int = $(TRUNKLS_bk_max)`: algorithm parameter, see [`TRUNKLSParameterSet`](@ref). +- `monotone::Bool = $(TRUNKLS_monotone)`: algorithm parameter, see [`TRUNKLSParameterSet`](@ref). +- `nm_itmax::Int = $(TRUNKLS_nm_itmax)`: algorithm parameter, see [`TRUNKLSParameterSet`](@ref). - `verbose::Int = 0`: if > 0, display iteration details every `verbose` iteration. - `subsolver_verbose::Int = 0`: if > 0, display iteration information every `subsolver_verbose` iteration of the subsolver. @@ -96,12 +140,17 @@ mutable struct TrunkSolverNLS{ Atv::V A::Op subsolver::Sub + params::TRUNKLSParameterSet end function TrunkSolverNLS( nlp::AbstractNLPModel{T, V}; + bk_max::Int = get(TRUNKLS_bk_max, nlp), + monotone::Bool = get(TRUNKLS_monotone, nlp), + nm_itmax::Int = get(TRUNKLS_nm_itmax, nlp), subsolver_type::Type{<:KrylovSolver} = LsmrSolver, ) where {T, V <: AbstractVector{T}} + params = TRUNKLSParameterSet(nlp; bk_max = bk_max, monotone = monotone, nm_itmax = nm_itmax) subsolver_type in trunkls_allowed_subsolvers || error("subproblem solver must be one of $(trunkls_allowed_subsolvers)") @@ -124,7 +173,21 @@ function TrunkSolverNLS( subsolver = subsolver_type(nequ, nvar, V) Sub = typeof(subsolver) - return TrunkSolverNLS{T, V, Sub, Op}(x, xt, temp, gx, gt, tr, rt, Fx, Av, Atv, A, subsolver) + return TrunkSolverNLS{T, V, Sub, Op}( + x, + xt, + temp, + gx, + gt, + tr, + rt, + Fx, + Av, + Atv, + A, + subsolver, + params, + ) end function SolverCore.reset!(solver::TrunkSolverNLS) @@ -163,9 +226,6 @@ function SolverCore.solve!( max_eval::Int = -1, max_iter::Int = typemax(Int), 
max_time::Float64 = 30.0, - bk_max::Int = 10, - monotone::Bool = true, - nm_itmax::Int = 25, verbose::Int = 0, subsolver_verbose::Int = 0, ) where {T, V <: AbstractVector{T}} @@ -176,6 +236,11 @@ function SolverCore.solve!( error("trunk should only be called for unconstrained problems. Try tron instead") end + # parameters + bk_max = value(solver.params.bk_max) + monotone = value(solver.params.monotone) + nm_itmax = value(solver.params.nm_itmax) + reset!(stats) start_time = time() set_time!(stats, 0.0)