From 53a9419506dae40cec276020cf1122438a3f1b45 Mon Sep 17 00:00:00 2001 From: "Michael P. Friedlander" Date: Sun, 6 Oct 2024 09:52:48 -0700 Subject: [PATCH] Add optional preconditioner for linear solves (#279) * Use correct cg iteration count * Add optional preconditioner * Scale gradient norm by preconditioner for TR radius * Use correct cg iteration count * Add optional preconditioner * Scale gradient norm by preconditioner for TR radius * Update src/trunk.jl Co-authored-by: Tangi Migot * Add unit test * Update src/trunk.jl Co-authored-by: Tangi Migot * Fix status check in precon unit test * Ensure that the preconditioner is pos def. This doesn't seem to be a great preconditioner, but it does test the precon option. --------- Co-authored-by: Tangi Migot Co-authored-by: Alexis Montoison <35051714+amontoison@users.noreply.github.com> --- src/JSOSolvers.jl | 15 +++++++++++++++ src/trunk.jl | 15 ++++++++++----- test/runtests.jl | 19 +++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/JSOSolvers.jl b/src/JSOSolvers.jl index 9cc4703c..914ee092 100644 --- a/src/JSOSolvers.jl +++ b/src/JSOSolvers.jl @@ -26,6 +26,21 @@ Notably, you can access, and modify, the following: - `stats.elapsed_time`: elapsed time in seconds. " + +""" + normM!(n, x, M, z) + +Return the weighted norm of `x` with respect to `M`, i.e., `sqrt(x' * M * x)`. Uses `z` as workspace: it is overwritten with `M * x` unless `M === I`. +""" +function normM!(n, x, M, z) + if M === I + return nrm2(n, x) + else + mul!(z, M, x) + return √(x⋅z) + end +end + # Unconstrained solvers include("lbfgs.jl") include("trunk.jl") diff --git a/src/trunk.jl b/src/trunk.jl index 7e6660d7..37754e08 100644 --- a/src/trunk.jl +++ b/src/trunk.jl @@ -27,6 +27,7 @@ The keyword arguments may include - `nm_itmax::Int = 25`: algorithm parameter. - `verbose::Int = 0`: if > 0, display iteration information every `verbose` iteration. - `subsolver_verbose::Int = 0`: if > 0, display iteration information every `subsolver_verbose` iteration of the subsolver. 
+- `M`: linear operator that models a Hermitian positive-definite matrix of size `n`; passed to Krylov subsolvers. # Output The returned value is a `GenericExecutionStats`, see `SolverCore.jl`. @@ -146,6 +147,7 @@ function SolverCore.solve!( nm_itmax::Int = 25, verbose::Int = 0, subsolver_verbose::Int = 0, + M = I, ) where {T, V <: AbstractVector{T}} if !(nlp.meta.minimize) error("trunk only works for minimization problem") @@ -178,10 +180,11 @@ function SolverCore.solve!( f = obj(nlp, x) grad!(nlp, x, ∇f) isa(nlp, QuasiNewtonModel) && (∇fn .= ∇f) - ∇fNorm2 = nrm2(n, ∇f) + ∇fNorm2 = norm(∇f) + ∇fNormM = normM!(n, ∇f, M, Hs) ϵ = atol + rtol * ∇fNorm2 tr = solver.tr - tr.radius = min(max(∇fNorm2 / 10, one(T)), T(100)) + tr.radius = min(max(∇fNormM / 10, one(T)), T(100)) # Non-monotone mode parameters. # fmin: current best overall objective value @@ -226,9 +229,9 @@ function SolverCore.solve!( while !done # Compute inexact solution to trust-region subproblem - # minimize g's + 1/2 s'Hs subject to ‖s‖ ≤ radius. + # minimize g's + 1/2 s'Hs subject to ‖s‖_M ≤ radius. # In this particular case, we may use an operator with preallocation. 
- cgtol = max(rtol, min(T(0.1), √∇fNorm2, T(0.9) * cgtol)) + cgtol = max(rtol, min(T(0.1), √∇fNormM, T(0.9) * cgtol)) ∇f .*= -1 Krylov.solve!( subsolver, @@ -240,6 +243,7 @@ function SolverCore.solve!( itmax = max(2 * n, 50), timemax = max_time - stats.elapsed_time, verbose = subsolver_verbose, + M = M, ) s, cg_stats = subsolver.x, subsolver.stats @@ -354,6 +358,7 @@ function SolverCore.solve!( tr.good_grad = false end ∇fNorm2 = nrm2(n, ∇f) + ∇fNormM = normM!(n, ∇f, M, Hs) set_objective!(stats, f) set_time!(stats, time() - start_time) @@ -374,7 +379,7 @@ function SolverCore.solve!( ∇fNorm2, tr.radius, tr.ratio, - length(cg_stats.residuals), + cg_stats.niter, bk, cg_stats.status, ]) diff --git a/test/runtests.jl b/test/runtests.jl index 4d2ebe9e..52d49958 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -78,3 +78,22 @@ include("objgrad-on-tron.jl") nls = ADNLSModel(x -> [100 * (x[2] - x[1]^2); x[1] - 1], [-1.2; 1.0], 2) stats = tron(nls, max_radius = max_radius, increase_factor = increase_factor, callback = cb) end + +@testset "Preconditioner in Trunk" begin + x0 = [-1.2; 1.0] + nlp = ADNLPModel(x -> 100 * (x[2] - x[1]^2)^2 + (x[1] - 1)^2, x0) + function DiagPrecon(x) + H = Matrix(hess(nlp, x)) + λmin = minimum(eigvals(H)) + Diagonal(H + λmin * I) + end + M = DiagPrecon(x0) + function LinearAlgebra.ldiv!(y, M::Diagonal, x) + y .= M \ x + end + function callback(nlp, solver, stats) + M[:] = DiagPrecon(solver.x) + end + stats = trunk(nlp, callback=callback, M=M) + @test stats.status == :first_order +end \ No newline at end of file