Add optional preconditioner for linear solves (#279)
* Use correct cg iteration count

* Add optional preconditioner

* Scale gradient norm by preconditioner for TR radius

* Update src/trunk.jl

Co-authored-by: Tangi Migot <[email protected]>

* Add unit test

* Update src/trunk.jl

Co-authored-by: Tangi Migot <[email protected]>

* Fix status check in precon unit test

* Ensure that the preconditioner is pos def.

This doesn't seem to be a great preconditioner, but it does test the precon option.

---------

Co-authored-by: Tangi Migot <[email protected]>
Co-authored-by: Alexis Montoison <[email protected]>
3 people authored Oct 6, 2024
1 parent da5e309 commit 53a9419
Showing 3 changed files with 44 additions and 5 deletions.
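Before the per-file diffs, a brief note on how the preconditioner is used: with `M` modeling a Hermitian positive-definite matrix, the new `normM!` helper computes an `M`-weighted norm, and the initial trust-region radius is now scaled by the `M`-norm of the gradient instead of its Euclidean norm. In LaTeX (my notation, matching the code below):

```latex
\|x\|_M = \sqrt{x^\top M x},
\qquad
\Delta_0 = \min\!\left( \max\!\left( \frac{\|\nabla f(x_0)\|_M}{10},\ 1 \right),\ 100 \right).
```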
src/JSOSolvers.jl: 15 additions, 0 deletions
@@ -26,6 +26,21 @@ Notably, you can access, and modify, the following:
- `stats.elapsed_time`: elapsed time in seconds.
"


"""
normM!(n, x, M, z)
Weighted norm of `x` with respect to `M`, i.e., `z = sqrt(x' * M * x)`. Uses `z` as workspace.
"""
function normM!(n, x, M, z)
if M === I
return nrm2(n, x)
else
mul!(z, M, x)
return (xz)
end
end

# Unconstrained solvers
include("lbfgs.jl")
include("trunk.jl")
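A minimal usage sketch of the helper above, assuming only the standard-library `LinearAlgebra`; the `normM!` name and signature come from the diff, while the example vectors, the diagonal `M`, and the `norm`/`dot` fallbacks are illustrative choices of mine:

```julia
using LinearAlgebra

# Mirrors the helper above: returns sqrt(x' * M * x), using z as workspace for M * x.
function normM!(n, x, M, z)
  if M === I
    return norm(x)          # Euclidean norm when no preconditioner is supplied
  else
    mul!(z, M, x)           # z = M * x
    return sqrt(dot(x, z))  # sqrt(x' * M * x)
  end
end

x = [3.0, 4.0]
z = similar(x)
M = Diagonal([2.0, 0.5])

normM!(length(x), x, I, z)  # 5.0 (plain Euclidean norm)
normM!(length(x), x, M, z)  # sqrt(2 * 9 + 0.5 * 16) = sqrt(26) ≈ 5.099
```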
src/trunk.jl: 10 additions, 5 deletions
@@ -27,6 +27,7 @@ The keyword arguments may include
- `nm_itmax::Int = 25`: algorithm parameter.
- `verbose::Int = 0`: if > 0, display iteration information every `verbose` iteration.
- `subsolver_verbose::Int = 0`: if > 0, display iteration information every `subsolver_verbose` iteration of the subsolver.
- `M`: linear operator that models a Hermitian positive-definite matrix of size `n`; passed to Krylov subsolvers.
# Output
The returned value is a `GenericExecutionStats`, see `SolverCore.jl`.
@@ -146,6 +147,7 @@ function SolverCore.solve!(
nm_itmax::Int = 25,
verbose::Int = 0,
subsolver_verbose::Int = 0,
M = I,
) where {T, V <: AbstractVector{T}}
if !(nlp.meta.minimize)
error("trunk only works for minimization problem")
@@ -178,10 +180,11 @@
f = obj(nlp, x)
grad!(nlp, x, ∇f)
isa(nlp, QuasiNewtonModel) && (∇fn .= ∇f)
∇fNorm2 = nrm2(n, ∇f)
∇fNorm2 = norm(∇f)
∇fNormM = normM!(n, ∇f, M, Hs)
ϵ = atol + rtol * ∇fNorm2
tr = solver.tr
tr.radius = min(max(∇fNorm2 / 10, one(T)), T(100))
tr.radius = min(max(∇fNormM / 10, one(T)), T(100))

# Non-monotone mode parameters.
# fmin: current best overall objective value
@@ -226,9 +229,9 @@

while !done
# Compute inexact solution to trust-region subproblem
# minimize g's + 1/2 s'Hs subject to ‖s‖ ≤ radius.
# minimize g's + 1/2 s'Hs subject to ‖s‖_M ≤ radius.
# In this particular case, we may use an operator with preallocation.
cgtol = max(rtol, min(T(0.1), fNorm2, T(0.9) * cgtol))
cgtol = max(rtol, min(T(0.1), fNormM, T(0.9) * cgtol))
∇f .*= -1
Krylov.solve!(
subsolver,
@@ -240,6 +243,7 @@
itmax = max(2 * n, 50),
timemax = max_time - stats.elapsed_time,
verbose = subsolver_verbose,
M = M,
)
s, cg_stats = subsolver.x, subsolver.stats
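As an aside, not part of the diff: the `M` keyword above is handed to the Krylov.jl subsolver unchanged, and `cg_stats.niter` is the field used further down for the corrected iteration count. A standalone sketch of the same pattern with Krylov's `cg` directly, on a small SPD system of my own choosing:

```julia
using Krylov, LinearAlgebra

A = [4.0 1.0; 1.0 3.0]        # small symmetric positive-definite system
b = [1.0, 2.0]
M = Diagonal(1 ./ diag(A))    # Jacobi-style preconditioner (illustrative choice)

x, stats = Krylov.cg(A, b, M = M)
stats.niter                   # same field trunk's log now reports
```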

@@ -354,6 +358,7 @@
tr.good_grad = false
end
∇fNorm2 = nrm2(n, ∇f)
∇fNormM = normM!(n, ∇f, M, Hs)

set_objective!(stats, f)
set_time!(stats, time() - start_time)
@@ -374,7 +379,7 @@
∇fNorm2,
tr.radius,
tr.ratio,
length(cg_stats.residuals),
cg_stats.niter,
bk,
cg_stats.status,
])
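From a user's perspective, the trunk.jl changes above add a single keyword. A hedged sketch of calling it, assuming `JSOSolvers`, `ADNLPModels`, and `LinearAlgebra` are available and that a plain `Diagonal` supports the `mul!` applications made by the solver and its Krylov subsolver; the problem and the diagonal entries are illustrative only:

```julia
using JSOSolvers, ADNLPModels, LinearAlgebra

# Rosenbrock-type problem, the same one used in the test added below.
nlp = ADNLPModel(x -> 100 * (x[2] - x[1]^2)^2 + (x[1] - 1)^2, [-1.2; 1.0])

# Any operator modeling a Hermitian positive-definite matrix of size n should work;
# a constant diagonal is the simplest illustration.
M = Diagonal([2.0, 1.0])

stats_default = trunk(nlp)          # M defaults to I, recovering the previous behavior
stats_precond = trunk(nlp, M = M)   # M is forwarded to the Krylov subsolver
```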
test/runtests.jl: 19 additions, 0 deletions
@@ -78,3 +78,22 @@ include("objgrad-on-tron.jl")
nls = ADNLSModel(x -> [100 * (x[2] - x[1]^2); x[1] - 1], [-1.2; 1.0], 2)
stats = tron(nls, max_radius = max_radius, increase_factor = increase_factor, callback = cb)
end

@testset "Preconditioner in Trunk" begin
  x0 = [-1.2; 1.0]
  nlp = ADNLPModel(x -> 100 * (x[2] - x[1]^2)^2 + (x[1] - 1)^2, x0)
  function DiagPrecon(x)
    H = Matrix(hess(nlp, x))
    λmin = minimum(eigvals(H))
    Diagonal(H + λmin * I)
  end
  M = DiagPrecon(x0)
  function LinearAlgebra.ldiv!(y, M::Diagonal, x)
    y .= M \ x
  end
  function callback(nlp, solver, stats)
    M[:] = DiagPrecon(solver.x)
  end
  stats = trunk(nlp, callback=callback, M=M)
  @test stats.status == :first_order
end
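The commit message above notes that `Diagonal(H + λmin * I)` mainly exercises the option: it is only guaranteed positive definite when the smallest eigenvalue `λmin` is itself positive. Purely as an illustration, and not part of the commit, a diagonal built from absolute values of the Hessian diagonal plus a small shift is one common way to guarantee positive definiteness:

```julia
using ADNLPModels, NLPModels, LinearAlgebra

# Illustrative positive-definite diagonal: |H_ii| + δ, so every entry is strictly positive.
function AbsDiagPrecon(nlp, x; δ = 1e-8)
  H = Matrix(hess(nlp, x))
  Diagonal(abs.(diag(H)) .+ δ)
end

nlp = ADNLPModel(x -> 100 * (x[2] - x[1]^2)^2 + (x[1] - 1)^2, [-1.2; 1.0])
M = AbsDiagPrecon(nlp, nlp.meta.x0)  # could be refreshed in a callback, as in the test above
```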
