From 2ea278022cb726128abdb68c0d7309a24c15155e Mon Sep 17 00:00:00 2001 From: geoffroyleconte <47035783+geoffroyleconte@users.noreply.github.com> Date: Fri, 24 Mar 2023 13:03:05 -0400 Subject: [PATCH] Enable subsolver tolerances tuning, fix negative tolerance (#93) * tune inner solver parameters, fix error negative tol * improve tables, change test parameters, modif demos * keep default value bpdn * update tables parameters * update parameters name noredTR --- benchmarks/tables/bpdn-constr-table.jl | 97 +++++++++++++----- benchmarks/tables/bpdn-table.jl | 89 +++++++++++----- benchmarks/tables/fh-table.jl | 100 +++++++++++++----- benchmarks/tables/nnmf-table.jl | 77 +++++++++----- benchmarks/tables/regulopt-tables.jl | 136 ++++++++++++++++++------- benchmarks/tables/svm-table.jl | 95 +++++++++++++---- examples/demo-fh.jl | 5 +- examples/demo-nnmf-constr.jl | 12 +-- src/LMTR_alg.jl | 14 ++- src/LM_alg.jl | 14 ++- src/R2_alg.jl | 2 +- src/TRDH_alg.jl | 18 ++-- src/TR_alg.jl | 13 ++- 13 files changed, 495 insertions(+), 177 deletions(-) diff --git a/benchmarks/tables/bpdn-constr-table.jl b/benchmarks/tables/bpdn-constr-table.jl index 438a4d74..d081b0e0 100644 --- a/benchmarks/tables/bpdn-constr-table.jl +++ b/benchmarks/tables/bpdn-constr-table.jl @@ -7,51 +7,101 @@ model, nls_model, sol = bpdn_model(compound, bounds = true) f = LSR1Model(model) λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 10 -h = NormL1(λ) +h = NormL0(λ) verbose = 0 # 10 -ϵ = 1.0e-6 +ν = 1.0 +ϵ = 1.0e-5 +ϵi = 1.0e-5 +ϵri = 1.0e-6 maxIter = 500 -maxIter_inner = 20 +maxIter_inner = 100 options = - ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options6 = ROSolverOptions( - ν = 1.0, + ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options_nrTR = + ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true, reduce_TR = false) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options2_nrTR = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options3_nrTR = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options4 = ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options4_nrTR = + ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options5 = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, - psb = false, + psb = true, ) -options5 = ROSolverOptions( - ν = 1.0, +options5_nrTR = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true, + reduce_TR = false, ) -options7 = ROSolverOptions( +options6 = ROSolverOptions( + ν = ν, + ϵa = ϵ, + ϵr = ϵ, + verbose = verbose, + maxIter = maxIter, spectral = false, - psb = true, + psb = false, +) +options6_nrTR = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, - maxIter = maxIter_inner, + verbose = verbose, + maxIter = maxIter, + spectral = false, + psb = false, reduce_TR = false, ) -solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] -subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] -solver_options = [options, options, options5, options6, options, options, options, options, options] -subsolver_options = - [options2, options2, options2, options2, options2, options7, options2, options3, options4] # n'importe lequel si subsolver = :None - -benchmark_table( +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [ + options, + options, + options_nrTR, + options5, + options5_nrTR, + options6, + options6_nrTR, + options, + options, + options, + options, + options, + options, + options, + options, +] +subsolver_options = [ + options2, + options2, + options2, + options2, + options2, + options2, + options2, + options2, + options2, + options2_nrTR, + options3, + options3_nrTR, + options4, + options4_nrTR, +] # n'importe lequel si subsolver = :None +stats = benchmark_table( f, 1:(f.meta.nvar), sol, @@ -62,4 +112,5 @@ benchmark_table( solver_options, subsolver_options, "BPDN-cstr", -) + tex = false, +); diff --git a/benchmarks/tables/bpdn-table.jl b/benchmarks/tables/bpdn-table.jl index fe2b116f..f3d918e1 100644 --- a/benchmarks/tables/bpdn-table.jl +++ b/benchmarks/tables/bpdn-table.jl @@ -8,64 +8,102 @@ model, nls_model, sol = bpdn_model(compound, bounds = false) # parameters f = LSR1Model(model) λ = norm(grad(model, zeros(model.meta.nvar)), Inf) / 10 -h = NormL1(λ) +h = NormL0(λ) verbose = 0 # 10 -ϵ = 1.0e-6 +ν = 1.0 +ϵ = 1.0e-5 +ϵi = 1.0e-5 +ϵri = 1.0e-6 maxIter = 500 -maxIter_inner = 40 +maxIter_inner = 100 options = - ROSolverOptions(ν = 1.0, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options6 = ROSolverOptions( - ν = 1.0, + ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) +options_nrTR = + ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true, reduce_TR = false) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options2_nrTR = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options3_nrTR = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options4 = ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options4_nrTR = + ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options5 = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, - psb = false, + psb = true, ) -options5 = ROSolverOptions( - ν = 1.0, +options5_nrTR = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = false, psb = true, + reduce_TR = false, ) -options7 = ROSolverOptions( +options6 = ROSolverOptions( + ν = ν, + ϵa = ϵ, + ϵr = ϵ, + verbose = verbose, + maxIter = maxIter, spectral = false, - psb = true, + psb = false, +) +options6_nrTR = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, - maxIter = maxIter_inner, + verbose = verbose, + maxIter = maxIter, + spectral = false, + psb = false, reduce_TR = false, ) -options8 = - ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner, reduce_TR = false) -solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] -subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] -solver_options = - [options, options, options5, options6, options, options, options, options, options, options] +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [ + options, + options, + options_nrTR, + options5, + options5_nrTR, + options6, + options6_nrTR, + options, + options, + options, + options, + options, + options, + options, + options, +] subsolver_options = [ options2, options2, options2, options2, options2, - options7, options2, + options2, + options2, + options2, + options2_nrTR, options3, + options3_nrTR, options4, - options8, + options4_nrTR, ] # n'importe lequel si subsolver = :None -benchmark_table( +stats = benchmark_table( f, 1:(f.meta.nvar), sol, @@ -76,4 +114,5 @@ benchmark_table( solver_options, subsolver_options, "BPDN", -) + tex = false, +); diff --git a/benchmarks/tables/fh-table.jl b/benchmarks/tables/fh-table.jl index 99f78bfc..25adccfa 100644 --- a/benchmarks/tables/fh-table.jl +++ b/benchmarks/tables/fh-table.jl @@ -1,23 +1,33 @@ include("regulopt-tables.jl") using ADNLPModels, DifferentialEquations +display_sol = true + Random.seed!(1234) -data, simulate, resid, misfit = RegularizedProblems.FH_smooth_term() +data, simulate, resid, misfit, x0 = RegularizedProblems.FH_smooth_term() model = ADNLPModel(misfit, ones(5)) f = LBFGSModel(model) -λ = 1.0 -h = NormL0(λ) -ν = 1.0e2 +λ = 1.0e1 +h = NormL1(λ) +ν = 1.0e0 verbose = 0 #10 -maxIter = 1000 -maxIter_sub = 200 # max iter for subsolver +maxIter = 500 +maxIter_inner = 200 # max iter for subsolver ϵ = 1.0e-4 +ϵi = 1.0e-3 +ϵri = 1.0e-6 options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) -options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_sub) +options_nrTR = + ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true, reduce_TR = false) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options2_nrTR = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options3_nrTR = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options4 = ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options4_nrTR = + ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) options5 = ROSolverOptions( ν = ν, ϵa = ϵ, @@ -27,24 +37,26 @@ options5 = ROSolverOptions( spectral = false, psb = true, ) -options6 = ROSolverOptions( +options5_nrTR = ROSolverOptions( ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, - spectral = false, - psb = false, -) -options7 = ROSolverOptions( spectral = false, psb = true, reduce_TR = false, +) +options6 = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, - maxIter = maxIter_sub, + verbose = verbose, + maxIter = maxIter, + spectral = false, + psb = false, ) -options8 = ROSolverOptions( +options6_nrTR = ROSolverOptions( ν = ν, ϵa = ϵ, ϵr = ϵ, @@ -55,10 +67,25 @@ options8 = ROSolverOptions( reduce_TR = false, ) -solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] -subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] -solver_options = - [options, options, options5, options6, options8, options, options, options, options, options] +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [ + options, + options, + options_nrTR, + options5, + options5_nrTR, + options6, + options6_nrTR, + options, + options, + options, + options, + options, + options, + options, + options, +] subsolver_options = [ options2, options2, @@ -66,17 +93,21 @@ subsolver_options = [ options2, options2, options2, - options7, options2, + options2, + options2, + options2_nrTR, options3, + options3_nrTR, options4, -] -subset = 2:10 # issues with R2 alone + options4_nrTR, +] # n'importe lequel si subsolver = :None +subset = 2:length(solvers) # issues with R2 alone -benchmark_table( +names, stats = benchmark_table( f, 1:(f.meta.nvar), - [], + x0, h, λ, solvers[subset], @@ -84,4 +115,21 @@ benchmark_table( solver_options[subset], subsolver_options[subset], "FH with ν = $ν, λ = $λ", -) + tex = false, +); + +if display_sol + data = zeros(length(subset) + 1, 5) + data[1, :] .= x0 + for i=1:length(subset) + data[i+1, :] .= stats[i].solution + end + pretty_table( + data; + header = [L"$x_1$", L"$x_2$", L"$x_3$", L"$x_4$", L"$x_5$"], + row_names = vcat(["True"], names), + title = "Solution FH", + formatters = ft_printf("%1.2f"), + # backend = Val(:latex), + ) +end diff --git a/benchmarks/tables/nnmf-table.jl b/benchmarks/tables/nnmf-table.jl index 915e53ee..d9bd4ca1 100644 --- a/benchmarks/tables/nnmf-table.jl +++ b/benchmarks/tables/nnmf-table.jl @@ -4,18 +4,26 @@ Random.seed!(1234) m, n, k = 100, 50, 5 model, A, selected = nnmf_model(m, n, k) f = LSR1Model(model) -λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100 +λ = 1.0e-1 # norm(grad(model, rand(model.meta.nvar)), Inf) / 100 h = NormL0(λ) ν = 1.0 ϵ = 1.0e-5 +ϵi = 1.0e-3 +ϵri = 1.0e-6 maxIter = 500 -maxIter_inner = 40 +maxIter_inner = 100 verbose = 0 #10 options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) +options_nrTR = + ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true, reduce_TR = false) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options2_nrTR = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options3_nrTR = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options4 = ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options4_nrTR = + ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) options5 = ROSolverOptions( ν = ν, ϵa = ϵ, @@ -25,24 +33,26 @@ options5 = ROSolverOptions( spectral = false, psb = true, ) -options6 = ROSolverOptions( +options5_nrTR = ROSolverOptions( ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, - spectral = false, - psb = false, -) -options7 = ROSolverOptions( spectral = false, psb = true, reduce_TR = false, +) +options6 = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, - maxIter = maxIter_inner, + verbose = verbose, + maxIter = maxIter, + spectral = false, + psb = false, ) -options8 = ROSolverOptions( +options6_nrTR = ROSolverOptions( ν = ν, ϵa = ϵ, ϵr = ϵ, @@ -53,10 +63,25 @@ options8 = ROSolverOptions( reduce_TR = false, ) -solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR] -subsolvers = [:None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] -solver_options = - [options, options, options5, options6, options8, options, options, options, options, options] +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [ + options, + options, + options_nrTR, + options5, + options5_nrTR, + options6, + options6_nrTR, + options, + options, + options, + options, + options, + options, + options, + options, +] subsolver_options = [ options2, options2, @@ -64,11 +89,16 @@ subsolver_options = [ options2, options2, options2, - options7, options2, + options2, + options2, + options2_nrTR, options3, + options3_nrTR, options4, -] + options4_nrTR, +] # n'importe lequel si subsolver = :None +subset = 1:length(solvers) benchmark_table( f, @@ -76,9 +106,10 @@ benchmark_table( [], h, λ, - solvers, - subsolvers, - solver_options, - subsolver_options, + solvers[subset], + subsolvers[subset], + solver_options[subset], + subsolver_options[subset], "NNMF with m = $m, n = $n, k = $k, ν = $ν, λ = $λ", -) + tex = false, +); diff --git a/benchmarks/tables/regulopt-tables.jl b/benchmarks/tables/regulopt-tables.jl index e005f7d8..8233e327 100644 --- a/benchmarks/tables/regulopt-tables.jl +++ b/benchmarks/tables/regulopt-tables.jl @@ -1,4 +1,4 @@ -using PrettyTables +using PrettyTables, LaTeXStrings using Random using LinearAlgebra using ProximalOperators @@ -23,12 +23,12 @@ function options_str( subsolver::Symbol, ) if solver == :TRDH - out_str = !options.spectral ? (options.psb ? "-DiagQN-PSB" : "-DiagQN-Andrei") : "-Spectral" + out_str = !options.spectral ? (options.psb ? "-PSB" : "-Andrei") : "-Spec" out_str = (options.reduce_TR) ? out_str : string(out_str, "-noredTR") elseif solver == :TR && subsolver == :TRDH out_str = - !subsolver_options.spectral ? (subsolver_options.psb ? "-DiagQN-PSB" : "-DiagQN-Andrei") : - "-Spectral" + !subsolver_options.spectral ? (subsolver_options.psb ? "-PSB" : "-Andrei") : + "-Spec" out_str = (subsolver_options.reduce_TR) ? out_str : string(out_str, "-noredTR") else out_str = "" @@ -37,6 +37,8 @@ function options_str( end grad_evals(nlp::AbstractNLPModel) = neval_grad(nlp) grad_evals(nls::AbstractNLSModel) = neval_jtprod_residual(nls) + neval_jprod_residual(nls) +obj_evals(nlp::AbstractNLPModel) = neval_obj(nlp) +obj_evals(nls::AbstractNLSModel) = neval_residual(nls) function nb_prox_evals(stats, solver::Symbol) if solver ∈ [:TR, :R2, :TRDH] prox_evals = sum(stats.solver_specific[:SubsolverCounter]) @@ -46,6 +48,8 @@ function nb_prox_evals(stats, solver::Symbol) return prox_evals end +acc = vec -> length(findall(x -> x < 1, vec)) / length(vec) * 100 # for SVM + function benchmark_table( f::AbstractNLPModel, selected, @@ -56,14 +60,18 @@ function benchmark_table( subsolvers, solver_options, subsolver_options, - pb_name::String, + pb_name::String; + tex::Bool = false, + nls_train::Union{Nothing, AbstractNLSModel} = nothing, # for SVM + nls_test::Union{Nothing, AbstractNLSModel} = nothing, # for SVM ) - row_names = [ + solver_names = [ "$(solver)$(subsolvername(subsolver))$(options_str(opt, solver, subsolver_opt, subsolver))" for (solver, opt, subsolver, subsolver_opt) in zip(solvers, solver_options, subsolvers, subsolver_options) ] + nf_evals = [] n∇f_evals = [] nprox_evals = [] solver_stats = [] @@ -86,60 +94,120 @@ function benchmark_table( selected = selected, ) end + push!(nf_evals, obj_evals(f)) push!(n∇f_evals, grad_evals(f)) push!(nprox_evals, nb_prox_evals(solver_out, solver)) push!(solver_stats, solver_out) reset!(f) end - if length(sol) == 0 - header = ["f(x)", "h(x)/λ", "ξ", "∇f evals", "prox calls"] + if tex + if length(sol) == 0 + header = [ + "solver", + L"$f(x)$", + L"$h(x) / \lambda$", + L"$\xi$", + L"$\# \ f$", + L"$\# \ \nabla f$", + L"$\# \ prox$", + L"$t$ ($s$)", + ] + else + header = [ + "solver", + "\$f(x)\$", + L"$h(x)/\lambda$", + L"$\xi$", + pb_name[1:3] == "SVM" ? L"$(Train, Test)$" : L"$\|x-x_T\|_2$", + L"$\# \ f$", + L"$\# \ \nabla f$", + L"$\# \ prox$", + L"$t$ ($s$)", + ] + end else - header = [ - "f(x) (true = $(round(obj(model, sol); sigdigits = 4)))", - "h(x)/λ", - "ξ", - "||x-x*||/||x*||", - "∇f evals", - "prox calls", - ] + if length(sol) == 0 + header = ["solver", "f(x)", "h(x)/λ", "ξ", "# f", "# ∇f", "# prox", "t (s)"] + else + header = [ + "solver", + "f(x)", + "h(x)/λ", + "ξ", + pb_name[1:3] == "SVM" ? "(Train, Test)" : "||x-x*||", + "# f", + "# ∇f", + "# prox", + "t(s)", + ] + end end - n_solvers = length(row_names) - data = Matrix{Any}(undef, n_solvers, length(header)) + nh = length(header) + n_solvers = length(solver_names) + data = Matrix{Any}(undef, n_solvers, nh) for i = 1:n_solvers + sname = solver_names[i] solver_out = solver_stats[i] x = solver_out.solution fx = solver_out.solver_specific[:Fhist][end] hx = solver_out.solver_specific[:Hhist][end] ξ = solver_out.dual_feas + nf = nf_evals[i] n∇f = n∇f_evals[i] nprox = nprox_evals[i] + t = solver_out.elapsed_time if length(sol) == 0 - data[i, :] .= [fx, hx / λ, ξ, n∇f, nprox] + data[i, :] .= [sname, fx, hx / λ, ξ, nf, n∇f, nprox, t] else - err = norm(x - sol) / norm(sol) - data[i, :] .= [fx, hx / λ, ξ, err, n∇f, nprox] + if pb_name[1:3] == "SVM" + string(round(t,digits=2)) + err = "($( + round(acc(residual(nls_train, solver_out.solution)), digits=1)), $( + round(acc(residual(nls_test, solver_out.solution)), digits = 1)))" + else + err = norm(x - sol) + end + data[i, :] .= [sname, fx, hx / λ, ξ, err, nf, n∇f, nprox, t] end end + h_format = h isa NormL0 ? "%i" : "%7.1e" if length(sol) == 0 - print_formats = ft_printf(["%7.3e", "%7.1e", "%7.1e", "%i", "%i"], 1:length(header)) + print_formats = ft_printf(["%s", "%7.2e", h_format, "%7.1e", "%i", "%i", "%i", "%7.1e"], 1:nh) else - print_formats = ft_printf(["%7.3e", "%7.1e", "%7.1e", "%7.3e", "%i", "%i"], 1:length(header)) + if pb_name[1:3] == "SVM" + print_formats = ft_printf(["%s", "%7.2e", h_format, "%7.1e", "%7s", "%i", "%i", "%i", "%7.1e"], 1:nh) + else + print_formats = ft_printf(["%s", "%7.2e", h_format, "%7.1e", "%7.1e", "%i", "%i", "%i", "%7.1e"], 1:nh) + end end - return pretty_table( - data; - header = header, - row_names = row_names, - title = "$pb_name $(modelname(f)) $(typeof(h).name.name)", - # backend = Val(:latex), - formatters = ( - print_formats, - # (v, i, j) -> (SolverBenchmark.safe_latex_AbstractFloat(v)), - ), - ) + title = "$pb_name $(modelname(f)) $(typeof(h).name.name)" + if (length(sol) != 0) && pb_name[1:3] != "SVM" + title = string(title, " \$f(x_T) = $(@sprintf("%.2e", obj(model, sol)))\$") + end + if tex + pretty_table( + data; + header = header, + title = title, + backend = Val(:latex), + formatters = ( + print_formats, + (v, i, j) -> (j == 1 ? v : SolverBenchmark.safe_latex_AbstractFloat(v)), + ), + ) + else + pretty_table( + data; + header = header, + title = title, + formatters = (print_formats,), + ) + end + return solver_names, solver_stats end # λ = norm(grad(model, rand(model.meta.nvar)), Inf) / 100000 diff --git a/benchmarks/tables/svm-table.jl b/benchmarks/tables/svm-table.jl index a8c302f1..78daa8e7 100644 --- a/benchmarks/tables/svm-table.jl +++ b/benchmarks/tables/svm-table.jl @@ -4,21 +4,29 @@ using MLDatasets Random.seed!(1234) nlp_train, nls_train, sol_train = RegularizedProblems.svm_train_model() nlp_test, nls_test, sol_test = RegularizedProblems.svm_test_model() -f = LSR1Model(nlp_train) -f_test = LSR1Model(nlp_test) -λ = 10.0 #norm(grad(model, rand(model.meta.nvar)), Inf) / 10 +f = LBFGSModel(nlp_train) +f_test = LBFGSModel(nlp_test) +λ = 1.0e-1 #norm(grad(model, rand(model.meta.nvar)), Inf) / 10 h = NormL1(λ) ν = 1.0e0 verbose = 0 #10 -ϵ = 1.0e-6 +ϵ = 1.0e-4 +ϵi = 1.0e-3 +ϵri = 1.0e-6 maxIter = 500 -maxIter_inner = 40 +maxIter_inner = 100 options = ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true) -options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) -options4 = ROSolverOptions(spectral = true, ϵa = ϵ, ϵr = ϵ, maxIter = maxIter_inner) +options_nrTR = + ROSolverOptions(ν = ν, ϵa = ϵ, ϵr = ϵ, verbose = verbose, maxIter = maxIter, spectral = true, reduce_TR = false) +options2 = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options2_nrTR = ROSolverOptions(spectral = false, psb = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options3 = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options3_nrTR = ROSolverOptions(spectral = false, psb = false, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) +options4 = ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner) +options4_nrTR = + ROSolverOptions(spectral = true, ϵa = ϵi, ϵr = ϵri, maxIter = maxIter_inner, reduce_TR = false) options5 = ROSolverOptions( ν = ν, ϵa = ϵ, @@ -28,6 +36,16 @@ options5 = ROSolverOptions( spectral = false, psb = true, ) +options5_nrTR = ROSolverOptions( + ν = ν, + ϵa = ϵ, + ϵr = ϵ, + verbose = verbose, + maxIter = maxIter, + spectral = false, + psb = true, + reduce_TR = false, +) options6 = ROSolverOptions( ν = ν, ϵa = ϵ, @@ -37,26 +55,58 @@ options6 = ROSolverOptions( spectral = false, psb = false, ) -options7 = ROSolverOptions( - spectral = false, - psb = true, - reduce_TR = false, +options6_nrTR = ROSolverOptions( + ν = ν, ϵa = ϵ, ϵr = ϵ, - maxIter = maxIter_inner, + verbose = verbose, + maxIter = maxIter, + spectral = false, + psb = false, + reduce_TR = false, ) -solvers = [:R2, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR] -subsolvers = [:None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH] -solver_options = [options, options, options5, options6, options, options, options, options, options] -subsolver_options = - [options2, options2, options2, options2, options2, options2, options7, options3, options4] -subset = 1:9 +solvers = [:R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TR, :TR, :TR, :TR, :TR, :TR, :TR] +subsolvers = [:None, :None, :None, :None, :None, :None, :None, :R2, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH, :TRDH] +solver_options = [ + options, + options, + options_nrTR, + options5, + options5_nrTR, + options6, + options6_nrTR, + options, + options, + options, + options, + options, + options, + options, + options, +] +subsolver_options = [ + options2, + options2, + options2, + options2, + options2, + options2, + options2, + options2, + options2, + options2_nrTR, + options3, + options3_nrTR, + options4, + options4_nrTR, +] # n'importe lequel si subsolver = :None +subset = 1:length(solvers) benchmark_table( f, 1:(f.meta.nvar), - [], + (sol_train, sol_test), h, λ, solvers[subset], @@ -64,4 +114,7 @@ benchmark_table( solver_options[subset], subsolver_options[subset], "SVM with ν = $ν, λ = $λ", -) + nls_train = nls_train, + nls_test = nls_test, + tex = false, +); diff --git a/examples/demo-fh.jl b/examples/demo-fh.jl index 7e37629a..ffef31f7 100644 --- a/examples/demo-fh.jl +++ b/examples/demo-fh.jl @@ -4,7 +4,7 @@ using ADNLPModels, NLPModels, NLPModelsModifiers, RegularizedOptimization, Regul include("plot-utils-fh.jl") function demo_fh() - data, simulate, resid, misfit = RegularizedProblems.FH_smooth_term() + data, simulate, resid, misfit, x0 = RegularizedProblems.FH_smooth_term() model = ADNLPModel(misfit, ones(5)) h = NormL0(1.0) χ = NormLinf(1.0) @@ -13,16 +13,19 @@ function demo_fh() lbfgs_model = LBFGSModel(model) TR_out = TR(lbfgs_model, h, χ, options) + @info "TR relative error" norm(TR_out.solution - x0) / norm(x0) plot_fh(TR_out, simulate(TR_out.solution), data, "tr-r2") nls_model = ADNLSModel(resid, ones(5), 202) options.σmin = 1e-6 LMTR_out = LMTR(nls_model, h, χ, options) + @info "LMTR relative error" norm(LMTR_out.solution - x0) / norm(x0) plot_fh(LMTR_out, simulate(LMTR_out.solution), data, "lmtr-r2") reset!(nls_model) options.σmin = 1e+3 LM_out = LM(nls_model, h, options) + @info "LM relative error" norm(LM_out.solution - x0) / norm(x0) plot_fh(LM_out, simulate(LM_out.solution), data, "lm-r2") end diff --git a/examples/demo-nnmf-constr.jl b/examples/demo-nnmf-constr.jl index 5ebffeb1..1300608a 100644 --- a/examples/demo-nnmf-constr.jl +++ b/examples/demo-nnmf-constr.jl @@ -21,19 +21,19 @@ function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf") @info " using TR to solve with" h χ reset!(f) TR_out = TR(f, h, χ, options, selected = selected) - # plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") + plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") @info " using R2 to solve with" h reset!(f) R2_out = R2(f, h, options, selected = selected) - # plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix") + plot_nnmf(R2_out, Avec, m, n, k, "r2-$suffix") @info " using TR with R2 as subproblem to solve with" h χ reset!(f) TR_out = TR(f, h, χ, options, selected = selected) - # plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") + plot_nnmf(TR_out, Avec, m, n, k, "tr-r2-$suffix") - subsolver_options = ROSolverOptions(spectral = false, psb = true) + subsolver_options = ROSolverOptions(spectral = false, psb = true, ϵa = options.ϵa) @info " using TR with TRDH as subproblem to solve with" h χ reset!(f) TR2_out = TR( @@ -45,12 +45,12 @@ function demo_solver(f, h, χ, selected, Avec, m, n, k, suffix = "l0-linf") subsolver = TRDH, subsolver_options = subsolver_options, ) - # plot_nnmf(TR2_out, Avec, m, n, k, "tr-trdh-$suffix") + plot_nnmf(TR2_out, Avec, m, n, k, "tr-trdh-$suffix") @info " using TRDH to solve with" h χ reset!(f) TRDH_out = TRDH(f, h, χ, options, selected = selected) - # plot_nnmf(TRDH_out, Avec, m, n, k, "trdh-$suffix") + plot_nnmf(TRDH_out, Avec, m, n, k, "trdh-$suffix") end function demo_nnmf() diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index f7a549af..49306b26 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -47,13 +47,16 @@ function LMTR( x0::AbstractVector = nls.meta.x0, subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), subsolver = R2, - subsolver_options = ROSolverOptions(), + subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(nls.meta.nvar), ) where {H, X} start_time = time() elapsed_time = 0.0 # initialize passed options ϵ = options.ϵa + ϵ_subsolver = subsolver_options.ϵa + ϵ_subsolver_init = subsolver_options.ϵa + ϵ_subsolver = copy(ϵ_subsolver_init) ϵr = options.ϵr Δk = options.Δk verbose = options.verbose @@ -167,7 +170,9 @@ function LMTR( ξ1 > 0 || error("LMTR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") if ξ1 ≥ 0 && k == 1 - ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative + ϵ_increment = ϵr * sqrt(ξ1) + ϵ += ϵ_increment # make stopping test absolute and relative + ϵ_subsolver += ϵ_increment end if sqrt(ξ1) < ϵ @@ -176,7 +181,7 @@ function LMTR( continue end - subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ, min(1.0e-1, ξ1 / 10)) + subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ_subsolver, min(1.0e-1, ξ1 / 10)) ∆_effective = min(β * χ(s), Δk) has_bounds(nls) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : @@ -184,6 +189,9 @@ function LMTR( s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_options, s) end + # restore initial subsolver_options.ϵa here so that subsolver_options.ϵa + # is not modified if there is an error + subsolver_options.ϵa = ϵ_subsolver_init Complex_hist[k] = iter diff --git a/src/LM_alg.jl b/src/LM_alg.jl index 75bfd847..5bf1ec36 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -45,13 +45,16 @@ function LM( x0::AbstractVector = nls.meta.x0, subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), subsolver = R2, - subsolver_options = ROSolverOptions(), + subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(nls.meta.nvar), ) where {H} start_time = time() elapsed_time = 0.0 # initialize passed options ϵ = options.ϵa + ϵ_subsolver = subsolver_options.ϵa + ϵ_subsolver_init = subsolver_options.ϵa + ϵ_subsolver = copy(ϵ_subsolver_init) ϵr = options.ϵr verbose = options.verbose maxIter = options.maxIter @@ -167,7 +170,9 @@ function LM( ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") if ξ1 ≥ 0 && k == 1 - ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative + ϵ_increment = ϵr * sqrt(ξ1) + ϵ += ϵ_increment # make stopping test absolute and relative + ϵ_subsolver += ϵ_increment end if sqrt(ξ1) < ϵ @@ -176,11 +181,14 @@ function LM( continue end - subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ, min(1.0e-2, ξ1 / 10)) + subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_options, s) end + # restore initial subsolver_options.ϵa here so that subsolver_options.ϵa + # is not modified if there is an error + subsolver_options.ϵa = ϵ_subsolver_init Complex_hist[k] = iter diff --git a/src/R2_alg.jl b/src/R2_alg.jl index 9c955214..3f372da0 100644 --- a/src/R2_alg.jl +++ b/src/R2_alg.jl @@ -173,7 +173,7 @@ function R2( ϵ += ϵr * sqrt(ξ) # make stopping test absolute and relative end - if (ξ < 0 && sqrt(-ξ) ≤ -neg_tol) || (ξ ≥ 0 && sqrt(ξ) ≤ ϵ) + if (ξ < 0 && sqrt(-ξ) ≤ neg_tol) || (ξ ≥ 0 && sqrt(ξ) ≤ ϵ) optimal = true continue end diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index da35b030..2ad61355 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -109,6 +109,7 @@ function TRDH( ϵ = options.ϵa ϵr = options.ϵr Δk = options.Δk + neg_tol = options.neg_tol verbose = options.verbose maxIter = options.maxIter maxTime = options.maxTime @@ -226,17 +227,18 @@ function TRDH( prox!(s, ψ, mν∇fk, ν) Complex_hist[k] += 1 ξ1 = hk - mk1(s) + max(1, abs(hk)) * 10 * eps() - ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") if ξ1 ≥ 0 && k == 1 ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative end - if sqrt(ξ1) < ϵ + if (ξ1 < 0 && sqrt(-ξ1) ≤ neg_tol) || (ξ1 ≥ 0 && sqrt(ξ1) < ϵ) # the current xk is approximately first-order stationary optimal = true continue end + + ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") end Δ_effective = reduce_TR ? min(β * χ(s), Δk) : Δk @@ -264,22 +266,22 @@ function TRDH( Δobj = fk + hk - (fkn + hkn) + max(1, abs(fk + hk)) * 10 * eps() ξ = hk - mk(s) + max(1, abs(hk)) * 10 * eps() - if (ξ ≤ 0 || isnan(ξ)) - error("TRDH: failed to compute a step: ξ = $ξ") - end - if !reduce_TR if ξ ≥ 0 && k == 1 ϵ += ϵr * sqrt(ξ) # make stopping test absolute and relative end - if sqrt(ξ) < ϵ + if (ξ < 0 && sqrt(-ξ) ≤ neg_tol) || (ξ ≥ 0 && sqrt(ξ) < ϵ) # the current xk is approximately first-order stationary optimal = true continue end end + if (ξ ≤ 0 || isnan(ξ)) + error("TRDH: failed to compute a step: ξ = $ξ") + end + ρk = Δobj / ξ TR_stat = (η2 ≤ ρk < Inf) ? "↗" : (ρk < η1 ? "↘" : "=") @@ -363,7 +365,7 @@ function TRDH( :status => status, :fk => fk, :hk => hk, - :ξ => ξ1 ≥ 0 ? sqrt(ξ1) : ξ1, + :ξ => ξ1, :elapsed_time => elapsed_time, ) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 7e4368a7..f048c9e6 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -52,13 +52,15 @@ function TR( x0::AbstractVector = f.meta.x0, subsolver_logger::Logging.AbstractLogger = Logging.NullLogger(), subsolver = R2, - subsolver_options = ROSolverOptions(), + subsolver_options = ROSolverOptions(ϵa = options.ϵa), selected::AbstractVector{<:Integer} = 1:(f.meta.nvar), ) where {H, X} start_time = time() elapsed_time = 0.0 # initialize passed options ϵ = options.ϵa + ϵ_subsolver_init = subsolver_options.ϵa + ϵ_subsolver = copy(ϵ_subsolver_init) ϵr = options.ϵr Δk = options.Δk verbose = options.verbose @@ -160,7 +162,9 @@ function TR( ξ1 > 0 || error("TR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") if ξ1 ≥ 0 && k == 1 - ϵ += ϵr * sqrt(ξ1) # make stopping test absolute and relative + ϵ_increment = ϵr * sqrt(ξ1) + ϵ += ϵ_increment # make stopping test absolute and relative + ϵ_subsolver += ϵ_increment end if sqrt(ξ1) < ϵ @@ -169,7 +173,7 @@ function TR( continue end - subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ, min(1e-2, sqrt(ξ1)) * ξ1) + subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ_subsolver, min(1e-2, sqrt(ξ1)) * ξ1) ∆_effective = min(β * χ(s), Δk) (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : @@ -178,6 +182,9 @@ function TR( s, iter, outdict = with_logger(subsolver_logger) do subsolver(φ, ∇φ!, ψ, subsolver_options, s; Bk = Bk) end + # restore initial subsolver_options.ϵa here so that subsolver_options.ϵa + # is not modified if there is an error + subsolver_options.ϵa = ϵ_subsolver_init Complex_hist[k] = sum(outdict[:Chist]) sNorm = χ(s)