
Commit

Merge remote-tracking branch 'origin/quda_work' into quda_work_eigensolver
aniketsen committed Jun 28, 2023
2 parents 721f3ba + 6ac0c6e commit 9c27ed5
Showing 4 changed files with 54 additions and 21 deletions.
2 changes: 1 addition & 1 deletion default_input_values.h
@@ -210,7 +210,7 @@
#define _default_subprocess_flag 0
#define _default_lowmem_flag 0

#define _default_g_barrier_monomials_convergence 0
#define _default_g_barrier_monomials_convergence 1

/* default input values for QUDA interface */
/* These follow the recommendations of https://github.com/lattice/quda/wiki/Multigrid-Solver */
24 changes: 16 additions & 8 deletions doc/quda.tex
@@ -384,7 +384,7 @@ \subsubsection{QUDA-MG interface}
\item{ \texttt{MGEigSolverUsePolynomialAcceleration}: Whether or not to use polynomial acceleration in the eigensolver on a per-level basis. (comma-separated list of booleans, default: \texttt{yes} everywhere)}
\item{ \texttt{MGEigSolverPolynomialDegree}: Degree of the polynomial used for polynomial acceleration of the eigensolver, specified on a per-level basis. This is ignored unless \texttt{MGEigSolverUsePolynomialAcceleration = yes} is set on the given level. Values between $50$ and $100$ seem to work well. (comma-separated list of positive integers, default: $100$ on all levels)}
\item{ \texttt{MGEigSolverPolyMin}: Smallest eigenvalue to suppress via polynomial acceleration. It should not be set too low and only lowered step by step from its default value. Has a large impact on the rate of convergence of the eigensolver when set correctly. Ignored unless \texttt{MGEigSolverUsePolynomialAcceleration = yes} is set on the corresponding level. (comma-separated list of positive real numbers, default: $1.0$ on all levels)}
\item{ \texttt{MGEigSolverPolyMax}: Largest eigenvalue which should be suppressed via polynomial acceleration. This should be set about 10\% larger than the actual largest eigenvalue of the operator. Ignored unless \texttt{MGEigSolverUsePolynomialAcceleration = yes} is set. (comma-separated list of positive real numbers, default: $5.0$ on all levels)}
\item{ \texttt{MGEigSolverPolyMax}: Largest eigenvalue which should be suppressed via polynomial acceleration. This should be set about 20\% larger than the actual largest eigenvalue of the operator. Ignored unless \texttt{MGEigSolverUsePolynomialAcceleration = yes} is set. (comma-separated list of positive real numbers, default: $5.0$ on all levels)}
\end{itemize}

A typical coarse-grid-deflated setup might look something like:
@@ -421,18 +421,26 @@ \subsubsection{QUDA-MG interface}
MGEigSolverType = tr_lanczos, tr_lanczos, tr_lanczos
MGEigSolverSpectrum = smallest_real, smallest_real, smallest_real
MGEigPreserveDeflationSubspace = yes
MGEigSolverNumberOfVectors = 24, 32, 1024
MGEigSolverKrylovSubspaceSize = 24, 32, 2048
MGEigSolverNumberOfVectors = 0, 0, 1024
MGEigSolverKrylovSubspaceSize = 0, 0, 3072
MGEigSolverRequireConvergence = no, no, yes
MGEigSolverMaxRestarts = 100, 100, 100
MGEigSolverMaxRestarts = 0, 0, 100
MGEigSolverTolerance = 1e-4, 1e-4, 1e-4
MGEigSolverUseNormOp = no, no, yes
MGEigSolverUseDagger = no, no, no
MGEigSolverUsePolynomialAcceleration = no, no, yes
MGEigSolverPolynomialDegree = 100, 100, 100
MGEigSolverPolyMin = 0.6, 0.6, 0,6
MGEigSolverPolyMax = 8.0, 8.0, 8.0
MGCoarseSolverCABasisSize = 8, 8, 8
MGEigSolverPolynomialDegree = 0, 0, 100
MGEigSolverPolyMin = 0.0, 0.0, 0.01
MGEigSolverPolyMax = 0.0, 0.0, 3.6
MGCoarseSolverCABasisSize = 4, 4, 4
EndExternalInverter
\end{verbatim}

In order to determine appropriate values for \texttt{MGEigSolverPolyMin} and \texttt{MGEigSolverPolyMax}, one should run the eigensolver once without polynomial acceleration (\texttt{MGEigSolverUsePolynomialAcceleration = no}) and run tmLQCD at \texttt{DebugLevel = 3} so that the eigensolver prints the computed eigenvalues.
Then, setting \texttt{MGEigSolverSpectrum = largest\_real}, \texttt{MGEigSolverNumberOfVectors = 1} and \texttt{MGEigSolverKrylovSubspaceSize = 16} (all on the coarsest grid, the third number in the example above), one can easily obtain the largest eigenvalue of the coarse grid operator for a particular configuration or set of configurations.
Increasing this number by 20\% or so gives a good value for \texttt{MGEigSolverPolyMax}.
Subsequently, setting \texttt{MGEigSolverSpectrum = smallest\_real}, \texttt{MGEigSolverNumberOfVectors = 0, 0, 1024}, \texttt{MGEigSolverKrylovSubspaceSize = 0, 0, 3072} will give the requisite 1024 smallest eigenvalues.
Noting the largest of these smallest eigenvalues, one can use the next larger order of magnitude for \texttt{MGEigSolverPolyMin}.
In other words, if the largest of these smallest eigenvalues is $4\cdot10^{-3}$, for example, then \texttt{MGEigSolverPolyMin} can be set to 0.01.
This ensures that the desired (smallest) part of the spectrum is smaller than \texttt{MGEigSolverPolyMin} and that the entire spectrum is contained in the range up to \texttt{MGEigSolverPolyMax}.
After this, polynomial acceleration can be enabled, which should reduce setup time significantly.
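
For concreteness, the largest-eigenvalue probe described above might look like the following sketch. Only the eigensolver-related lines are shown (they belong in the same input section as the example above), the values are purely illustrative rather than recommendations, and comment lines are indicated here with \texttt{\#}:
\begin{verbatim}
# probe only the largest eigenvalue of the coarse-grid operator;
# run with DebugLevel = 3 so that the eigensolver prints the eigenvalues
MGEigSolverUsePolynomialAcceleration = no, no, no
MGEigSolverSpectrum = largest_real, largest_real, largest_real
MGEigSolverNumberOfVectors = 0, 0, 1
MGEigSolverKrylovSubspaceSize = 0, 0, 16
\end{verbatim}
The largest eigenvalue reported by this run, increased by roughly 20\%, then serves as \texttt{MGEigSolverPolyMax}, while the subsequent \texttt{smallest\_real} run determines \texttt{MGEigSolverPolyMin} as described above.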
26 changes: 20 additions & 6 deletions quda_interface.c
@@ -117,6 +117,8 @@
#include "tm_debug_printf.h"
#include "phmc.h"
#include "quda_gauge_paths.inc"
#include "io/gauge.h"
#include "measure_gauge_action.h"

// nstore is generally like a gauge id, for measurements it identifies the gauge field
// uniquely
@@ -1842,15 +1844,18 @@ void _setMGInvertParam(QudaInvertParam * mg_inv_param, const QudaInvertParam * c
mg_inv_param->cuda_prec = inv_param->cuda_prec;
mg_inv_param->cuda_prec_sloppy = inv_param->cuda_prec_sloppy;
mg_inv_param->cuda_prec_refinement_sloppy = inv_param->cuda_prec_refinement_sloppy;
mg_inv_param->cuda_prec_precondition = inv_param->cuda_prec_precondition;
mg_inv_param->cuda_prec_eigensolver = inv_param->cuda_prec_eigensolver;

mg_inv_param->clover_cpu_prec = inv_param->clover_cpu_prec;
mg_inv_param->clover_cuda_prec = inv_param->clover_cuda_prec;
mg_inv_param->clover_cuda_prec_sloppy = inv_param->clover_cuda_prec_sloppy;
mg_inv_param->clover_cuda_prec_refinement_sloppy = inv_param->clover_cuda_prec_refinement_sloppy;
mg_inv_param->clover_cuda_prec_precondition = inv_param->clover_cuda_prec_precondition;
mg_inv_param->clover_cuda_prec_eigensolver = inv_param->clover_cuda_prec_eigensolver;

// it seems that the MG-internal preconditioner and eigensolver need to be
// consistent with sloppy precision
mg_inv_param->cuda_prec_precondition = inv_param->cuda_prec_sloppy;
mg_inv_param->cuda_prec_eigensolver = inv_param->cuda_prec_sloppy;
mg_inv_param->clover_cuda_prec_precondition = inv_param->clover_cuda_prec_sloppy;
mg_inv_param->clover_cuda_prec_eigensolver = inv_param->clover_cuda_prec_sloppy;

mg_inv_param->clover_order = inv_param->clover_order;
mg_inv_param->gcrNkrylov = inv_param->gcrNkrylov;
@@ -2046,7 +2051,9 @@ void _setQudaMultigridParam(QudaMultigridParam* mg_param) {

mg_eig_param[level].n_ev = quda_input.mg_eig_nEv[level];
mg_eig_param[level].n_kr = quda_input.mg_eig_nKr[level];
mg_eig_param[level].n_conv = quda_input.mg_n_vec[level];
mg_eig_param[level].n_conv = quda_input.mg_eig_nEv[level]; // require convergence of all eigenvalues
mg_eig_param[level].n_ev_deflate = mg_eig_param[level].n_conv; // deflate all converged eigenvalues
// TODO expose this setting: mg_eig_param[level].batched_rotate = 128;
mg_eig_param[level].require_convergence = quda_input.mg_eig_require_convergence[level];

mg_eig_param[level].tol = quda_input.mg_eig_tol[level];
@@ -2236,9 +2243,16 @@ int invert_eo_degenerate_quda(spinor * const out,
rel_prec, even_odd_flag, solver_params,
sloppy_precision, compression, QpQm);
if (ret_value >= max_iter) {
char outname[200];
snprintf(outname, 200, "conf_mg_refresh_fail.%.6f.%04d", g_gauge_state.gauge_id, nstore);
paramsXlfInfo * xlfInfo = construct_paramsXlfInfo(
measure_plaquette((const su3**)g_gauge_field)/(6.*VOLUME*g_nproc), nstore);
int status = write_gauge_field(outname, 64, xlfInfo);
free(xlfInfo);

char errmsg[200];
snprintf(errmsg, 200, "QUDA-MG solver failed to converge in %d iterations even after forced setup refresh. Terminating!",
max_iter);
fatal_error(errmsg, __func__);
return -1;
} else {
23 changes: 17 additions & 6 deletions solver/monomial_solve.c
@@ -82,13 +82,24 @@
#endif
#include "fatal_error.h"

#include <io/params.h>
#include <io/spinor.h>
#include "io/params.h"
#include "io/spinor.h"
#include "io/gauge.h"
#include "measure_gauge_action.h"

#ifdef TM_USE_QUDA
# include "quda_interface.h"
#endif

void solve_fail_write_config_and_abort(const char * const solver) {
char outname[200];
snprintf(outname, 200, "conf_monomial_solve_fail.%.6f.%04d", g_gauge_state.gauge_id, nstore);
paramsXlfInfo * xlfInfo = construct_paramsXlfInfo(measure_plaquette((const su3**)g_gauge_field)/(6.*VOLUME*g_nproc), nstore);
int status = write_gauge_field(outname, 64, xlfInfo);
free(xlfInfo);
fatal_error("Error: solver reported -1 iterations.", solver);
}

int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_params,
const int max_iter, double eps_sq, const int rel_prec,
const int N, matrix_mult f, int solver_type){
@@ -216,7 +227,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_degenerate");
solve_fail_write_config_and_abort("solve_degenerate");
}

return (iteration_count);
@@ -425,7 +436,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_tm");
solve_fail_write_config_and_abort("solve_mms_tm");
}

return(iteration_count);
@@ -671,7 +682,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_nd");
solve_fail_write_config_and_abort("solve_mms_nd");
}

return (iteration_count);
@@ -726,7 +737,7 @@ int solve_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_nd_plus");
solve_fail_write_config_and_abort("solve_mms_nd_plus");
}

return iteration_count;
