Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/quda_work' into deriv_mg_tune
Browse files Browse the repository at this point in the history
  • Loading branch information
kostrzewa committed Aug 1, 2023
2 parents f7c21be + 6ac0c6e commit 2fe1762
Show file tree
Hide file tree
Showing 9 changed files with 428 additions and 91 deletions.
2 changes: 1 addition & 1 deletion default_input_values.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@
#define _default_subprocess_flag 0
#define _default_lowmem_flag 0

#define _default_g_barrier_monomials_convergence 0
#define _default_g_barrier_monomials_convergence 1

/* default input values for QUDA interface */
/* These follow the recommendations of https://github.com/lattice/quda/wiki/Multigrid-Solver */
Expand Down
14 changes: 14 additions & 0 deletions doc/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -7904,3 +7904,17 @@ @inbook{Joo2013
year = "2013"
}

@article{Kostrzewa:2022hsv,
author = "Kostrzewa, Bartosz and Bacchio, Simone and Finkenrath, Jacob and Garofalo, Marco and Pittler, Ferenc and Romiti, Simone and Urbach, Carsten",
collaboration = "ETM",
title = "{Twisted mass ensemble generation on GPU machines}",
eprint = "2212.06635",
archivePrefix = "arXiv",
primaryClass = "hep-lat",
doi = "10.22323/1.430.0340",
journal = "PoS",
volume = "LATTICE2022",
pages = "340",
year = "2023"
}

372 changes: 305 additions & 67 deletions doc/quda.tex

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions quda_dummy_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ typedef enum QudaInverterType_s {
QUDA_CG_INVERTER = CG,
QUDA_MR_INVERTER = MR,
QUDA_GCR_INVERTER = GCR,
QUDA_CGNE_INVERTER = CGNE,
QUDA_CGNR_INVERTER = CGNR,
QUDA_CA_CG_INVERTER = CA_CG,
QUDA_CA_CGNE_INVERTER = CA_CGNE,
QUDA_CA_CGNR_INVERTER = CA_CGNR,
QUDA_CA_GCR_INVERTER = CA_GCR
} QudaInverterType;

Expand Down
35 changes: 28 additions & 7 deletions quda_interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@
#include "tm_debug_printf.h"
#include "phmc.h"
#include "quda_gauge_paths.inc"
#include "io/gauge.h"
#include "measure_gauge_action.h"

// nstore is generally like a gauge id, for measurements it identifies the gauge field
// uniquely
Expand Down Expand Up @@ -245,7 +247,7 @@ void _setDefaultQudaParam(void){
QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION;
QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION;
QudaPrecision cuda_prec_precondition = QUDA_HALF_PRECISION;
QudaPrecision cuda_prec_precondition = QUDA_SINGLE_PRECISION;

QudaTune tune = QUDA_TUNE_YES;

Expand Down Expand Up @@ -1838,15 +1840,18 @@ void _setMGInvertParam(QudaInvertParam * mg_inv_param, const QudaInvertParam * c
mg_inv_param->cuda_prec = inv_param->cuda_prec;
mg_inv_param->cuda_prec_sloppy = inv_param->cuda_prec_sloppy;
mg_inv_param->cuda_prec_refinement_sloppy = inv_param->cuda_prec_refinement_sloppy;
mg_inv_param->cuda_prec_precondition = inv_param->cuda_prec_precondition;
mg_inv_param->cuda_prec_eigensolver = inv_param->cuda_prec_eigensolver;

mg_inv_param->clover_cpu_prec = inv_param->clover_cpu_prec;
mg_inv_param->clover_cuda_prec = inv_param->clover_cuda_prec;
mg_inv_param->clover_cuda_prec_sloppy = inv_param->clover_cuda_prec_sloppy;
mg_inv_param->clover_cuda_prec_refinement_sloppy = inv_param->clover_cuda_prec_refinement_sloppy;
mg_inv_param->clover_cuda_prec_precondition = inv_param->clover_cuda_prec_precondition;
mg_inv_param->clover_cuda_prec_eigensolver = inv_param->clover_cuda_prec_eigensolver;

// it seems that the MG-internal preconditioner and eigensolver need to be
// consistent with sloppy precision
mg_inv_param->cuda_prec_precondition = inv_param->cuda_prec_sloppy;
mg_inv_param->cuda_prec_eigensolver = inv_param->cuda_prec_sloppy;
mg_inv_param->clover_cuda_prec_precondition = inv_param->clover_cuda_prec_sloppy;
mg_inv_param->clover_cuda_prec_eigensolver = inv_param->clover_cuda_prec_sloppy;

mg_inv_param->clover_order = inv_param->clover_order;
mg_inv_param->gcrNkrylov = inv_param->gcrNkrylov;
Expand All @@ -1865,6 +1870,9 @@ void _setQudaMultigridParam(QudaMultigridParam* mg_param) {
QudaInvertParam * mg_inv_param = mg_param->invert_param;
_setMGInvertParam(mg_inv_param, &inv_param);

mg_param->setup_type = QUDA_NULL_VECTOR_SETUP;

mg_param->coarse_guess = quda_input.mg_coarse_guess;
mg_param->preserve_deflation = quda_input.mg_eig_preserve_deflation;

mg_param->n_level = quda_input.mg_n_level;
Expand Down Expand Up @@ -2013,6 +2021,10 @@ void _setQudaMultigridParam(QudaMultigridParam* mg_param) {
mg_param->coarse_solver_ca_lambda_min[level] = quda_input.mg_coarse_solver_ca_lambda_min[level];
mg_param->coarse_solver_ca_lambda_max[level] = quda_input.mg_coarse_solver_ca_lambda_max[level];

mg_param->smoother_solver_ca_basis[level] = quda_input.mg_smoother_solver_ca_basis[level];
mg_param->smoother_solver_ca_lambda_min[level] = quda_input.mg_smoother_solver_ca_lambda_min[level];
mg_param->smoother_solver_ca_lambda_max[level] = quda_input.mg_smoother_solver_ca_lambda_max[level];


// set the MG EigSolver parameters, almost equivalent to
// setEigParam from QUDA's multigrid_invert_test, except
Expand All @@ -2035,7 +2047,9 @@ void _setQudaMultigridParam(QudaMultigridParam* mg_param) {

mg_eig_param[level].n_ev = quda_input.mg_eig_nEv[level];
mg_eig_param[level].n_kr = quda_input.mg_eig_nKr[level];
mg_eig_param[level].n_conv = quda_input.mg_n_vec[level];
mg_eig_param[level].n_conv = quda_input.mg_eig_nEv[level]; // require convergence of all eigenvalues
mg_eig_param[level].n_ev_deflate = mg_eig_param[level].n_conv; // deflate all converged eigenvalues
// TODO expose this setting: mg_eig_param[level].batched_rotate = 128;
mg_eig_param[level].require_convergence = quda_input.mg_eig_require_convergence[level];

mg_eig_param[level].tol = quda_input.mg_eig_tol[level];
Expand Down Expand Up @@ -2231,9 +2245,16 @@ int invert_eo_degenerate_quda(spinor * const out,
rel_prec, even_odd_flag, solver_params,
sloppy_precision, compression, QpQm);
if (ret_value >= max_iter) {
char outname[200];
snprintf(outname, 200, "conf_mg_refresh_fail.%.6f.%04d", g_gauge_state.gauge_id, nstore);
paramsXlfInfo * xlfInfo = construct_paramsXlfInfo(
measure_plaquette((const su3**)g_gauge_field)/(6.*VOLUME*g_nproc), nstore);
int status = write_gauge_field(outname, 64, xlfInfo);
free(xlfInfo);

char errmsg[200];
snprintf(errmsg, 200, "QUDA-MG solver failed to converge in %d iterations even after forced setup refresh. Terminating!",
max_iter);
max_iter);
fatal_error(errmsg, __func__);
return -1;
} else {
Expand Down
4 changes: 4 additions & 0 deletions quda_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ typedef struct tm_QudaParams_t {
int mg_coarse_solver_ca_basis_size[QUDA_MAX_MG_LEVEL];
double mg_coarse_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL];
double mg_coarse_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL];

QudaCABasis mg_smoother_solver_ca_basis[QUDA_MAX_MG_LEVEL];
double mg_smoother_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL];
double mg_smoother_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL];

// parameters related to coarse grid deflation in the MG
int mg_use_eig_solver[QUDA_MAX_MG_LEVEL];
Expand Down
59 changes: 49 additions & 10 deletions read_input.l
Original file line number Diff line number Diff line change
Expand Up @@ -1739,10 +1739,33 @@ static inline double fltlist_next_token(int * const list_end){
quda_input.mg_setup_2kappamu = c;
if(myverbose) printf(" MGSetup2KappaMu set to %e line %d\n", quda_input.mg_setup_2kappamu, line_of_file);
}
/* TODO: the MGSetupSolver should be set on a per-level basis
allowing communication-avoiding solvers to be used
on the coarser grids */
{SPC}*MGSetupSolver{EQL}cg {
  /* "MGSetupSolver = cg": record CG as the MG setup inverter type */
  quda_input.mg_setup_inv_type = QUDA_CG_INVERTER;
  if (myverbose) {
    printf(" MGSetupSolver set to CG line %d\n", line_of_file);
  }
}
{SPC}*MGSetupSolver{EQL}cgne {
  /* "MGSetupSolver = cgne": record CGNE as the MG setup inverter type */
  quda_input.mg_setup_inv_type = QUDA_CGNE_INVERTER;
  if (myverbose) {
    printf(" MGSetupSolver set to CGNE line %d\n", line_of_file);
  }
}
{SPC}*MGSetupSolver{EQL}cgnr {
  /* "MGSetupSolver = cgnr": record CGNR as the MG setup inverter type.
   * This must be the CGNR enumerator; storing QUDA_CGNE_INVERTER here would
   * silently select CGNE while the log message below claims CGNR. */
  quda_input.mg_setup_inv_type = QUDA_CGNR_INVERTER;
  if (myverbose) {
    printf(" MGSetupSolver set to CGNR line %d\n", line_of_file);
  }
}
{SPC}*MGSetupSolver{EQL}cacg {
  /* "MGSetupSolver = cacg": record CA-CG as the MG setup inverter type */
  quda_input.mg_setup_inv_type = QUDA_CA_CG_INVERTER;
  if (myverbose) {
    printf(" MGSetupSolver set to CA-CG line %d\n", line_of_file);
  }
}
{SPC}*MGSetupSolver{EQL}cacgne {
  /* "MGSetupSolver = cacgne": record CA-CGNE as the MG setup inverter type */
  quda_input.mg_setup_inv_type = QUDA_CA_CGNE_INVERTER;
  if (myverbose) {
    printf(" MGSetupSolver set to CA-CGNE line %d\n", line_of_file);
  }
}
{SPC}*MGSetupSolver{EQL}cacgnr {
  /* "MGSetupSolver = cacgnr": record CA-CGNR as the MG setup inverter type */
  quda_input.mg_setup_inv_type = QUDA_CA_CGNR_INVERTER;
  if (myverbose) {
    printf(" MGSetupSolver set to CA-CGNR line %d\n", line_of_file);
  }
}
{SPC}*MGSetupSolver{EQL}bicgstab {
quda_input.mg_setup_inv_type = QUDA_BICGSTAB_INVERTER;
if(myverbose) printf(" MGSetupSolver set to BiCGstab line %d\n", line_of_file);
Expand Down Expand Up @@ -1821,6 +1844,14 @@ static inline double fltlist_next_token(int * const list_end){
quda_input.mg_reset_setup_mdu_threshold = c;
if(myverbose) printf(" MGResetSetupMDUThreshold set to %f line %d\n", quda_input.mg_reset_setup_mdu_threshold, line_of_file);
}
{SPC}*MGCoarseGuess{EQL}yes {
  /* "MGCoarseGuess = yes": switch the coarse-guess flag on */
  quda_input.mg_coarse_guess = QUDA_BOOLEAN_YES;
  if (myverbose) {
    printf(" MGCoarseGuess set to YES in line %d\n", line_of_file);
  }
}
{SPC}*MGCoarseGuess{EQL}no {
  /* "MGCoarseGuess = no": switch the coarse-guess flag off */
  quda_input.mg_coarse_guess = QUDA_BOOLEAN_NO;
  if (myverbose) {
    printf(" MGCoarseGuess set to NO in line %d\n", line_of_file);
  }
}
{SPC}*MGUseEigSolver{EQL}{STRLIST} {
parse_quda_mg_bool_par_array(yytext, &(quda_input.mg_use_eig_solver[0]));
}
Expand Down Expand Up @@ -1912,14 +1943,6 @@ static inline double fltlist_next_token(int * const list_end){
quda_input.mg_eig_preserve_deflation = QUDA_BOOLEAN_YES;
if(myverbose) printf(" MGEigPreserveDeflationSubspace set to YES in line %d\n", line_of_file);
}
{SPC}*MGEigSolverCoarseGuess{EQL}yes {
quda_input.mg_coarse_guess = QUDA_BOOLEAN_YES;
if(myverbose) printf(" MGEigSolverCoarseGuess set to YES in line %d\n", line_of_file);
}
{SPC}*MGEigSolverCoarseGuess{EQL}no {
quda_input.mg_coarse_guess = QUDA_BOOLEAN_NO;
if(myverbose) printf(" MGEigSolverCoarseGuess set to NO in line %d\n", line_of_file);
}
{SPC}*MGEigSolverNumberOfVectors{EQL}{STRLIST} {
parse_quda_mg_int_par_array(yytext, &(quda_input.mg_eig_nEv[0]));
}
Expand Down Expand Up @@ -1968,6 +1991,15 @@ static inline double fltlist_next_token(int * const list_end){
{SPC}*MGSetupCABasisLambdaMax{EQL}{STRLIST} {
parse_quda_mg_dbl_par_array(yytext, &(quda_input.mg_setup_ca_lambda_max[0]));
}
{SPC}*MGSmootherSolverCABasisType{EQL}{STRLIST} {
  /* hand the matched text to the CA-basis list parser, which fills the
   * per-level smoother CA basis array starting at its first element */
  parse_quda_mg_cabasis_par_array(yytext, quda_input.mg_smoother_solver_ca_basis);
}
{SPC}*MGSmootherSolverCABasisLambdaMin{EQL}{STRLIST} {
  /* hand the matched text to the double list parser, which fills the
   * per-level smoother lambda_min array starting at its first element */
  parse_quda_mg_dbl_par_array(yytext, quda_input.mg_smoother_solver_ca_lambda_min);
}
{SPC}*MGSmootherSolverCABasisLambdaMax{EQL}{STRLIST} {
  /* hand the matched text to the double list parser, which fills the
   * per-level smoother lambda_max array starting at its first element */
  parse_quda_mg_dbl_par_array(yytext, quda_input.mg_smoother_solver_ca_lambda_max);
}
{SPC}*MGCoarseSolverCABasisType{EQL}{STRLIST} {
parse_quda_mg_cabasis_par_array(yytext, &(quda_input.mg_coarse_solver_ca_basis[0]));
}
Expand Down Expand Up @@ -3903,6 +3935,7 @@ int read_input(char * conf_file){
quda_input.fermionbc = TM_QUDA_THETABC;
quda_input.pipeline = 0;
quda_input.gcrNkrylov = 10;
quda_input.mg_coarse_guess = QUDA_BOOLEAN_NO;
quda_input.reliable_delta = 1e-3; // anything smaller than this and we break down in double-half
quda_input.mg_n_level = _default_quda_mg_n_level;
quda_input.mg_setup_2kappamu = _default_quda_mg_setup_2kappamu;
Expand All @@ -3921,11 +3954,11 @@ int read_input(char * conf_file){
quda_input.mg_n_vec[level] = _default_quda_mg_n_vec;
quda_input.mg_mu_factor[level] = 1.0;
quda_input.mg_coarse_solver_type[level] = QUDA_GCR_INVERTER;
quda_input.mg_smoother_type[level] = QUDA_MR_INVERTER ;
quda_input.mg_smoother_type[level] = QUDA_CA_GCR_INVERTER;

quda_input.mg_use_eig_solver[level] = QUDA_BOOLEAN_NO;
quda_input.mg_eig_preserve_deflation = QUDA_BOOLEAN_NO;
quda_input.mg_eig_tol[level] = 1.0e-3;
quda_input.mg_eig_tol[level] = 1.0e-6;
quda_input.mg_eig_require_convergence[level] = QUDA_BOOLEAN_YES;
quda_input.mg_eig_type[level] = QUDA_EIG_TR_LANCZOS;
quda_input.mg_eig_spectrum[level] = QUDA_SPECTRUM_SR_EIG;
Expand All @@ -3937,6 +3970,8 @@ int read_input(char * conf_file){
quda_input.mg_eig_poly_deg[level] = 100;
quda_input.mg_eig_amin[level] = 1.0;
quda_input.mg_eig_amax[level] = 5.0;
quda_input.mg_eig_nEv[level] = 0;
quda_input.mg_eig_nKr[level] = 0;

quda_input.mg_setup_ca_basis[level] = QUDA_POWER_BASIS;
quda_input.mg_setup_ca_basis_size[level] = 4;
Expand All @@ -3947,6 +3982,10 @@ int read_input(char * conf_file){
quda_input.mg_coarse_solver_ca_basis_size[level] = 4;
quda_input.mg_coarse_solver_ca_lambda_min[level] = 0.0;
quda_input.mg_coarse_solver_ca_lambda_max[level] = -1.0;

quda_input.mg_smoother_solver_ca_basis[level] = QUDA_POWER_BASIS;
quda_input.mg_smoother_solver_ca_lambda_min[level] = 0.0;
quda_input.mg_smoother_solver_ca_lambda_max[level] = -1.0;

/* note: when the user does not specify any blocking parameters,
* a reasonable set will be computed automatically in the MG setup
Expand Down
23 changes: 17 additions & 6 deletions solver/monomial_solve.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,24 @@
#endif
#include "fatal_error.h"

#include <io/params.h>
#include <io/spinor.h>
#include "io/params.h"
#include "io/spinor.h"
#include "io/gauge.h"
#include "measure_gauge_action.h"

#ifdef TM_USE_QUDA
# include "quda_interface.h"
#endif

/**
 * Write the current gauge field to disk and then raise a fatal error.
 *
 * Used by the solve_* wrappers when a solver reports -1 iterations, so that
 * the configuration on which the solve failed is available afterwards.
 *
 * The output file is named
 *   conf_monomial_solve_fail.<g_gauge_state.gauge_id>.<nstore>
 * and its header records the plaquette, computed as
 * measure_plaquette(g_gauge_field) / (6 * VOLUME * g_nproc).
 *
 * A failed write is reported on stderr rather than silently ignored; the
 * fatal error is raised in either case.
 *
 * @param solver name of the calling solve routine, forwarded to fatal_error()
 *               as the originating function
 */
void solve_fail_write_config_and_abort(const char * const solver) {
  char outname[200];
  snprintf(outname, sizeof(outname), "conf_monomial_solve_fail.%.6f.%04d",
           g_gauge_state.gauge_id, nstore);

  const double plaquette =
    measure_plaquette((const su3**)g_gauge_field)/(6.*VOLUME*g_nproc);
  paramsXlfInfo * xlfInfo = construct_paramsXlfInfo(plaquette, nstore);

  // 64 is presumably the output precision in bits -- TODO confirm against io/gauge.h
  const int status = write_gauge_field(outname, 64, xlfInfo);
  free(xlfInfo);

  // NOTE(review): assumes a non-zero return from write_gauge_field signals
  // failure -- confirm against its declaration.
  if (status != 0) {
    fprintf(stderr, "Warning: writing gauge field to '%s' failed with status %d in %s\n",
            outname, status, __func__);
  }

  fatal_error("Error: solver reported -1 iterations.", solver);
}

int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_params,
const int max_iter, double eps_sq, const int rel_prec,
const int N, matrix_mult f, int solver_type){
Expand Down Expand Up @@ -216,7 +227,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_degenerate");
solve_fail_write_config_and_abort("solve_degenerate");
}

return (iteration_count);
Expand Down Expand Up @@ -425,7 +436,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_tm");
solve_fail_write_config_and_abort("solve_mms_tm");
}

return(iteration_count);
Expand Down Expand Up @@ -671,7 +682,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_nd");
solve_fail_write_config_and_abort("solve_mms_nd");
}

return (iteration_count);
Expand Down Expand Up @@ -726,7 +737,7 @@ int solve_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn,
tm_stopwatch_pop(&g_timers, 0, 1, "");

if (iteration_count == -1 && g_barrier_monomials_convergence) {
fatal_error("Error: solver reported -1 iterations.", "solve_mms_nd_plus");
solve_fail_write_config_and_abort("solve_mms_nd_plus");
}

return iteration_count;
Expand Down
5 changes: 5 additions & 0 deletions solver/solver_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ typedef enum SOLVER_TYPE {
MIXEDBICGSTAB,
DUMMYHERMTEST,
CA_GCR,
CGNE,
CGNR,
CA_CG,
CA_CGNE,
CA_CGNR,
INVALID_SOLVER
} SOLVER_TYPE;

Expand Down

0 comments on commit 2fe1762

Please sign in to comment.