diff --git a/default_input_values.h b/default_input_values.h index bec54816b..3993ffbe8 100644 --- a/default_input_values.h +++ b/default_input_values.h @@ -147,6 +147,10 @@ #define _default_phmc_pure_phmc 0 #define _default_stilde_max 3. #define _default_stilde_min 0.01 +#define _default_eig_polydeg 128 +#define _default_eig_amin 0.001 +#define _default_eig_amax 4 +#define _default_eig_n_kr 96 #define _default_degree_of_p 48 #define _default_propagator_splitted 1 #define _default_source_splitted 1 @@ -199,6 +203,8 @@ #define _default_external_inverter 0 +#define _default_external_eigsolver 0 + #define _default_external_library 0 #define _default_subprocess_flag 0 diff --git a/doc/quda.tex b/doc/quda.tex index 81b7b6b83..b034b1a16 100644 --- a/doc/quda.tex +++ b/doc/quda.tex @@ -444,3 +444,18 @@ \subsubsection{QUDA-MG interface} In other words, if the largest of these smallest eigenvalues is $4\cdot10^{-3}$, for example, then \texttt{MGEigSolverPolyMin} can be set to 0.01. This ensures that the desired (smallest) part of the spectrum is smaller than \texttt{MGEigSolverPolyMin} and that the entire spectrum is contained in the range up to \texttt{MGEigSolverPolyMax}. After this, polynomial acceleration can be enabled, which should reduce setup time significantly. + +\subsubsection{Using the QUDA eigensolver in the HMC} + +When employing the rational approximation, in order to make sure that the eigenvalue bounds are chosen appropriately, it is necessary to measure the maximal and minimal eigenvalues of the operator involved in the given monomial. +For the monomials \texttt{NDRAT, NDRATCOR, NDCLOVERRAT} and \texttt{NDCLOVERRATCOR}, this can be done using QUDA's eigensolver when, in addition to a non-zero setting for \texttt{ComputeEVFreq}, \texttt{UseExternalEigSolver = quda} is set. 
+ +The eigensolver further offers the following parameters: +\begin{itemize} + \item{ \texttt{EigSolverPolynomialDegree}: Once appropriate parameters for the polynomial filter have been determined (see \texttt{EigSolverPolyMin} and \texttt{EigSolverPolyMax} below), when \texttt{EigSolverPolynomialDegree} is set to a non-zero value, polynomial acceleration will be used in the measurement of the smallest eigenvalue. (integer, default: \texttt{128}) } + \item{ \texttt{EigSolverPolyMin}: Smallest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. A good value for this should be determined by first running the eigensolver without acceleration (\texttt{EigSolverPolynomialDegree = 0}). \texttt{EigSolverPolyMin} should then be set to about $3\lambda_\mathrm{min}$. Note that this is specified in the operator normalisation, such that $\lambda_\mathrm{min}$ obtained from the measurement should be multiplied by \texttt{StildeMax} to get an appropriate value for \texttt{EigSolverPolyMin}. (positive real number, default: \texttt{0.001})} + \item{ \texttt{EigSolverPolyMax}: Largest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. This should be set to a value in excess of the measured largest eigenvalue, $1.5\lambda_\mathrm{max}$, say. Note that this is specified in the operator normalisation such that the measured $\lambda_\mathrm{max}$ should be multiplied by \texttt{StildeMax} to obtain an appropriate value for \texttt{EigSolverPolyMax}. (positive real number, default: \texttt{4.0})} + \item{ \texttt{EigSolverKrylovSubspaceSize}: Size of the Krylov space used for the determination of the smallest and largest eigenvalues. The default seems to work well even for large lattices. 
(integer, default: \texttt{96})} +\end{itemize} + + diff --git a/misc_types.h b/misc_types.h index c0f22aad9..947c7d65e 100644 --- a/misc_types.h +++ b/misc_types.h @@ -90,6 +90,12 @@ typedef enum ExternalInverter_s { QPHIX_INVERTER } ExternalInverter; +/* enumeration type for the external eigensolver */ +typedef enum ExternalEigSolver_s { + NO_EXT_EIGSOLVER = 0, + QUDA_EIGSOLVER +} ExternalEigSolver; + /* enumeration type for the external inverter */ typedef enum ExternalLibrary_s { NO_EXT_LIB = 0, diff --git a/monomial/monomial.c b/monomial/monomial.c index ee1c406a7..b7e8a55c5 100644 --- a/monomial/monomial.c +++ b/monomial/monomial.c @@ -116,6 +116,7 @@ int add_monomial(const int type) { monomial_list[no_monomials].solver_params.external_inverter = _default_external_inverter; monomial_list[no_monomials].solver_params.sloppy_precision = _default_operator_sloppy_precision_flag; monomial_list[no_monomials].external_library = _default_external_library; + monomial_list[no_monomials].external_eigsolver = _default_external_eigsolver; monomial_list[no_monomials].solver_params.refinement_precision = _default_operator_sloppy_precision_flag; monomial_list[no_monomials].HB_solver_params = monomial_list[no_monomials].solver_params; monomial_list[no_monomials].even_odd_flag = _default_even_odd_flag; @@ -143,6 +144,11 @@ int add_monomial(const int type) { monomial_list[no_monomials].PrecisionHfinal = _default_g_acc_Hfin; monomial_list[no_monomials].PrecisionPtilde = _default_g_acc_Ptilde; + monomial_list[no_monomials].eig_polydeg = _default_eig_polydeg; + monomial_list[no_monomials].eig_amin = _default_eig_amin; + monomial_list[no_monomials].eig_amax = _default_eig_amax; + monomial_list[no_monomials].eig_n_kr = _default_eig_n_kr; + monomial_list[no_monomials].rat.order = 12; monomial_list[no_monomials].rat.range[0] = _default_stilde_min; monomial_list[no_monomials].rat.range[1] = _default_stilde_max; diff --git a/monomial/monomial.h b/monomial/monomial.h index 
26aa1af48..bbdb30dd2 100644 --- a/monomial/monomial.h +++ b/monomial/monomial.h @@ -114,7 +114,10 @@ typedef struct { double PrecisionHfinal; double StildeMin, StildeMax; double EVMin, EVMax, EVMaxInv; + int eig_polydeg, eig_n_kr; + double eig_amin, eig_amax; ExternalLibrary external_library; + ExternalEigSolver external_eigsolver; double * MDPolyCoefs, * PtildeCoefs; /* rational approximation */ rational_t rat; diff --git a/phmc.c b/phmc.c index 009457c4c..a0f072e80 100644 --- a/phmc.c +++ b/phmc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "global.h" @@ -40,6 +41,10 @@ #include "solver/matrix_mult_typedef_bi.h" #include "gettime.h" +#ifdef TM_USE_QUDA +# include "quda_interface.h" +#endif + // --> in monomial double phmc_Cpol; // --> MDPolyLocNormConst double phmc_cheb_evmin, phmc_cheb_evmax; // --> EVMin, EVMax @@ -206,7 +211,9 @@ void init_phmc() { void phmc_compute_ev(const int trajectory_counter, const int id, matrix_mult_bi Qsq) { - double atime, etime, temp=0., temp2=0.; + double atime, etime; + _Complex double eval_min = 0.0; + _Complex double eval_max = 0.0; int max_iter_ev, no_eigenvalues; char buf[100]; char * phmcfilename = buf; @@ -223,28 +230,75 @@ void phmc_compute_ev(const int trajectory_counter, } no_eigenvalues = 1; - - temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); + if(mnl->external_eigsolver == QUDA_EIGSOLVER) { + #ifdef TM_USE_QUDA + eigsolveQuda(&eval_min, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 0); + if( fabs(mnl->EVMax - 1) < 2*DBL_EPSILON ) { + eval_min /= mnl->StildeMax; + } + #else + if(g_proc_id == 0) { + fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this 
build was not configured for QUDA usage.\n"); + #ifdef TM_USE_MPI + MPI_Finalize(); + #endif + exit(-2); + } + #endif + }else { + eval_min = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); + } + no_eigenvalues = 1; - temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); + if(mnl->external_eigsolver == QUDA_EIGSOLVER) { + #ifdef TM_USE_QUDA + eigsolveQuda(&eval_max, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 0); + if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) { + eval_max /= mnl->StildeMax; + } + #else + if(g_proc_id == 0) { + fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n"); + #ifdef TM_USE_MPI + MPI_Finalize(); + #endif + exit(-2); + } + #endif + }else { + eval_max = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); + } + if((g_proc_id == 0) && (g_debug_level > 1)) { printf("# %s: lowest eigenvalue end of trajectory %d = %e\n", - mnl->name, trajectory_counter, temp); + mnl->name, trajectory_counter, creal(eval_min)); printf("# %s: maximal eigenvalue end of trajectory %d = %e\n", - mnl->name, trajectory_counter, temp2); + mnl->name, trajectory_counter, creal(eval_max)); } if(g_proc_id == 0) { - if(temp2 > mnl->EVMax) { - fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s larger than upper bound!\n\n", mnl->name); + if(creal(eval_max) > mnl->EVMax) { + fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s: %.6f is larger than upper bound: %.6f\n\n", mnl->name, creal(eval_max), mnl->EVMax); } - if(temp < mnl->EVMin) { - fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s smaller than 
lower bound!\n\n", mnl->name); + if(creal(eval_min) < mnl->EVMin) { + fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s: %.6f is smaller than lower bound: %.6f\n\n", mnl->name, creal(eval_min), mnl->EVMin); } countfile = fopen(phmcfilename, "a"); fprintf(countfile, "%.8d %1.5e %1.5e %1.5e %1.5e\n", - trajectory_counter, temp, temp2, mnl->EVMin, mnl->EVMax); + trajectory_counter, creal(eval_min), creal(eval_max), mnl->EVMin, mnl->EVMax); fclose(countfile); } etime = gettime(); diff --git a/quda_interface.c b/quda_interface.c index ebfdba7ec..6902cdf90 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -5,6 +5,8 @@ * 2018 Bartosz Kostrzewa, Ferenc Pittler * 2019, 2020 Bartosz Kostrzewa * 2021 Bartosz Kostrzewa, Marco Garofalo, Ferenc Pittler, Simone Bacchio + * 2022 Simone Romiti, Bartosz Kostrzewa + * 2023 Aniket Sen, Bartosz Kostrzewa * * This file is part of tmLQCD. * @@ -152,6 +154,9 @@ tm_QudaParams_t quda_input; // parameters to control the automatic tuning of the QUDA MG tm_QudaMGTuningPlan_t quda_mg_tuning_plan; +// parameters for the eigensolver +QudaEigParam eig_param; + // pointer to the QUDA gaugefield double *gauge_quda[4]; @@ -3085,3 +3090,145 @@ void compute_WFlow_quda(const double eps, const double tmax, const int traj, FI tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); } + +/******************************************************** + +Interface function for Eigensolver on Quda + +*********************************************************/ + + +void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin, + const double precision, const int max_iter, const int polydeg, const double amin, + const double amax, const int n_kr, const int solver_flag, const int rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag) { + + tm_stopwatch_push(&g_timers, __func__, ""); 
+ + + // it returns if quda is already init + _initQuda(); + + if ( rel_prec ) + inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; + else + inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL; + + inv_param.kappa = g_kappa; + + // figure out which BC tu use (theta, trivial...) + set_boundary_conditions(&compression, &gauge_param); + + set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param); + + // load gauge after setting precision + _loadGaugeQuda(compression); + + if ( oneFlavourFlag ) { + _setOneFlavourSolverParam(g_kappa, g_c_sw, g_mu, solver_flag, even_odd_flag, precision, max_iter, + 1 /*single_parity_solve */, + 1 /*always QpQm*/); + }else { + _setTwoFlavourSolverParam(g_kappa, g_c_sw, g_mubar, g_epsbar, solver_flag, even_odd_flag, precision, max_iter, + 1 /*single_parity_solve */, + 1 /*always QpQm*/); + } + + // create new eig_param + eig_param = newQudaEigParam(); + + // need our own QudaInvertParam for passing the operator properties + // as we modify the precision below + QudaInvertParam eig_invert_param = newQudaInvertParam(); + eig_invert_param = inv_param; + eig_param.invert_param = &eig_invert_param; + eig_param.invert_param->verbosity = QUDA_VERBOSE; + /* AS The following two are set to cuda_prec, otherwise + * it gives an error. Such high precision might not be + * necessary. But have not found a way to consistently set + * the different precisions. 
*/ + eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec; + eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.clover_cuda_prec; + + // for consistency with tmLQCD's own eigensolver we require a precision of at least + // 1e-14 + if(tol < 1.e-14) { + eig_param.tol = 1.e-14; + eig_param.qr_tol = 1.e-14; + }else { + eig_param.tol = tol; + eig_param.qr_tol = tol; + } + + if(blkwise == 1) { + eig_param.eig_type = QUDA_EIG_BLK_TR_LANCZOS; + eig_param.block_size = blksize; + }else { + eig_param.eig_type = QUDA_EIG_TR_LANCZOS; + eig_param.block_size = 1; + } + + if(eig_param.invert_param->solve_type == QUDA_NORMOP_PC_SOLVE) { + eig_param.use_pc = QUDA_BOOLEAN_TRUE; + eig_param.use_norm_op = QUDA_BOOLEAN_TRUE; + }else if(eig_param.invert_param->solve_type == QUDA_DIRECT_PC_SOLVE) { + eig_param.use_pc = QUDA_BOOLEAN_TRUE; + eig_param.use_norm_op = QUDA_BOOLEAN_FALSE; + }else if(eig_param.invert_param->solve_type == QUDA_NORMOP_SOLVE) { + eig_param.use_pc = QUDA_BOOLEAN_FALSE; + eig_param.use_norm_op = QUDA_BOOLEAN_TRUE; + }else { + eig_param.use_pc = QUDA_BOOLEAN_FALSE; + eig_param.use_norm_op = QUDA_BOOLEAN_FALSE; + } + + eig_param.use_poly_acc = (maxmin == 1) || (polydeg == 0) ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + eig_param.poly_deg = polydeg; + eig_param.a_min = amin; + eig_param.a_max = amax; + + /* Daggers the operator. Not necessary for + * most cases. */ + eig_param.use_dagger = QUDA_BOOLEAN_FALSE; + + /* Most likely not necessary. 
Set TRUE to use + * Eigen routines to eigensolve the upper Hessenberg via QR */ + eig_param.use_eigen_qr = QUDA_BOOLEAN_FALSE; + + eig_param.compute_svd = QUDA_BOOLEAN_FALSE; + + /* Set TRUE to performs the \gamma_5 OP solve by + * post multipling the eignvectors with \gamma_5 + * before computing the eigenvalues */ + eig_param.compute_gamma5 = QUDA_BOOLEAN_FALSE; + + + if(maxmin == 1) eig_param.spectrum = QUDA_SPECTRUM_LR_EIG; + else eig_param.spectrum = QUDA_SPECTRUM_SR_EIG; + + + /* At the moment, the eigenvalues and eigenvectors are neither + * written to or read from disk, but if necessary, can be added + * as a feature in future, by setting the following filenames */ + strncpy(eig_param.vec_outfile,"",256); + strncpy(eig_param.vec_infile,"",256); + + + /* The size of eigenvector search space and + * the number of required converged eigenvectors + * is both set to n_evals */ + eig_param.n_conv = n_evals; + eig_param.n_ev = n_evals; + /* The size of the Krylov space is set to 96. + * From my understanding, QUDA automatically scales + * this search space, however more testing on this + * might be necessary */ + eig_param.n_kr = n_kr; + + eig_param.max_restarts = max_iterations; + + eigensolveQuda(NULL, evals, &eig_param); + + tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); +} diff --git a/quda_interface.h b/quda_interface.h index d555544d0..05fc5444a 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -174,4 +174,11 @@ int invert_eo_quda_twoflavour_mshift(spinor ** const out_up, spinor ** const out void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf); void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); + +void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin, + const double precision, const int max_iter, const int polydeg, const double amin, + const double amax, const int n_kr, const int solver_flag, const int 
rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag); + #endif /* QUDA_INTERFACE_H_ */ diff --git a/read_input.l b/read_input.l index 0fc555013..43609105c 100644 --- a/read_input.l +++ b/read_input.l @@ -2693,6 +2693,34 @@ static inline double fltlist_next_token(int * const list_end){ mnl->rec_ev = a; if(myverbose!=0) printf(" Frequency for computing EV's set to %d in line %d monomial %d\n", mnl->rec_ev, line_of_file, current_monomial); } + {SPC}*UseExternalEigSolver{EQL}quda { + if(myverbose) printf(" Use Quda eigensolver line %d monomial %d\n", line_of_file, current_monomial); + mnl->external_eigsolver = QUDA_EIGSOLVER; + } + {SPC}*UseExternalEigSolver{EQL}no { + if(myverbose) printf(" Do not use external eigensolver line %d monomial %d\n", line_of_file, current_monomial); + mnl->external_eigsolver = NO_EXT_EIGSOLVER; + } + {SPC}*EigSolverPolyMin{EQL}{FLT} { + sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); + mnl->eig_amin = c; + if(myverbose!=0) printf(" Minimum eigenvalue to exclude using polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + } + {SPC}*EigSolverPolyMax{EQL}{FLT} { + sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); + mnl->eig_amax = c; + if(myverbose!=0) printf(" Maximum eigenvalues to exclude using polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + } + {SPC}*EigSolverPolynomialDegree{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + mnl->eig_polydeg = a; + if(myverbose!=0) printf(" Degree of polynomial acceleration in eigensolver set to %d line %d monomial %d\n", a, line_of_file, current_monomial); + } + {SPC}*EigSolverKrylovSubspaceSize{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + mnl->eig_n_kr = a; + if(myverbose!=0) printf(" Krylov subspace size for eigensolver set to %d line %d 
monomial %d\n", a, line_of_file, current_monomial); + } } { {SPC}*MaxPtildeDegree{EQL}{DIGIT}+ {