etmc · kostrzewa · Oct 4, 2023 · Mar 13, 2023 · Mar 24, 2023 · Mar 24, 2023
diff --git a/default_input_values.h b/default_input_values.h
@@ -147,6 +147,10 @@
 #define _default_phmc_pure_phmc 0
 #define _default_stilde_max 3.
 #define _default_stilde_min 0.01
+#define _default_eig_polydeg 128
+#define _default_eig_amin 0.001
+#define _default_eig_amax 4
+#define _default_eig_n_kr 96
 #define _default_degree_of_p 48
 #define _default_propagator_splitted 1
 #define _default_source_splitted 1
@@ -199,6 +203,8 @@
 
 #define _default_external_inverter 0
 
+#define _default_external_eigsolver 0
+
 #define _default_external_library 0
 
 #define _default_subprocess_flag 0

diff --git a/doc/quda.tex b/doc/quda.tex
@@ -444,3 +444,18 @@ \subsubsection{QUDA-MG interface}
 In other words, if the largest of these smallest eigenvalues is $4\cdot10^{-3}$, for example, then \texttt{MGEigSolverPolyMin} can be set to 0.01.
 This ensures that the desired (smallest) part of the spectrum is smaller than \texttt{MGEigSolverPolyMin} and that the entire spectrum is contained in the range up to \texttt{MGEigSolverPolyMax}.
 After this, polynomial acceleration can be enabled, which should reduce setup time significantly.
+
+\subsubsection{Using the QUDA eigensolver in the HMC}
+
+When employing the rational approximation, in order to make sure that the eigenvalue bounds are chosen appropriately, it is necessary to measure the maximal and minimal eigenvalues of the operator involved in the given monomial.
+For the monomials \texttt{NDRAT, NDRATCOR, NDCLOVERRAT} and \texttt{NDCLOVERRATCOR}, this can be done using QUDA's eigensolver when, in addition to a non-zero setting for \texttt{ComputeEVFreq}, \texttt{UseExternalEigSolver = quda} is set.
+
+The eigensolver further offers the following parameters:
+\begin{itemize}
+  \item{ \texttt{EigSolverPolynomialDegree}: Once appropriate parameters for the polynomial filter have been determined (see \texttt{EigSolverPolyMin} and \texttt{EigSolverPolyMax} below), when \texttt{EigSolverPolynomialDegree} is set to a non-zero value, polynomial acceleration will be used in the measurent of the smallest eigenvalue. (integer, default: \texttt{128}) }
+  \item{ \texttt{EigSolverPolyMin}: Smallest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. A good value for this should be determined by first running the eigensolver without acceleration (\texttt{EigSolverPolynomialDegree = 0}). \texttt{EigSolverPolyMin} should then be set to about $3\lambda_\mathrm{min}$. Note that this is specified in the operator normalisation, such that $\lambda_\mathrm{min}$ obtained from the measurement should be multiplied by \texttt{StildeMax} to get an appropriate value for \texttt{EigSolverPolyMin}.  (positive real number, default: \texttt{0.001})}
+  \item{ \texttt{EigSolverPolyMax}: Largest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. This should be set to a value in excess of the measured largest eigenvalue, $1.5\lambda_\mathrm{max}$, say. Note that this is specified in the operator normalisation such that the measured $\lambda_\mathrm{max}$ should be multiplied by \texttt{StildeMax} to obtain an appropriate value for \texttt{EigSolverPolyMax}. (positive real number, defaullt: \texttt{4.0})}
+  \item{ \texttt{EigSolverKrylovSubspaceSize}: Size of the Krylov space used for the determination of the smallest and largest eigenvalues. The default seems to work well even for large lattices. (integer, default: \texttt{96})}
+\end{itemize}
+
+
diff --git a/misc_types.h b/misc_types.h
@@ -89,6 +89,12 @@ typedef enum ExternalInverter_s {
   QPHIX_INVERTER
 } ExternalInverter;
 
+/* enumeration type for the external eigensolver */
+typedef enum ExternalEigSolver_s {
+  NO_EXT_EIGSOLVER = 0,
+  QUDA_EIGSOLVER
+} ExternalEigSolver;
+
 /* enumeration type for the external inverter */
 typedef enum ExternalLibrary_s {
   NO_EXT_LIB = 0,

diff --git a/monomial/monomial.c b/monomial/monomial.c
@@ -116,6 +116,7 @@ int add_monomial(const int type) {
   monomial_list[no_monomials].solver_params.external_inverter = _default_external_inverter;
   monomial_list[no_monomials].solver_params.sloppy_precision = _default_operator_sloppy_precision_flag;
   monomial_list[no_monomials].external_library = _default_external_library;
+  monomial_list[no_monomials].external_eigsolver = _default_external_eigsolver;
   monomial_list[no_monomials].solver_params.refinement_precision = _default_operator_sloppy_precision_flag;
   monomial_list[no_monomials].HB_solver_params = monomial_list[no_monomials].solver_params;
   monomial_list[no_monomials].even_odd_flag = _default_even_odd_flag;
@@ -143,6 +144,11 @@ int add_monomial(const int type) {
   monomial_list[no_monomials].PrecisionHfinal = _default_g_acc_Hfin;
   monomial_list[no_monomials].PrecisionPtilde = _default_g_acc_Ptilde;
 
+  monomial_list[no_monomials].eig_polydeg = _default_eig_polydeg;
+  monomial_list[no_monomials].eig_amin = _default_eig_amin;
+  monomial_list[no_monomials].eig_amax = _default_eig_amax;
+  monomial_list[no_monomials].eig_n_kr = _default_eig_n_kr;
+
   monomial_list[no_monomials].rat.order = 12;
   monomial_list[no_monomials].rat.range[0] = _default_stilde_min;
   monomial_list[no_monomials].rat.range[1] = _default_stilde_max;

diff --git a/monomial/monomial.h b/monomial/monomial.h
@@ -114,7 +114,10 @@ typedef struct {
   double PrecisionHfinal;
   double StildeMin, StildeMax;
   double EVMin, EVMax, EVMaxInv;
+  int eig_polydeg, eig_n_kr;
+  double eig_amin, eig_amax;
   ExternalLibrary external_library;
+  ExternalEigSolver external_eigsolver;
   double * MDPolyCoefs, * PtildeCoefs;
   /* rational approximation */
   rational_t rat;

diff --git a/phmc.c b/phmc.c
@@ -25,6 +25,7 @@
 #include <math.h>
 #include <string.h>
 #include <time.h>
+#include <float.h>
 
 #include "global.h"
 
@@ -40,6 +41,10 @@
 #include "solver/matrix_mult_typedef_bi.h"
 #include "gettime.h"
 
+#ifdef TM_USE_QUDA
+#  include "quda_interface.h"
+#endif
+
 //                                          --> in  monomial
 double phmc_Cpol;                        // --> MDPolyLocNormConst
 double phmc_cheb_evmin, phmc_cheb_evmax; // --> EVMin, EVMax
@@ -206,7 +211,9 @@ void init_phmc() {
 void phmc_compute_ev(const int trajectory_counter,
 		     const int id,
 		     matrix_mult_bi Qsq) {
-  double atime, etime, temp=0., temp2=0.;
+  double atime, etime;
+  _Complex double eval_min = 0.0;
+  _Complex double eval_max = 0.0;
   int max_iter_ev, no_eigenvalues;
   char buf[100];
   char * phmcfilename = buf;
@@ -223,28 +230,75 @@ void phmc_compute_ev(const int trajectory_counter,
   }
 
   no_eigenvalues = 1;
-
-  temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq);
+  if(mnl->external_eigsolver == QUDA_EIGSOLVER) {
+  #ifdef TM_USE_QUDA
+    eigsolveQuda(&eval_min, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0,
+                 mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, 
+                 mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag,
+                 1, // we only support even-odd here
+                 mnl->solver_params.refinement_precision,
+                 mnl->solver_params.sloppy_precision,
+                 mnl->solver_params.compression_type, 0);
+    if( fabs(mnl->EVMax - 1) < 2*DBL_EPSILON ) {
+      eval_min /= mnl->StildeMax;
+    }
+  #else
+    if(g_proc_id == 0) {
+      fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n");
+    #ifdef TM_USE_MPI
+      MPI_Finalize();
+    #endif
+      exit(-2);
+    }
+  #endif
+  }else {
+    eval_min = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq);
+  }
+
 
   no_eigenvalues = 1;
-  temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq);
+  if(mnl->external_eigsolver == QUDA_EIGSOLVER) {
+  #ifdef TM_USE_QUDA
+    eigsolveQuda(&eval_max, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1,
+                 mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, 
+                 mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag,
+                 1, // we only support even-odd here
+                 mnl->solver_params.refinement_precision,
+                 mnl->solver_params.sloppy_precision,
+                 mnl->solver_params.compression_type, 0);
+    if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) {
+      eval_max /= mnl->StildeMax;
+    }
+  #else
+    if(g_proc_id == 0) {
+      fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n");
+    #ifdef TM_USE_MPI
+      MPI_Finalize();
+    #endif
+      exit(-2);
+    }
+  #endif
+  }else {
+    eval_max = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq);
+  }
+
 
   if((g_proc_id == 0) && (g_debug_level > 1)) {
     printf("# %s: lowest eigenvalue end of trajectory %d = %e\n", 
-	   mnl->name, trajectory_counter, temp);
+	   mnl->name, trajectory_counter, creal(eval_min));
     printf("# %s: maximal eigenvalue end of trajectory %d = %e\n", 
-	   mnl->name, trajectory_counter, temp2);
+	   mnl->name, trajectory_counter, creal(eval_max));
   }
   if(g_proc_id == 0) {
-    if(temp2 > mnl->EVMax) {
-      fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s larger than upper bound!\n\n", mnl->name);
+    if(creal(eval_max) > mnl->EVMax) {
+      fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s: %.6f is larger than upper bound: %.6f\n\n", mnl->name, creal(eval_max), mnl->EVMax);
     }
-    if(temp < mnl->EVMin) {
-      fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s smaller than lower bound!\n\n", mnl->name);
+    if(creal(eval_min) < mnl->EVMin) {
+      fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s: %.6f is smaller than lower bound: %.6f\n\n", mnl->name, creal(eval_min), mnl->EVMin);
     }
     countfile = fopen(phmcfilename, "a");
     fprintf(countfile, "%.8d %1.5e %1.5e %1.5e %1.5e\n", 
-	    trajectory_counter, temp, temp2, mnl->EVMin, mnl->EVMax);
+	    trajectory_counter, creal(eval_min), creal(eval_max), mnl->EVMin, mnl->EVMax);
     fclose(countfile);
   }
   etime = gettime();

diff --git a/quda_interface.c b/quda_interface.c
@@ -5,6 +5,8 @@
  *               2018       Bartosz Kostrzewa, Ferenc Pittler
  *               2019, 2020 Bartosz Kostrzewa
  *               2021       Bartosz Kostrzewa, Marco Garofalo, Ferenc Pittler, Simone Bacchio
+ *               2022       Simone Romiti, Bartosz Kostrzewa
+ *               2023       Aniket Sen, Bartosz Kostrzewa
  *
  * This file is part of tmLQCD.
  *
@@ -150,6 +152,10 @@ QudaEigParam mg_eig_param[QUDA_MAX_MG_LEVEL];
 // input params specific to tmLQCD QUDA interface
 tm_QudaParams_t quda_input;
 
+
+// parameters for the eigensolver
+QudaEigParam eig_param;
+
 // pointer to the QUDA gaugefield
 double *gauge_quda[4];
 
@@ -2594,3 +2600,148 @@ void  compute_WFlow_quda(const double eps, const double tmax, const int traj, FI
   free(obs_param);
   tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA");
 }
+
+
+
+
+/********************************************************
+
+Interface function for Eigensolver on Quda
+
+*********************************************************/
+
+
+void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin,
+                  const double precision, const int max_iter, const int polydeg, const double amin, 
+                  const double amax, const int n_kr, const int solver_flag, const int rel_prec,
+                  const int even_odd_flag, const SloppyPrecision refinement_precision,
+                  SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag) {
+
+  tm_stopwatch_push(&g_timers, __func__, "");
+
+
+  // it returns if quda is already init
+  _initQuda();
+
+  if ( rel_prec )
+    inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL;
+  else
+    inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL;
+
+  inv_param.kappa = g_kappa;
+
+  // figure out which BC tu use (theta, trivial...)
+  set_boundary_conditions(&compression, &gauge_param);
+
+  set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param);
+
+  // load gauge after setting precision
+  _loadGaugeQuda(compression);
+
+  if ( oneFlavourFlag ) {
+    _setOneFlavourSolverParam(g_kappa, g_c_sw, g_mu, solver_flag, even_odd_flag, precision, max_iter,
+                              1 /*single_parity_solve */,
+                              1 /*always QpQm*/);
+  }else {
+    _setTwoFlavourSolverParam(g_kappa, g_c_sw, g_mubar, g_epsbar, solver_flag, even_odd_flag, precision, max_iter,
+                              1 /*single_parity_solve */,
+                              1 /*always QpQm*/);
+  }
+
+  // create new eig_param
+  eig_param = newQudaEigParam();
+
+  // need our own QudaInvertParam for passing the operator properties
+  // as we modify the precision below 
+  QudaInvertParam eig_invert_param = newQudaInvertParam();
+  eig_invert_param = inv_param;
+  eig_param.invert_param = &eig_invert_param;
+  eig_param.invert_param->verbosity = QUDA_VERBOSE;
+  /* AS The following two are set to cuda_prec, otherwise 
+   * it gives an error. Such high precision might not be
+   * necessary. But have not found a way to consistently set
+   * the different precisions. */
+  eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec;
+  eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.clover_cuda_prec;
+
+  // for consistency with tmLQCD's own eigensolver we require a precision of at least
+  // 1e-14
+  if(tol < 1.e-14) {
+    eig_param.tol = 1.e-14;
+    eig_param.qr_tol = 1.e-14;
+  }else {
+    eig_param.tol = tol;
+    eig_param.qr_tol = tol;
+  }
+
+  if(blkwise == 1) {
+    eig_param.eig_type = QUDA_EIG_BLK_TR_LANCZOS;
+    eig_param.block_size = blksize;
+  }else {
+    eig_param.eig_type = QUDA_EIG_TR_LANCZOS;
+    eig_param.block_size = 1;
+  }
+
+  if(eig_param.invert_param->solve_type == QUDA_NORMOP_PC_SOLVE) {
+    eig_param.use_pc = QUDA_BOOLEAN_TRUE;
+    eig_param.use_norm_op = QUDA_BOOLEAN_TRUE;
+  }else if(eig_param.invert_param->solve_type == QUDA_DIRECT_PC_SOLVE) {
+    eig_param.use_pc = QUDA_BOOLEAN_TRUE;
+    eig_param.use_norm_op = QUDA_BOOLEAN_FALSE;
+  }else if(eig_param.invert_param->solve_type == QUDA_NORMOP_SOLVE) {
+    eig_param.use_pc = QUDA_BOOLEAN_FALSE;
+    eig_param.use_norm_op = QUDA_BOOLEAN_TRUE;
+  }else {
+    eig_param.use_pc = QUDA_BOOLEAN_FALSE;
+    eig_param.use_norm_op = QUDA_BOOLEAN_FALSE;
+  }
+
+  eig_param.use_poly_acc = (maxmin == 1) || (polydeg == 0) ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE;
+  eig_param.poly_deg = polydeg;
+  eig_param.a_min = amin;
+  eig_param.a_max = amax;
+
+  /* Daggers the operator. Not necessary for 
+   * most cases. */
+  eig_param.use_dagger = QUDA_BOOLEAN_FALSE;
+
+  /* Most likely not necessary. Set TRUE to use 
+   * Eigen routines to eigensolve the upper Hessenberg via QR */
+  eig_param.use_eigen_qr = QUDA_BOOLEAN_FALSE;    
+
+  eig_param.compute_svd = QUDA_BOOLEAN_FALSE;
+
+  /* Set TRUE to performs the \gamma_5 OP solve by 
+   * post multipling the eignvectors with \gamma_5 
+   * before computing the eigenvalues */
+  eig_param.compute_gamma5 = QUDA_BOOLEAN_FALSE;
+
+
+  if(maxmin == 1) eig_param.spectrum = QUDA_SPECTRUM_LR_EIG;
+  else eig_param.spectrum = QUDA_SPECTRUM_SR_EIG;
+
+
+  /* At the moment, the eigenvalues and eigenvectors are neither 
+   * written to or read from disk, but if necessary, can be added
+   * as a feature in future, by setting the following filenames */
+  strncpy(eig_param.vec_outfile,"",256);
+  strncpy(eig_param.vec_infile,"",256);
+
+
+  /* The size of eigenvector search space and
+   * the number of required converged eigenvectors 
+   * is both set to n_evals */
+  eig_param.n_conv = n_evals;
+  eig_param.n_ev = n_evals;
+  /* The size of the Krylov space is set to 96.
+   * From my understanding, QUDA automatically scales
+   * this search space, however more testing on this 
+   * might be necessary */
+  eig_param.n_kr = n_kr;
+
+  eig_param.max_restarts = max_iterations;
+
+  eigensolveQuda(NULL, evals, &eig_param);
+
+  tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA");
+}
diff --git a/quda_interface.h b/quda_interface.h
@@ -174,4 +174,11 @@ int invert_eo_quda_twoflavour_mshift(spinor ** const out_up, spinor ** const out
 void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf);
 void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile);
 
+
+void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin,
+                  const double precision, const int max_iter, const int polydeg, const double amin, 
+                  const double amax, const int n_kr, const int solver_flag, const int rel_prec,
+                  const int even_odd_flag, const SloppyPrecision refinement_precision,
+                  SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag);
+
 #endif /* QUDA_INTERFACE_H_ */