From 489ee3d3a4f55d1d6ab57ccdfaa13bb318a1fae8 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Mon, 13 Mar 2023 15:58:32 +0100 Subject: [PATCH 01/27] initial commit, interface works with quda --- monomial/ndrat_monomial.c | 17 ++++-- quda_interface.c | 106 ++++++++++++++++++++++++++++++++++++++ quda_interface.h | 16 ++++++ solver/eigenvalues_bi.c | 37 ++++++++++++- 4 files changed, 169 insertions(+), 7 deletions(-) diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c index 1dd669b33..fd4018027 100644 --- a/monomial/ndrat_monomial.c +++ b/monomial/ndrat_monomial.c @@ -213,11 +213,7 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) { sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar); copy_32_sw_fields(); } - // we measure before the trajectory! - if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) { - if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi); - else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi); - } + // the Gaussian distributed random fields tm_stopwatch_push(&g_timers, "random_energy0", ""); @@ -243,6 +239,17 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) { } mnl->solver_params.sdim = VOLUME/2; mnl->solver_params.rel_prec = g_relative_precision_flag; + initQudaforEig(mnl->solver_params.squared_solver_prec, mnl->solver_params.max_iter, + mnl->solver_params.type, mnl->solver_params.rel_prec, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type); + // we measure before the trajectory! 
+ if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) { + if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi); + else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi); + } mnl->iter0 = solve_mms_nd_plus(g_chi_up_spinor_field, g_chi_dn_spinor_field, mnl->pf, mnl->pf2, &(mnl->solver_params) ); diff --git a/quda_interface.c b/quda_interface.c index 36c7974ed..d454325cd 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -148,6 +148,10 @@ QudaEigParam mg_eig_param[QUDA_MAX_MG_LEVEL]; // input params specific to tmLQCD QUDA interface tm_QudaParams_t quda_input; + +// parameters for the eigensolver +QudaEigParam eig_param; + // pointer to the QUDA gaugefield double *gauge_quda[4]; @@ -2323,6 +2327,47 @@ int invert_eo_quda_oneflavour_mshift(spinor ** const out, return(iterations); } +void initQudaforEig(const double precision, const int max_iter, + const int solver_flag, const int rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression) { + + + // it returns if quda is already init + _initQuda(); + + if ( rel_prec ) + inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; + else + inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL; + + inv_param.kappa = g_kappa; + + // figure out which BC to use (theta, trivial...) 
+ set_boundary_conditions(&compression, &gauge_param); + // set the sloppy precision of the mixed prec solver + set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param); + + // load gauge after setting precision + _loadGaugeQuda(compression); + + _setTwoFlavourSolverParam(g_kappa, + g_c_sw, + g_mubar, + g_epsbar, + solver_flag, + even_odd_flag, + precision, + max_iter, + 1 /*single_parity_solve */, + 1 /*always QpQm*/); + + // QUDA applies the MMdag operator, we need QpQm^{-1) in the end + // so we want QUDA to use the MdagM operator + inv_param.dagger = QUDA_DAG_YES; + +} + int invert_eo_quda_twoflavour_mshift(spinor ** const out_up, spinor ** const out_dn, spinor * const in_up, spinor * const in_dn, const double precision, const int max_iter, @@ -2573,3 +2618,64 @@ void compute_WFlow_quda(const double eps, const double tmax, const int traj, FI free(obs_param); tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); } + + + + +/******************************************************** + +Interface function for Eigensolver on Quda + +*********************************************************/ + + +void eigsolveQuda(int n, int lda, double tau, double tol, + int kmax, int jmax, int jmin, int itmax, + int blksize, int blkwise, + int V0dim, _Complex double *V0, + int solver_flag, + int linitmax, double eps_tr, double toldecay, + int verbosity, + int *k_conv, _Complex double ** host_evecs, _Complex double *host_evals, int *it, + int maxmin, const int shift_mode) { + + eig_param = newQudaEigParam(); + + eig_param.invert_param = &inv_param; + eig_param.tol = tol; + eig_param.qr_tol = tol; + //eig_param.invert_param->verbosity = QUDA_DEBUG_VERBOSE; + if(blkwise == 1) { + eig_param.eig_type = QUDA_EIG_BLK_IR_ARNOLDI; + eig_param.block_size = blksize; + }else { + eig_param.eig_type = QUDA_EIG_IR_ARNOLDI; + eig_param.block_size = 1; + } + eig_param.use_poly_acc = QUDA_BOOLEAN_FALSE; + eig_param.preserve_deflation = QUDA_BOOLEAN_FALSE; + eig_param.use_dagger = 
QUDA_BOOLEAN_TRUE; + eig_param.use_norm_op = QUDA_BOOLEAN_TRUE; + eig_param.use_pc = QUDA_BOOLEAN_FALSE; + eig_param.use_eigen_qr = QUDA_BOOLEAN_FALSE; + eig_param.compute_svd = QUDA_BOOLEAN_FALSE; + eig_param.compute_gamma5 = QUDA_BOOLEAN_FALSE; + if(maxmin == 1) eig_param.spectrum = QUDA_SPECTRUM_LM_EIG; + else eig_param.spectrum = QUDA_SPECTRUM_SM_EIG; + + //eig_param.save_prec = inv_param.cuda_prec_eigensolver; + eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec; + eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.cuda_prec; + + strncpy(eig_param.vec_outfile,"",256); + + + eig_param.n_conv = 1; + eig_param.n_ev = 1; + eig_param.n_kr = 96; + + eig_param.max_restarts = linitmax; + + eigensolveQuda((void **)host_evecs, host_evals, &eig_param); + +} diff --git a/quda_interface.h b/quda_interface.h index d555544d0..d96f081b4 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -174,4 +174,20 @@ int invert_eo_quda_twoflavour_mshift(spinor ** const out_up, spinor ** const out void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * const hf); void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); + +void eigsolveQuda(int n, int lda, double tau, double tol, + int kmax, int jmax, int jmin, int itmax, + int blksize, int blkwise, + int V0dim, _Complex double *V0, + int solver_flag, + int linitmax, double eps_tr, double toldecay, + int verbosity, + int *k_conv, _Complex double ** host_evecs, _Complex double *host_evals, int *it, + int maxmin, const int shift_mode); + +void initQudaforEig(const double precision, const int max_iter, + const int solver_flag, const int rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression); + #endif /* QUDA_INTERFACE_H_ */ diff --git a/solver/eigenvalues_bi.c b/solver/eigenvalues_bi.c index 63d78e483..e610f48d2 100644 --- a/solver/eigenvalues_bi.c +++ 
b/solver/eigenvalues_bi.c @@ -54,6 +54,10 @@ #include "eigenvalues_bi.h" #include "operator/tm_operators_nd.h" +#ifdef TM_USE_QUDA +# include "quda_interface.h" +#endif + double eigenvalues_bi(int * nr_of_eigenvalues, const int max_iterations, const double precision, @@ -64,6 +68,8 @@ double eigenvalues_bi(int * nr_of_eigenvalues, static int allocated = 0; static bispinor *eigenvectors_bi = NULL; static double * eigenvls_bi = NULL; + static _Complex double * eigenvls_quda = NULL; + static _Complex double ** eigenvectors_quda = NULL; /********************** * For Jacobi-Davidson @@ -126,6 +132,10 @@ double eigenvalues_bi(int * nr_of_eigenvalues, eigenvls_bi = (double*)malloc((*nr_of_eigenvalues)*sizeof(double)); } + eigenvls_quda = (_Complex double *)malloc((*nr_of_eigenvalues)*sizeof(_Complex double)); + eigenvectors_bi_= calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor)); + eigenvectors_quda = calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor)); + /* compute eigenvalues */ if((g_proc_id==0) && (g_debug_level > 4)) { @@ -135,7 +145,7 @@ double eigenvalues_bi(int * nr_of_eigenvalues, /* here n and lda are equal, because Q_Qdagger_ND_BI does an internal */ /* conversion to non _bi fields which are subject to xchange_fields */ /* so _bi fields do not need boundary */ - jdher_bi((VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), (VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), + /*jdher_bi((VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), (VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), startvalue, prec, (*nr_of_eigenvalues), j_max, j_min, max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_bi, @@ -144,9 +154,32 @@ double eigenvalues_bi(int * nr_of_eigenvalues, &converged, (_Complex double*) eigenvectors_bi, eigenvls_bi, &returncode, maxmin, 1, Qsq); + + if(g_proc_id == g_stdio_proc) { + printf("\n*****************************\nThis is for testing\n\n"); + printf("Eigenvalue from tmLQCD = %e\n\n",eigenvls_bi[0]); + }*/ 
+ + if(g_proc_id == g_stdio_proc) { + printf("Using QUDA now.\n"); + } + + eigsolveQuda((VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), (VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), + startvalue, prec, + (*nr_of_eigenvalues), j_max, j_min, + max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_bi, + BICGSTAB, solver_it_max, + threshold, decay, verbosity, + &converged, (_Complex double**) eigenvectors_quda, eigenvls_quda, + &returncode, maxmin, 1); + + if(g_proc_id == g_stdio_proc) { + printf("Eigenvalue from Quda = %e\n\n",eigenvls_quda[0]); + } + *nr_of_eigenvalues = converged; - returnvalue = eigenvls_bi[0]; + returnvalue = eigenvls_quda[0]; return(returnvalue); } From ee79b0f7e4537f162085740d39dc390ca9d6a468 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Fri, 24 Mar 2023 15:33:44 +0100 Subject: [PATCH 02/27] moved initQudaforEig into phmc_compute_ev --- monomial/ndrat_monomial.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c index fd4018027..1e37d9b51 100644 --- a/monomial/ndrat_monomial.c +++ b/monomial/ndrat_monomial.c @@ -239,12 +239,7 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) { } mnl->solver_params.sdim = VOLUME/2; mnl->solver_params.rel_prec = g_relative_precision_flag; - initQudaforEig(mnl->solver_params.squared_solver_prec, mnl->solver_params.max_iter, - mnl->solver_params.type, mnl->solver_params.rel_prec, - 1, // we only support even-odd here - mnl->solver_params.refinement_precision, - mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type); + // we measure before the trajectory! 
if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) { if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi); From 7a48974f53d9c75c8b0f25e1b45201b90f3f49fc Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Fri, 24 Mar 2023 15:36:15 +0100 Subject: [PATCH 03/27] initialize QUDA and eigenvals from QUDA are scaled --- phmc.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/phmc.c b/phmc.c index 009457c4c..71e19bf81 100644 --- a/phmc.c +++ b/phmc.c @@ -40,6 +40,10 @@ #include "solver/matrix_mult_typedef_bi.h" #include "gettime.h" +#ifdef TM_USE_QUDA +# include "quda_interface.h" +#endif + // --> in monomial double phmc_Cpol; // --> MDPolyLocNormConst double phmc_cheb_evmin, phmc_cheb_evmax; // --> EVMin, EVMax @@ -222,12 +226,31 @@ void phmc_compute_ev(const int trajectory_counter, printf("# Computing eigenvalues for heavy doublet\n"); } - no_eigenvalues = 1; +#ifdef TM_USE_QUDA + /* Here we initialize QUDA */ + initQudaforEig(mnl->solver_params.squared_solver_prec, mnl->solver_params.max_iter, + mnl->solver_params.type, mnl->solver_params.rel_prec, 1, // we only support even-odd here + mnl->solver_params.refinement_precision, mnl->solver_params.sloppy_precision, mnl->solver_params.compression_type); +#endif + + no_eigenvalues = 1; temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); + +#ifdef TM_USE_QUDA + if(mnl->EVMax == 1.) { + temp = temp / mnl->StildeMax; + } +#endif no_eigenvalues = 1; temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); + +#ifdef TM_USE_QUDA + if(mnl->EVMax == 1.) 
{ + temp2 = temp2 / mnl->StildeMax; + } +#endif if((g_proc_id == 0) && (g_debug_level > 1)) { printf("# %s: lowest eigenvalue end of trajectory %d = %e\n", From e030595355d11ef81decfdc02356c811567c2250 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Fri, 24 Mar 2023 15:37:27 +0100 Subject: [PATCH 04/27] call eigensolver on QUDA if TM_USE_QUDA is defined --- solver/eigenvalues_bi.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/solver/eigenvalues_bi.c b/solver/eigenvalues_bi.c index e610f48d2..aba826c3c 100644 --- a/solver/eigenvalues_bi.c +++ b/solver/eigenvalues_bi.c @@ -69,7 +69,6 @@ double eigenvalues_bi(int * nr_of_eigenvalues, static bispinor *eigenvectors_bi = NULL; static double * eigenvls_bi = NULL; static _Complex double * eigenvls_quda = NULL; - static _Complex double ** eigenvectors_quda = NULL; /********************** * For Jacobi-Davidson @@ -134,7 +133,6 @@ double eigenvalues_bi(int * nr_of_eigenvalues, eigenvls_quda = (_Complex double *)malloc((*nr_of_eigenvalues)*sizeof(_Complex double)); eigenvectors_bi_= calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor)); - eigenvectors_quda = calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor)); /* compute eigenvalues */ @@ -142,10 +140,24 @@ double eigenvalues_bi(int * nr_of_eigenvalues, printf(" Values of mu = %e mubar = %e eps = %e precision = %e \n \n", g_mu, g_mubar, g_epsbar, precision); } + /* For now, using the TM_USE_QUDA flag + * Ideally, one would use an operator flag + * like useExternalEigSolver. 
*/ +#ifdef TM_USE_QUDA + + if(g_proc_id == g_stdio_proc) { + printf("Using external eigensolver on QUDA.\n"); + } + + eigsolveQuda((*nr_of_eigenvalues), eigenvls_quda, prec, + blocksize, blockwise, max_iterations, maxmin); + +#else + /* here n and lda are equal, because Q_Qdagger_ND_BI does an internal */ /* conversion to non _bi fields which are subject to xchange_fields */ /* so _bi fields do not need boundary */ - /*jdher_bi((VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), (VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), + jdher_bi((VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), (VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), startvalue, prec, (*nr_of_eigenvalues), j_max, j_min, max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_bi, @@ -155,28 +167,7 @@ double eigenvalues_bi(int * nr_of_eigenvalues, &returncode, maxmin, 1, Qsq); - if(g_proc_id == g_stdio_proc) { - printf("\n*****************************\nThis is for testing\n\n"); - printf("Eigenvalue from tmLQCD = %e\n\n",eigenvls_bi[0]); - }*/ - - if(g_proc_id == g_stdio_proc) { - printf("Using QUDA now.\n"); - } - - eigsolveQuda((VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), (VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), - startvalue, prec, - (*nr_of_eigenvalues), j_max, j_min, - max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_bi, - BICGSTAB, solver_it_max, - threshold, decay, verbosity, - &converged, (_Complex double**) eigenvectors_quda, eigenvls_quda, - &returncode, maxmin, 1); - - if(g_proc_id == g_stdio_proc) { - printf("Eigenvalue from Quda = %e\n\n",eigenvls_quda[0]); - } - +#endif *nr_of_eigenvalues = converged; From 14c4e9f101288be159872a95fddc4d662db9194e Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Fri, 24 Mar 2023 15:38:02 +0100 Subject: [PATCH 05/27] clean up and finalize eigparams for QUDA --- quda_interface.c | 74 ++++++++++++++++++++++++++++++++---------------- quda_interface.h | 12 ++------ 2 files 
changed, 53 insertions(+), 33 deletions(-) diff --git a/quda_interface.c b/quda_interface.c index d454325cd..b037f92cf 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2629,53 +2629,79 @@ Interface function for Eigensolver on Quda *********************************************************/ -void eigsolveQuda(int n, int lda, double tau, double tol, - int kmax, int jmax, int jmin, int itmax, - int blksize, int blkwise, - int V0dim, _Complex double *V0, - int solver_flag, - int linitmax, double eps_tr, double toldecay, - int verbosity, - int *k_conv, _Complex double ** host_evecs, _Complex double *host_evals, int *it, - int maxmin, const int shift_mode) { +void eigsolveQuda(int n, _Complex double *host_evals, double tol, + int blksize, int blkwise, + int max_iterations, int maxmin) { eig_param = newQudaEigParam(); eig_param.invert_param = &inv_param; eig_param.tol = tol; eig_param.qr_tol = tol; - //eig_param.invert_param->verbosity = QUDA_DEBUG_VERBOSE; + + if(blkwise == 1) { - eig_param.eig_type = QUDA_EIG_BLK_IR_ARNOLDI; + eig_param.eig_type = QUDA_EIG_BLK_TR_LANCZOS; eig_param.block_size = blksize; }else { - eig_param.eig_type = QUDA_EIG_IR_ARNOLDI; + eig_param.eig_type = QUDA_EIG_TR_LANCZOS; eig_param.block_size = 1; } + + if(eig_param.invert_param->solve_type == QUDA_NORMOP_PC_SOLVE) { + eig_param.use_pc = QUDA_BOOLEAN_TRUE; + eig_param.use_norm_op = QUDA_BOOLEAN_TRUE; + }else if(eig_param.invert_param->solve_type == QUDA_DIRECT_PC_SOLVE) { + eig_param.use_pc = QUDA_BOOLEAN_TRUE; + eig_param.use_norm_op = QUDA_BOOLEAN_FALSE; + }else if(eig_param.invert_param->solve_type == QUDA_NORMOP_SOLVE) { + eig_param.use_pc = QUDA_BOOLEAN_FALSE; + eig_param.use_norm_op = QUDA_BOOLEAN_TRUE; + }else { + eig_param.use_pc = QUDA_BOOLEAN_FALSE; + eig_param.use_norm_op = QUDA_BOOLEAN_FALSE; + } + + /* Not using polynomial acceleration for now. + * Might be useful to add the support. 
*/ eig_param.use_poly_acc = QUDA_BOOLEAN_FALSE; - eig_param.preserve_deflation = QUDA_BOOLEAN_FALSE; - eig_param.use_dagger = QUDA_BOOLEAN_TRUE; - eig_param.use_norm_op = QUDA_BOOLEAN_TRUE; - eig_param.use_pc = QUDA_BOOLEAN_FALSE; - eig_param.use_eigen_qr = QUDA_BOOLEAN_FALSE; + + /* Daggers the operator. Not necessary for + * most cases. */ + eig_param.use_dagger = QUDA_BOOLEAN_FALSE; + + /* Most likely not necessary. Set TRUE to use + * Eigen routines to eigensolve the upper Hessenberg via QR */ + eig_param.use_eigen_qr = QUDA_BOOLEAN_FALSE; + eig_param.compute_svd = QUDA_BOOLEAN_FALSE; + + /* Set TRUE to performs the \gamma_5 OP solve by + * post multipling the eignvectors with \gamma_5 + * before computing the eigenvalues */ eig_param.compute_gamma5 = QUDA_BOOLEAN_FALSE; - if(maxmin == 1) eig_param.spectrum = QUDA_SPECTRUM_LM_EIG; - else eig_param.spectrum = QUDA_SPECTRUM_SM_EIG; - //eig_param.save_prec = inv_param.cuda_prec_eigensolver; + + if(maxmin == 1) eig_param.spectrum = QUDA_SPECTRUM_LR_EIG; + else eig_param.spectrum = QUDA_SPECTRUM_SR_EIG; + + /* The following two are set to cuda_prec, otherwise + * it gives an error. Such high precision might not be + * necessary. But have not found a way to consistently set + * the different precisions. 
*/ eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec; eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.cuda_prec; strncpy(eig_param.vec_outfile,"",256); + strncpy(eig_param.vec_infile,"",256); - eig_param.n_conv = 1; - eig_param.n_ev = 1; + eig_param.n_conv = n; + eig_param.n_ev = n; eig_param.n_kr = 96; - eig_param.max_restarts = linitmax; + eig_param.max_restarts = max_iterations; - eigensolveQuda((void **)host_evecs, host_evals, &eig_param); + eigensolveQuda(NULL, host_evals, &eig_param); } diff --git a/quda_interface.h b/quda_interface.h index d96f081b4..0babe4a27 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -175,15 +175,9 @@ void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * c void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); -void eigsolveQuda(int n, int lda, double tau, double tol, - int kmax, int jmax, int jmin, int itmax, - int blksize, int blkwise, - int V0dim, _Complex double *V0, - int solver_flag, - int linitmax, double eps_tr, double toldecay, - int verbosity, - int *k_conv, _Complex double ** host_evecs, _Complex double *host_evals, int *it, - int maxmin, const int shift_mode); +void eigsolveQuda(int n, _Complex double *host_evals, double tol, + int blksize, int blkwise, + int max_iterations, int maxmin); void initQudaforEig(const double precision, const int max_iter, const int solver_flag, const int rel_prec, From cc247bdf19bf1a0e8285dd4dc1cbe28d73e6fdae Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Fri, 24 Mar 2023 16:09:43 +0100 Subject: [PATCH 06/27] fix bug with the array storing the eigenvalues --- solver/eigenvalues_bi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/solver/eigenvalues_bi.c b/solver/eigenvalues_bi.c index aba826c3c..3ab67df53 100644 --- a/solver/eigenvalues_bi.c +++ b/solver/eigenvalues_bi.c @@ -68,7 +68,6 @@ double eigenvalues_bi(int * nr_of_eigenvalues, static int allocated = 
0; static bispinor *eigenvectors_bi = NULL; static double * eigenvls_bi = NULL; - static _Complex double * eigenvls_quda = NULL; /********************** * For Jacobi-Davidson @@ -128,12 +127,13 @@ double eigenvalues_bi(int * nr_of_eigenvalues, eigenvectors_bi_= calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor)); eigenvectors_bi = eigenvectors_bi_; #endif +#ifdef TM_USE_QUDA + eigenvls_bi = (_Complex double *)malloc((*nr_of_eigenvalues)*sizeof(_Complex double)); +#else eigenvls_bi = (double*)malloc((*nr_of_eigenvalues)*sizeof(double)); +#endif } - eigenvls_quda = (_Complex double *)malloc((*nr_of_eigenvalues)*sizeof(_Complex double)); - eigenvectors_bi_= calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor)); - /* compute eigenvalues */ if((g_proc_id==0) && (g_debug_level > 4)) { @@ -149,7 +149,7 @@ double eigenvalues_bi(int * nr_of_eigenvalues, printf("Using external eigensolver on QUDA.\n"); } - eigsolveQuda((*nr_of_eigenvalues), eigenvls_quda, prec, + eigsolveQuda((*nr_of_eigenvalues), eigenvls_bi, prec, blocksize, blockwise, max_iterations, maxmin); #else @@ -171,6 +171,6 @@ double eigenvalues_bi(int * nr_of_eigenvalues, *nr_of_eigenvalues = converged; - returnvalue = eigenvls_quda[0]; + returnvalue = eigenvls_bi[0]; return(returnvalue); } From f4cd2781de0d1794cf381f5b3c49b258032fe578 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 19 Apr 2023 11:59:13 +0200 Subject: [PATCH 07/27] revert changes in ndrat_monomial, eigenvalues_bi --- monomial/ndrat_monomial.c | 12 +++++------- solver/eigenvalues_bi.c | 24 ------------------------ 2 files changed, 5 insertions(+), 31 deletions(-) diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c index 1e37d9b51..1dd669b33 100644 --- a/monomial/ndrat_monomial.c +++ b/monomial/ndrat_monomial.c @@ -213,7 +213,11 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) { sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar); copy_32_sw_fields(); } - + // we measure before the 
trajectory! + if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) { + if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi); + else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi); + } // the Gaussian distributed random fields tm_stopwatch_push(&g_timers, "random_energy0", ""); @@ -239,12 +243,6 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) { } mnl->solver_params.sdim = VOLUME/2; mnl->solver_params.rel_prec = g_relative_precision_flag; - - // we measure before the trajectory! - if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) { - if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi); - else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi); - } mnl->iter0 = solve_mms_nd_plus(g_chi_up_spinor_field, g_chi_dn_spinor_field, mnl->pf, mnl->pf2, &(mnl->solver_params) ); diff --git a/solver/eigenvalues_bi.c b/solver/eigenvalues_bi.c index 3ab67df53..63d78e483 100644 --- a/solver/eigenvalues_bi.c +++ b/solver/eigenvalues_bi.c @@ -54,10 +54,6 @@ #include "eigenvalues_bi.h" #include "operator/tm_operators_nd.h" -#ifdef TM_USE_QUDA -# include "quda_interface.h" -#endif - double eigenvalues_bi(int * nr_of_eigenvalues, const int max_iterations, const double precision, @@ -127,11 +123,7 @@ double eigenvalues_bi(int * nr_of_eigenvalues, eigenvectors_bi_= calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor)); eigenvectors_bi = eigenvectors_bi_; #endif -#ifdef TM_USE_QUDA - eigenvls_bi = (_Complex double *)malloc((*nr_of_eigenvalues)*sizeof(_Complex double)); -#else eigenvls_bi = (double*)malloc((*nr_of_eigenvalues)*sizeof(double)); -#endif } /* compute eigenvalues */ @@ -140,20 +132,6 @@ double eigenvalues_bi(int * nr_of_eigenvalues, printf(" Values of mu = %e mubar = %e eps = %e precision = %e \n \n", g_mu, g_mubar, g_epsbar, precision); } - /* For now, using the TM_USE_QUDA flag - * Ideally, one would use an operator flag - * like useExternalEigSolver. 
*/ -#ifdef TM_USE_QUDA - - if(g_proc_id == g_stdio_proc) { - printf("Using external eigensolver on QUDA.\n"); - } - - eigsolveQuda((*nr_of_eigenvalues), eigenvls_bi, prec, - blocksize, blockwise, max_iterations, maxmin); - -#else - /* here n and lda are equal, because Q_Qdagger_ND_BI does an internal */ /* conversion to non _bi fields which are subject to xchange_fields */ /* so _bi fields do not need boundary */ @@ -166,8 +144,6 @@ double eigenvalues_bi(int * nr_of_eigenvalues, &converged, (_Complex double*) eigenvectors_bi, eigenvls_bi, &returncode, maxmin, 1, Qsq); - -#endif *nr_of_eigenvalues = converged; From af6f781fe0887423136d55ef98a9eb108f009284 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 19 Apr 2023 14:12:17 +0200 Subject: [PATCH 08/27] add input option UseExternalEigSolver --- default_input_values.h | 2 ++ misc_types.h | 6 ++++++ read_input.l | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/default_input_values.h b/default_input_values.h index cf781368b..c71c9ae74 100644 --- a/default_input_values.h +++ b/default_input_values.h @@ -198,6 +198,8 @@ #define _default_external_inverter 0 +#define _default_external_eigsolver 0 + #define _default_external_library 0 #define _default_subprocess_flag 0 diff --git a/misc_types.h b/misc_types.h index c7643f002..1695e04d8 100644 --- a/misc_types.h +++ b/misc_types.h @@ -89,6 +89,12 @@ typedef enum ExternalInverter_s { QPHIX_INVERTER } ExternalInverter; +/* enumeration type for the external eigensolver */ +typedef enum ExternalEigSolver_s { + NO_EXT_EIGSOLVER = 0, + QUDA_EIGSOLVER +} ExternalEigSolver; + /* enumeration type for the external inverter */ typedef enum ExternalLibrary_s { NO_EXT_LIB = 0, diff --git a/read_input.l b/read_input.l index b91b07428..dd2d0dddf 100644 --- a/read_input.l +++ b/read_input.l @@ -2505,6 +2505,14 @@ static inline double fltlist_next_token(int * const list_end){ mnl->rec_ev = a; if(myverbose!=0) printf(" Frequency for computing EV's set to %d in line %d monomial 
%d\n", mnl->rec_ev, line_of_file, current_monomial); } + {SPC}*UseExternalEigSolver{EQL}quda { + if(myverbose) printf(" Use Quda eigensolver line %d monomial %d\n", line_of_file, current_monomial); + mnl->external_eigsolver = QUDA_EIGSOLVER; + } + {SPC}*UseExternalEigSolver{EQL}no { + if(myverbose) printf(" Do not use external eigensolver line %d monomial %d\n", line_of_file, current_monomial); + mnl->external_eigsolver = NO_EXT_EIGSOLVER; + } } { {SPC}*MaxPtildeDegree{EQL}{DIGIT}+ { From f50d533135ea0dbdcca2d8a15246c468dc6b09ea Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 19 Apr 2023 14:13:02 +0200 Subject: [PATCH 09/27] add parameter external_eigsolver to monomial --- monomial/monomial.c | 1 + monomial/monomial.h | 1 + 2 files changed, 2 insertions(+) diff --git a/monomial/monomial.c b/monomial/monomial.c index 5c1c6f80f..3304609ce 100644 --- a/monomial/monomial.c +++ b/monomial/monomial.c @@ -114,6 +114,7 @@ int add_monomial(const int type) { monomial_list[no_monomials].solver_params.external_inverter = _default_external_inverter; monomial_list[no_monomials].solver_params.sloppy_precision = _default_operator_sloppy_precision_flag; monomial_list[no_monomials].external_library = _default_external_library; + monomial_list[no_monomials].external_eigsolver = _default_external_eigsolver; monomial_list[no_monomials].solver_params.refinement_precision = _default_operator_sloppy_precision_flag; monomial_list[no_monomials].even_odd_flag = _default_even_odd_flag; monomial_list[no_monomials].forcefactor = 1.; diff --git a/monomial/monomial.h b/monomial/monomial.h index 48f8fcb6d..9beb1ced7 100644 --- a/monomial/monomial.h +++ b/monomial/monomial.h @@ -112,6 +112,7 @@ typedef struct { double StildeMin, StildeMax; double EVMin, EVMax, EVMaxInv; ExternalLibrary external_library; + ExternalEigSolver external_eigsolver; double * MDPolyCoefs, * PtildeCoefs; /* rational approximation */ rational_t rat; From 40a1602e1a1841f14128689549cc8ff6abadd9c3 Mon Sep 17 00:00:00 2001 
From: Aniket Sen Date: Wed, 19 Apr 2023 14:14:11 +0200 Subject: [PATCH 10/27] eigsolveQuda is called based on mnl->external_eigsolver --- phmc.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/phmc.c b/phmc.c index 71e19bf81..c2a8b5ca1 100644 --- a/phmc.c +++ b/phmc.c @@ -226,31 +226,27 @@ void phmc_compute_ev(const int trajectory_counter, printf("# Computing eigenvalues for heavy doublet\n"); } -#ifdef TM_USE_QUDA - /* Here we initialize QUDA */ - initQudaforEig(mnl->solver_params.squared_solver_prec, mnl->solver_params.max_iter, - mnl->solver_params.type, mnl->solver_params.rel_prec, 1, // we only support even-odd here - mnl->solver_params.refinement_precision, mnl->solver_params.sloppy_precision, mnl->solver_params.compression_type); - -#endif - no_eigenvalues = 1; - temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); - -#ifdef TM_USE_QUDA - if(mnl->EVMax == 1.) { - temp = temp / mnl->StildeMax; + if(mnl->external_eigsolver == QUDA_EIGSOLVER) { + temp = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0); + if(mnl->EVMax == 1.) { + temp = temp / mnl->StildeMax; + } + }else { + temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); } -#endif + no_eigenvalues = 1; - temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); - -#ifdef TM_USE_QUDA - if(mnl->EVMax == 1.) { - temp2 = temp2 / mnl->StildeMax; + if(mnl->external_eigsolver == QUDA_EIGSOLVER) { + temp2 = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1); + if(mnl->EVMax == 1.) 
{ + temp2 = temp2 / mnl->StildeMax; + } + }else { + temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); } -#endif + if((g_proc_id == 0) && (g_debug_level > 1)) { printf("# %s: lowest eigenvalue end of trajectory %d = %e\n", From 3922b05c673efc85aaa9cad26a0b4f0489862229 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 19 Apr 2023 14:15:22 +0200 Subject: [PATCH 11/27] removed initQudaforEig, memory allocation for eigenvalues done in the interface, added timing and more comments --- quda_interface.c | 76 +++++++++++++++++++++--------------------------- quda_interface.h | 7 +---- 2 files changed, 34 insertions(+), 49 deletions(-) diff --git a/quda_interface.c b/quda_interface.c index b037f92cf..f47bcf648 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2327,47 +2327,6 @@ int invert_eo_quda_oneflavour_mshift(spinor ** const out, return(iterations); } -void initQudaforEig(const double precision, const int max_iter, - const int solver_flag, const int rel_prec, - const int even_odd_flag, const SloppyPrecision refinement_precision, - SloppyPrecision sloppy_precision, CompressionType compression) { - - - // it returns if quda is already init - _initQuda(); - - if ( rel_prec ) - inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; - else - inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL; - - inv_param.kappa = g_kappa; - - // figure out which BC to use (theta, trivial...) 
- set_boundary_conditions(&compression, &gauge_param); - // set the sloppy precision of the mixed prec solver - set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param); - - // load gauge after setting precision - _loadGaugeQuda(compression); - - _setTwoFlavourSolverParam(g_kappa, - g_c_sw, - g_mubar, - g_epsbar, - solver_flag, - even_odd_flag, - precision, - max_iter, - 1 /*single_parity_solve */, - 1 /*always QpQm*/); - - // QUDA applies the MMdag operator, we need QpQm^{-1) in the end - // so we want QUDA to use the MdagM operator - inv_param.dagger = QUDA_DAG_YES; - -} - int invert_eo_quda_twoflavour_mshift(spinor ** const out_up, spinor ** const out_dn, spinor * const in_up, spinor * const in_dn, const double precision, const int max_iter, @@ -2629,10 +2588,25 @@ Interface function for Eigensolver on Quda *********************************************************/ -void eigsolveQuda(int n, _Complex double *host_evals, double tol, +double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin) { + + // check if QUDA is initialized + if (!quda_initialized) { + fatal_error("QUDA must be initialized.","eigsolveQuda"); + return -1; + } + + tm_stopwatch_push(&g_timers, __func__, ""); + + _Complex double * eigenvls; + double returnvalue; + // allocate memory for eigenvalues + eigenvls = (_Complex double *)malloc((n)*sizeof(_Complex double)); + + // create new eig_param eig_param = newQudaEigParam(); eig_param.invert_param = &inv_param; @@ -2692,16 +2666,32 @@ void eigsolveQuda(int n, _Complex double *host_evals, double tol, eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec; eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.cuda_prec; + /* At the moment, the eigenvalues and eigenvectors are neither + * written to or read from disk, but if necessary, can be added + * as a feature in future, by setting the following filenames */ strncpy(eig_param.vec_outfile,"",256); 
strncpy(eig_param.vec_infile,"",256); + /* The size of eigenvector search space and + * the number of required converged eigenvectors + * is both set to n */ eig_param.n_conv = n; eig_param.n_ev = n; + /* The size of the Krylov space is set to 96. + * From my understanding, QUDA automatically scales + * this search space, however more testing on this + * might be necessary */ eig_param.n_kr = 96; eig_param.max_restarts = max_iterations; - eigensolveQuda(NULL, host_evals, &eig_param); + eigensolveQuda(NULL, eigenvls, &eig_param); + + returnvalue = eigenvls[0]; + + tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); + + return(returnvalue); } diff --git a/quda_interface.h b/quda_interface.h index 0babe4a27..c163c489f 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -175,13 +175,8 @@ void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * c void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); -void eigsolveQuda(int n, _Complex double *host_evals, double tol, +double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin); -void initQudaforEig(const double precision, const int max_iter, - const int solver_flag, const int rel_prec, - const int even_odd_flag, const SloppyPrecision refinement_precision, - SloppyPrecision sloppy_precision, CompressionType compression); - #endif /* QUDA_INTERFACE_H_ */ From c308510f9f75d6e247bd5d5f35642681a749b23d Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 26 Apr 2023 14:41:51 +0200 Subject: [PATCH 12/27] QUDA and monomial parameters are initialized within the interface --- phmc.c | 19 +++++++++++++++---- quda_interface.c | 37 +++++++++++++++++++++++++++++++------ quda_interface.h | 7 ++++--- 3 files changed, 50 insertions(+), 13 deletions(-) diff --git a/phmc.c b/phmc.c index c2a8b5ca1..df36e8c65 100644 --- a/phmc.c +++ b/phmc.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "global.h" @@ -228,8 +229,13 @@ void 
phmc_compute_ev(const int trajectory_counter, no_eigenvalues = 1; if(mnl->external_eigsolver == QUDA_EIGSOLVER) { - temp = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0); - if(mnl->EVMax == 1.) { + temp = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0, + mnl->accprec, mnl->maxiter, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type); + if( fabs(mnl->EVMax - 1) < 2*DBL_EPSILON ) { temp = temp / mnl->StildeMax; } }else { @@ -239,8 +245,13 @@ void phmc_compute_ev(const int trajectory_counter, no_eigenvalues = 1; if(mnl->external_eigsolver == QUDA_EIGSOLVER) { - temp2 = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1); - if(mnl->EVMax == 1.) { + temp2 = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1, + mnl->accprec, mnl->maxiter, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type); + if( fabs(mnl->EVMax - 1.) 
< 2*DBL_EPSILON ) { temp2 = temp2 / mnl->StildeMax; } }else { diff --git a/quda_interface.c b/quda_interface.c index f47bcf648..7a23776f0 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2588,17 +2588,42 @@ Interface function for Eigensolver on Quda *********************************************************/ -double eigsolveQuda(int n, double tol, - int blksize, int blkwise, - int max_iterations, int maxmin) { +double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin, + const double precision, const int max_iter, const int solver_flag, const int rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression) { + + tm_stopwatch_push(&g_timers, __func__, ""); // check if QUDA is initialized if (!quda_initialized) { - fatal_error("QUDA must be initialized.","eigsolveQuda"); - return -1; + // it returns if quda is already init + _initQuda(); } - tm_stopwatch_push(&g_timers, __func__, ""); + if ( rel_prec ) + inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; + else + inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL; + + inv_param.kappa = g_kappa; + + // figure out which BC tu use (theta, trivial...) 
+ set_boundary_conditions(&compression, &gauge_param); + + set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param); + + // load gauge after setting precision + _loadGaugeQuda(compression); + + _setTwoFlavourSolverParam(g_kappa, g_c_sw, g_mubar, g_epsbar, solver_flag, even_odd_flag, precision, max_iter, + 1 /*single_parity_solve */, + 1 /*always QpQm*/); + + // QUDA applies the MMdag operator, we need QpQm^{-1) in the end + // so we want QUDA to use the MdagM operator + inv_param.dagger = QUDA_DAG_YES; + _Complex double * eigenvls; double returnvalue; diff --git a/quda_interface.h b/quda_interface.h index c163c489f..5320fd5ea 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -175,8 +175,9 @@ void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * c void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); -double eigsolveQuda(int n, double tol, - int blksize, int blkwise, - int max_iterations, int maxmin); +double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin, + const double precision, const int max_iter, const int solver_flag, const int rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression); #endif /* QUDA_INTERFACE_H_ */ From a06e083aa61d84420acb8a5968a01f3256b7ee9f Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 26 Apr 2023 17:20:57 +0200 Subject: [PATCH 13/27] removed unnecessary quda_initialized check --- quda_interface.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/quda_interface.c b/quda_interface.c index 7a23776f0..bd2ff73fc 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2595,11 +2595,9 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati tm_stopwatch_push(&g_timers, __func__, ""); - // check if QUDA is initialized - if (!quda_initialized) { - // it returns if quda 
is already init - _initQuda(); - } + + // it returns if quda is already init + _initQuda(); if ( rel_prec ) inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; From 4b7e737b9bb95ea3dcfa79ec65ada2bae35a8033 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Tue, 9 May 2023 17:37:55 +0200 Subject: [PATCH 14/27] TM_USE_QUDA is checked before calling the interface --- phmc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/phmc.c b/phmc.c index df36e8c65..32900382e 100644 --- a/phmc.c +++ b/phmc.c @@ -229,6 +229,7 @@ void phmc_compute_ev(const int trajectory_counter, no_eigenvalues = 1; if(mnl->external_eigsolver == QUDA_EIGSOLVER) { + #ifdef TM_USE_QUDA temp = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0, mnl->accprec, mnl->maxiter, mnl->solver, g_relative_precision_flag, 1, // we only support even-odd here @@ -238,6 +239,15 @@ void phmc_compute_ev(const int trajectory_counter, if( fabs(mnl->EVMax - 1) < 2*DBL_EPSILON ) { temp = temp / mnl->StildeMax; } + #else + if(g_proc_id == 0) { + fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n"); + #ifdef TM_USE_MPI + MPI_Finalize(); + #endif + exit(-2); + } + #endif }else { temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); } @@ -245,6 +255,7 @@ void phmc_compute_ev(const int trajectory_counter, no_eigenvalues = 1; if(mnl->external_eigsolver == QUDA_EIGSOLVER) { + #ifdef TM_USE_QUDA temp2 = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1, mnl->accprec, mnl->maxiter, mnl->solver, g_relative_precision_flag, 1, // we only support even-odd here @@ -254,6 +265,15 @@ void phmc_compute_ev(const int trajectory_counter, if( fabs(mnl->EVMax - 1.) 
< 2*DBL_EPSILON ) { temp2 = temp2 / mnl->StildeMax; } + #else + if(g_proc_id == 0) { + fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n"); + #ifdef TM_USE_MPI + MPI_Finalize(); + #endif + exit(-2); + } + #endif }else { temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); } From a0d71d38422788b8e4ef23b459b17bc00eafb61d Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Tue, 9 May 2023 17:49:57 +0200 Subject: [PATCH 15/27] modifying eigenvalue_precision in the same fashion as eigenvalues_bi --- quda_interface.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/quda_interface.c b/quda_interface.c index fc8d1f51f..e33510ad2 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2633,8 +2633,15 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati eig_param = newQudaEigParam(); eig_param.invert_param = &inv_param; - eig_param.tol = tol; - eig_param.qr_tol = tol; + + if(tol < 1.e-14) { + eig_param.tol = 1.e-14; + eig_param.qr_tol = 1.e-14; + }else { + eig_param.tol = tol; + eig_param.qr_tol = tol; + } + if(blkwise == 1) { From f6d68c8cda2f2eee8c034f37d874d279eb5ae020 Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Wed, 24 May 2023 18:41:56 +0200 Subject: [PATCH 16/27] first stab at using polynomial acceleration for the ND eigensolver --- quda_interface.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/quda_interface.c b/quda_interface.c index 694647aa9..267a96d54 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -5,6 +5,8 @@ * 2018 Bartosz Kostrzewa, Ferenc Pittler * 2019, 2020 Bartosz Kostrzewa * 2021 Bartosz Kostrzewa, Marco Garofalo, Ferenc Pittler, Simone Bacchio + * 2022 Simone Romiti, Bartosz Kostrzewa + * 2023 Aniket Sen, Bartosz Kostrzewa * * This file is part of tmLQCD. 
* @@ -2673,9 +2675,14 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati eig_param.use_norm_op = QUDA_BOOLEAN_FALSE; } - /* Not using polynomial acceleration for now. - * Might be useful to add the support. */ - eig_param.use_poly_acc = QUDA_BOOLEAN_FALSE; + // BK: these defaults seem to work on a 32c64 ensemble + // at a relatively coarse lattice spacing for the eigenvalues + // of the twisted-clover ND operator with values of musigma / mudelta + // reproducing physical sea strange and charm quark masses + eig_param.use_poly_acc = maxmin == 1 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + eig_param.poly_deg = 128; + eig_param.a_min = 1e-3; + eig_param.a_max = 4; /* Daggers the operator. Not necessary for * most cases. */ @@ -2726,6 +2733,7 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati eigensolveQuda(NULL, eigenvls, &eig_param); returnvalue = eigenvls[0]; + free(eigenvls); tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); From 4e6033f3d0633b87d1ed3e20e2ce2cac358de178 Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Fri, 9 Jun 2023 15:19:56 +0200 Subject: [PATCH 17/27] use own QudaInvertParam struct for eig_param.invert_param to not disturb global inv_param when changing precision, adjust indentation in one place --- quda_interface.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/quda_interface.c b/quda_interface.c index 267a96d54..4a985641c 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -247,7 +247,7 @@ void _setDefaultQudaParam(void){ QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION; - QudaPrecision cuda_prec_precondition = QUDA_HALF_PRECISION; + QudaPrecision cuda_prec_precondition = QUDA_SINGLE_PRECISION; QudaTune tune = QUDA_TUNE_YES; @@ -2608,17 +2608,17 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, 
int max_iterati // it returns if quda is already init _initQuda(); - if ( rel_prec ) - inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; - else - inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL; + if ( rel_prec ) + inv_param.residual_type = QUDA_L2_RELATIVE_RESIDUAL; + else + inv_param.residual_type = QUDA_L2_ABSOLUTE_RESIDUAL; - inv_param.kappa = g_kappa; - - // figure out which BC tu use (theta, trivial...) - set_boundary_conditions(&compression, &gauge_param); + inv_param.kappa = g_kappa; + + // figure out which BC tu use (theta, trivial...) + set_boundary_conditions(&compression, &gauge_param); - set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param); + set_sloppy_prec(sloppy_precision, refinement_precision, &gauge_param, &inv_param); // load gauge after setting precision _loadGaugeQuda(compression); @@ -2640,8 +2640,18 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati // create new eig_param eig_param = newQudaEigParam(); - - eig_param.invert_param = &inv_param; + + // need our own QudaInvertParam for passing the operator properties + // as we modify the precision below + QudaInvertParam eig_invert_param = newQudaInvertParam(); + eig_invert_param = inv_param; + eig_param.invert_param = &eig_invert_param; + /* AS The following two are set to cuda_prec, otherwise + * it gives an error. Such high precision might not be + * necessary. But have not found a way to consistently set + * the different precisions. */ + eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec; + eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.clover_cuda_prec; if(tol < 1.e-14) { eig_param.tol = 1.e-14; @@ -2703,12 +2713,6 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati if(maxmin == 1) eig_param.spectrum = QUDA_SPECTRUM_LR_EIG; else eig_param.spectrum = QUDA_SPECTRUM_SR_EIG; - /* The following two are set to cuda_prec, otherwise - * it gives an error. 
Such high precision might not be - * necessary. But have not found a way to consistently set - * the different precisions. */ - eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec; - eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.cuda_prec; /* At the moment, the eigenvalues and eigenvectors are neither * written to or read from disk, but if necessary, can be added From 3d67264370cce89aca9872bcb72a2337cd2c825f Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Sat, 17 Jun 2023 10:51:44 +0200 Subject: [PATCH 18/27] user input for poly acc settings --- default_input_values.h | 4 ++++ monomial/monomial.c | 5 +++++ monomial/monomial.h | 2 ++ phmc.c | 6 ++++-- quda_interface.c | 14 ++++++++------ quda_interface.h | 3 ++- read_input.l | 20 ++++++++++++++++++++ 7 files changed, 45 insertions(+), 9 deletions(-) diff --git a/default_input_values.h b/default_input_values.h index 64b1a3531..820f3d98e 100644 --- a/default_input_values.h +++ b/default_input_values.h @@ -147,6 +147,10 @@ #define _default_phmc_pure_phmc 0 #define _default_stilde_max 3. 
#define _default_stilde_min 0.01 +#define _default_eig_polydeg 128 +#define _default_eig_amin 0.001 +#define _default_eig_amax 4 +#define _default_eig_n_kr 96 #define _default_degree_of_p 48 #define _default_propagator_splitted 1 #define _default_source_splitted 1 diff --git a/monomial/monomial.c b/monomial/monomial.c index 4214fef5d..b7e8a55c5 100644 --- a/monomial/monomial.c +++ b/monomial/monomial.c @@ -144,6 +144,11 @@ int add_monomial(const int type) { monomial_list[no_monomials].PrecisionHfinal = _default_g_acc_Hfin; monomial_list[no_monomials].PrecisionPtilde = _default_g_acc_Ptilde; + monomial_list[no_monomials].eig_polydeg = _default_eig_polydeg; + monomial_list[no_monomials].eig_amin = _default_eig_amin; + monomial_list[no_monomials].eig_amax = _default_eig_amax; + monomial_list[no_monomials].eig_n_kr = _default_eig_n_kr; + monomial_list[no_monomials].rat.order = 12; monomial_list[no_monomials].rat.range[0] = _default_stilde_min; monomial_list[no_monomials].rat.range[1] = _default_stilde_max; diff --git a/monomial/monomial.h b/monomial/monomial.h index c8f7e8b40..bbdb30dd2 100644 --- a/monomial/monomial.h +++ b/monomial/monomial.h @@ -114,6 +114,8 @@ typedef struct { double PrecisionHfinal; double StildeMin, StildeMax; double EVMin, EVMax, EVMaxInv; + int eig_polydeg, eig_n_kr; + double eig_amin, eig_amax; ExternalLibrary external_library; ExternalEigSolver external_eigsolver; double * MDPolyCoefs, * PtildeCoefs; diff --git a/phmc.c b/phmc.c index 32900382e..cf4e73b14 100644 --- a/phmc.c +++ b/phmc.c @@ -231,7 +231,8 @@ void phmc_compute_ev(const int trajectory_counter, if(mnl->external_eigsolver == QUDA_EIGSOLVER) { #ifdef TM_USE_QUDA temp = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0, - mnl->accprec, mnl->maxiter, mnl->solver, g_relative_precision_flag, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, 1, // we only support even-odd here 
mnl->solver_params.refinement_precision, mnl->solver_params.sloppy_precision, @@ -257,7 +258,8 @@ void phmc_compute_ev(const int trajectory_counter, if(mnl->external_eigsolver == QUDA_EIGSOLVER) { #ifdef TM_USE_QUDA temp2 = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1, - mnl->accprec, mnl->maxiter, mnl->solver, g_relative_precision_flag, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, 1, // we only support even-odd here mnl->solver_params.refinement_precision, mnl->solver_params.sloppy_precision, diff --git a/quda_interface.c b/quda_interface.c index 4a985641c..c17b2015f 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2598,7 +2598,8 @@ Interface function for Eigensolver on Quda double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin, - const double precision, const int max_iter, const int solver_flag, const int rel_prec, + const double precision, const int max_iter, const int polydeg, const double amin, + const double amax, const int n_kr, const int solver_flag, const int rel_prec, const int even_odd_flag, const SloppyPrecision refinement_precision, SloppyPrecision sloppy_precision, CompressionType compression) { @@ -2646,6 +2647,7 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati QudaInvertParam eig_invert_param = newQudaInvertParam(); eig_invert_param = inv_param; eig_param.invert_param = &eig_invert_param; + eig_param.invert_param->verbosity = QUDA_VERBOSE; /* AS The following two are set to cuda_prec, otherwise * it gives an error. Such high precision might not be * necessary. 
But have not found a way to consistently set @@ -2689,10 +2691,10 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati // at a relatively coarse lattice spacing for the eigenvalues // of the twisted-clover ND operator with values of musigma / mudelta // reproducing physical sea strange and charm quark masses - eig_param.use_poly_acc = maxmin == 1 ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; - eig_param.poly_deg = 128; - eig_param.a_min = 1e-3; - eig_param.a_max = 4; + eig_param.use_poly_acc = (maxmin == 1) && (polydeg != 0) ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + eig_param.poly_deg = polydeg; + eig_param.a_min = amin; + eig_param.a_max = amax; /* Daggers the operator. Not necessary for * most cases. */ @@ -2730,7 +2732,7 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati * From my understanding, QUDA automatically scales * this search space, however more testing on this * might be necessary */ - eig_param.n_kr = 96; + eig_param.n_kr = n_kr; eig_param.max_restarts = max_iterations; diff --git a/quda_interface.h b/quda_interface.h index 5320fd5ea..13fb578c9 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -176,7 +176,8 @@ void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FIL double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin, - const double precision, const int max_iter, const int solver_flag, const int rel_prec, + const double precision, const int max_iter, const int polydeg, const double amin, + const double amax, const int n_kr, const int solver_flag, const int rel_prec, const int even_odd_flag, const SloppyPrecision refinement_precision, SloppyPrecision sloppy_precision, CompressionType compression); diff --git a/read_input.l b/read_input.l index 4e7b0db83..3fde529d0 100644 --- a/read_input.l +++ b/read_input.l @@ -2607,6 +2607,26 @@ static inline double fltlist_next_token(int * const list_end){ if(myverbose) printf(" Do 
not use external eigensolver line %d monomial %d\n", line_of_file, current_monomial); mnl->external_eigsolver = NO_EXT_EIGSOLVER; } + {SPC}*EigAmin{EQL}{FLT} { + sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); + mnl->eig_amin = c; + if(myverbose!=0) printf(" eig_amin set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + } + {SPC}*EigAmax{EQL}{FLT} { + sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); + mnl->eig_amax = c; + if(myverbose!=0) printf(" eig_amax set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + } + {SPC}*EigPolyDeg{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + mnl->eig_polydeg = a; + if(myverbose!=0) printf(" eig_polydeg set to %d line %d monomial %d\n", a, line_of_file, current_monomial); + } + {SPC}*EigNkr{EQL}{DIGIT}+ { + sscanf(yytext, " %[a-zA-Z] = %d", name, &a); + mnl->eig_n_kr = a; + if(myverbose!=0) printf(" eig_n_kr set to %d line %d monomial %d\n", a, line_of_file, current_monomial); + } } { {SPC}*MaxPtildeDegree{EQL}{DIGIT}+ { From 362c93b942094d93961e17771e042f28a9bafa02 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 21 Jun 2023 10:02:51 +0200 Subject: [PATCH 19/27] poly acc inputs updated --- read_input.l | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/read_input.l b/read_input.l index 3fde529d0..54fe91b8a 100644 --- a/read_input.l +++ b/read_input.l @@ -2607,25 +2607,25 @@ static inline double fltlist_next_token(int * const list_end){ if(myverbose) printf(" Do not use external eigensolver line %d monomial %d\n", line_of_file, current_monomial); mnl->external_eigsolver = NO_EXT_EIGSOLVER; } - {SPC}*EigAmin{EQL}{FLT} { + {SPC}*EigSolverPolyMin{EQL}{FLT} { sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); mnl->eig_amin = c; - if(myverbose!=0) printf(" eig_amin set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + if(myverbose!=0) printf(" min for polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, 
current_monomial); } - {SPC}*EigAmax{EQL}{FLT} { + {SPC}*EigSolverPolyMax{EQL}{FLT} { sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); mnl->eig_amax = c; - if(myverbose!=0) printf(" eig_amax set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + if(myverbose!=0) printf(" max for polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, current_monomial); } - {SPC}*EigPolyDeg{EQL}{DIGIT}+ { + {SPC}*EigSolverPolynomialDegree{EQL}{DIGIT}+ { sscanf(yytext, " %[a-zA-Z] = %d", name, &a); mnl->eig_polydeg = a; - if(myverbose!=0) printf(" eig_polydeg set to %d line %d monomial %d\n", a, line_of_file, current_monomial); + if(myverbose!=0) printf(" degree of polynomial acceleration in eigensolver set to %d line %d monomial %d\n", a, line_of_file, current_monomial); } - {SPC}*EigNkr{EQL}{DIGIT}+ { + {SPC}*EigSolverKrylovSubspaceSize{EQL}{DIGIT}+ { sscanf(yytext, " %[a-zA-Z] = %d", name, &a); mnl->eig_n_kr = a; - if(myverbose!=0) printf(" eig_n_kr set to %d line %d monomial %d\n", a, line_of_file, current_monomial); + if(myverbose!=0) printf(" Krylov subspace size for eigensolver set to %d line %d monomial %d\n", a, line_of_file, current_monomial); } } { From 8bbabe6af8271e3a53727c71b2611d456cb63aec Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Tue, 27 Jun 2023 17:33:26 +0200 Subject: [PATCH 20/27] added support for one flavour solver --- phmc.c | 4 ++-- quda_interface.c | 14 ++++++++++---- quda_interface.h | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/phmc.c b/phmc.c index cf4e73b14..d4c168eac 100644 --- a/phmc.c +++ b/phmc.c @@ -236,7 +236,7 @@ void phmc_compute_ev(const int trajectory_counter, 1, // we only support even-odd here mnl->solver_params.refinement_precision, mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type); + mnl->solver_params.compression_type, 0); if( fabs(mnl->EVMax - 1) < 2*DBL_EPSILON ) { temp = temp / mnl->StildeMax; } @@ -263,7 +263,7 @@ void 
phmc_compute_ev(const int trajectory_counter, 1, // we only support even-odd here mnl->solver_params.refinement_precision, mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type); + mnl->solver_params.compression_type, 0); if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) { temp2 = temp2 / mnl->StildeMax; } diff --git a/quda_interface.c b/quda_interface.c index c17b2015f..9a6e5ef9b 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2601,7 +2601,7 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati const double precision, const int max_iter, const int polydeg, const double amin, const double amax, const int n_kr, const int solver_flag, const int rel_prec, const int even_odd_flag, const SloppyPrecision refinement_precision, - SloppyPrecision sloppy_precision, CompressionType compression) { + SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag) { tm_stopwatch_push(&g_timers, __func__, ""); @@ -2624,9 +2624,15 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati // load gauge after setting precision _loadGaugeQuda(compression); - _setTwoFlavourSolverParam(g_kappa, g_c_sw, g_mubar, g_epsbar, solver_flag, even_odd_flag, precision, max_iter, - 1 /*single_parity_solve */, - 1 /*always QpQm*/); + if ( oneFlavourFlag ) { + _setOneFlavourSolverParam(g_kappa, g_c_sw, g_mu, solver_flag, even_odd_flag, precision, max_iter, + 1 /*single_parity_solve */, + 1 /*always QpQm*/); + }else { + _setTwoFlavourSolverParam(g_kappa, g_c_sw, g_mubar, g_epsbar, solver_flag, even_odd_flag, precision, max_iter, + 1 /*single_parity_solve */, + 1 /*always QpQm*/); + } // QUDA applies the MMdag operator, we need QpQm^{-1) in the end // so we want QUDA to use the MdagM operator diff --git a/quda_interface.h b/quda_interface.h index 13fb578c9..eb0ee4447 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -179,6 +179,6 @@ double eigsolveQuda(int n, double tol, int blksize, int 
blkwise, int max_iterati const double precision, const int max_iter, const int polydeg, const double amin, const double amax, const int n_kr, const int solver_flag, const int rel_prec, const int even_odd_flag, const SloppyPrecision refinement_precision, - SloppyPrecision sloppy_precision, CompressionType compression); + SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag); #endif /* QUDA_INTERFACE_H_ */ From 721f3ba478fdeaeee8e38791c46cdc83469ed5c6 Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 28 Jun 2023 10:29:48 +0200 Subject: [PATCH 21/27] fixed bug in setting eig_param.use_poly_acc --- quda_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quda_interface.c b/quda_interface.c index 9a6e5ef9b..eeccb6d71 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2697,7 +2697,7 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati // at a relatively coarse lattice spacing for the eigenvalues // of the twisted-clover ND operator with values of musigma / mudelta // reproducing physical sea strange and charm quark masses - eig_param.use_poly_acc = (maxmin == 1) && (polydeg != 0) ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; + eig_param.use_poly_acc = (maxmin == 1) || (polydeg == 0) ? 
QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; eig_param.poly_deg = polydeg; eig_param.a_min = amin; eig_param.a_max = amax; From a8b0a62e88f982e64f2f2744575549b02eabdd05 Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Wed, 19 Jul 2023 13:40:52 +0200 Subject: [PATCH 22/27] require an output location to be provided externally to eigsolveQuda in order to allow all requested eigenvalues to be returned, if desired --- phmc.c | 54 +++++++++++++++++++++++++----------------------- quda_interface.c | 43 ++++++++++---------------------------- quda_interface.h | 10 ++++----- 3 files changed, 44 insertions(+), 63 deletions(-) diff --git a/phmc.c b/phmc.c index d4c168eac..550835513 100644 --- a/phmc.c +++ b/phmc.c @@ -211,7 +211,9 @@ void init_phmc() { void phmc_compute_ev(const int trajectory_counter, const int id, matrix_mult_bi Qsq) { - double atime, etime, temp=0., temp2=0.; + double atime, etime; + double eval_min = 0.; + double eval_max = 0.; int max_iter_ev, no_eigenvalues; char buf[100]; char * phmcfilename = buf; @@ -230,15 +232,15 @@ void phmc_compute_ev(const int trajectory_counter, no_eigenvalues = 1; if(mnl->external_eigsolver == QUDA_EIGSOLVER) { #ifdef TM_USE_QUDA - temp = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0, - mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, - mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, - 1, // we only support even-odd here - mnl->solver_params.refinement_precision, - mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type, 0); + eigsolveQuda(&eval_min, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 0, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 0); if( fabs(mnl->EVMax - 1) < 2*DBL_EPSILON ) { - 
temp = temp / mnl->StildeMax; + eval_min /= mnl->StildeMax; } #else if(g_proc_id == 0) { @@ -250,22 +252,22 @@ void phmc_compute_ev(const int trajectory_counter, } #endif }else { - temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); + eval_min = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq); } no_eigenvalues = 1; if(mnl->external_eigsolver == QUDA_EIGSOLVER) { #ifdef TM_USE_QUDA - temp2 = eigsolveQuda(no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1, - mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, - mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, - 1, // we only support even-odd here - mnl->solver_params.refinement_precision, - mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type, 0); + eigsolveQuda(&eval_max, no_eigenvalues, eigenvalue_precision, 1, 0, max_iter_ev, 1, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 0); if( fabs(mnl->EVMax - 1.) 
< 2*DBL_EPSILON ) { - temp2 = temp2 / mnl->StildeMax; + eval_max /= mnl->StildeMax; } #else if(g_proc_id == 0) { @@ -277,26 +279,26 @@ void phmc_compute_ev(const int trajectory_counter, } #endif }else { - temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); + eval_max = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq); } if((g_proc_id == 0) && (g_debug_level > 1)) { printf("# %s: lowest eigenvalue end of trajectory %d = %e\n", - mnl->name, trajectory_counter, temp); + mnl->name, trajectory_counter, eval_min); printf("# %s: maximal eigenvalue end of trajectory %d = %e\n", - mnl->name, trajectory_counter, temp2); + mnl->name, trajectory_counter, eval_max); } if(g_proc_id == 0) { - if(temp2 > mnl->EVMax) { - fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s larger than upper bound!\n\n", mnl->name); + if(eval_max > mnl->EVMax) { + fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s: %.6f is larger than upper bound: %.6f\n\n", mnl->name, eval_max, mnl->EVMax); } - if(temp < mnl->EVMin) { - fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s smaller than lower bound!\n\n", mnl->name); + if(eval_min < mnl->EVMin) { + fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s: %.6f is smaller than lower bound: %.6f\n\n", mnl->name, eval_min, mnl->EVMin); } countfile = fopen(phmcfilename, "a"); fprintf(countfile, "%.8d %1.5e %1.5e %1.5e %1.5e\n", - trajectory_counter, temp, temp2, mnl->EVMin, mnl->EVMax); + trajectory_counter, eval_min, eval_max, mnl->EVMin, mnl->EVMax); fclose(countfile); } etime = gettime(); diff --git a/quda_interface.c b/quda_interface.c index 6a157ce2b..0856b695c 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2611,11 +2611,11 @@ Interface function for Eigensolver on Quda *********************************************************/ -double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin, - const double 
precision, const int max_iter, const int polydeg, const double amin, - const double amax, const int n_kr, const int solver_flag, const int rel_prec, - const int even_odd_flag, const SloppyPrecision refinement_precision, - SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag) { +void eigsolveQuda(double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin, + const double precision, const int max_iter, const int polydeg, const double amin, + const double amax, const int n_kr, const int solver_flag, const int rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag) { tm_stopwatch_push(&g_timers, __func__, ""); @@ -2648,17 +2648,6 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati 1 /*always QpQm*/); } - // QUDA applies the MMdag operator, we need QpQm^{-1) in the end - // so we want QUDA to use the MdagM operator - inv_param.dagger = QUDA_DAG_YES; - - - _Complex double * eigenvls; - double returnvalue; - - // allocate memory for eigenvalues - eigenvls = (_Complex double *)malloc((n)*sizeof(_Complex double)); - // create new eig_param eig_param = newQudaEigParam(); @@ -2675,6 +2664,8 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati eig_param.invert_param->cuda_prec_eigensolver = inv_param.cuda_prec; eig_param.invert_param->clover_cuda_prec_eigensolver = inv_param.clover_cuda_prec; + // for consistency with tmLQCD's own eigensolver we require a precision of at least + // 1e-14 if(tol < 1.e-14) { eig_param.tol = 1.e-14; eig_param.qr_tol = 1.e-14; @@ -2683,8 +2674,6 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati eig_param.qr_tol = tol; } - - if(blkwise == 1) { eig_param.eig_type = QUDA_EIG_BLK_TR_LANCZOS; eig_param.block_size = blksize; @@ -2707,10 +2696,6 @@ double eigsolveQuda(int 
n, double tol, int blksize, int blkwise, int max_iterati eig_param.use_norm_op = QUDA_BOOLEAN_FALSE; } - // BK: these defaults seem to work on a 32c64 ensemble - // at a relatively coarse lattice spacing for the eigenvalues - // of the twisted-clover ND operator with values of musigma / mudelta - // reproducing physical sea strange and charm quark masses eig_param.use_poly_acc = (maxmin == 1) || (polydeg == 0) ? QUDA_BOOLEAN_FALSE : QUDA_BOOLEAN_TRUE; eig_param.poly_deg = polydeg; eig_param.a_min = amin; @@ -2745,9 +2730,9 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati /* The size of eigenvector search space and * the number of required converged eigenvectors - * is both set to n */ - eig_param.n_conv = n; - eig_param.n_ev = n; + * is both set to n_evals */ + eig_param.n_conv = n_evals; + eig_param.n_ev = n_evals; /* The size of the Krylov space is set to 96. * From my understanding, QUDA automatically scales * this search space, however more testing on this @@ -2756,13 +2741,7 @@ double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterati eig_param.max_restarts = max_iterations; - eigensolveQuda(NULL, eigenvls, &eig_param); - - returnvalue = eigenvls[0]; - free(eigenvls); + eigensolveQuda(NULL, evals, &eig_param); tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); - - return(returnvalue); - } diff --git a/quda_interface.h b/quda_interface.h index eb0ee4447..d133f54d6 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -175,10 +175,10 @@ void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * c void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); -double eigsolveQuda(int n, double tol, int blksize, int blkwise, int max_iterations, int maxmin, - const double precision, const int max_iter, const int polydeg, const double amin, - const double amax, const int n_kr, const int solver_flag, const int rel_prec, - const int even_odd_flag, const 
SloppyPrecision refinement_precision, - SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag); +void eigsolveQuda(double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin, + const double precision, const int max_iter, const int polydeg, const double amin, + const double amax, const int n_kr, const int solver_flag, const int rel_prec, + const int even_odd_flag, const SloppyPrecision refinement_precision, + SloppyPrecision sloppy_precision, CompressionType compression, const int oneFlavourFlag); #endif /* QUDA_INTERFACE_H_ */ From 0626f45019d79775bfadd4c9ab29ba2723f9d43f Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Wed, 19 Jul 2023 19:38:12 +0200 Subject: [PATCH 23/27] slight modification of verbose readinput output for eigensolver input parameters --- read_input.l | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/read_input.l b/read_input.l index 54fe91b8a..da6143e9a 100644 --- a/read_input.l +++ b/read_input.l @@ -2610,17 +2610,17 @@ static inline double fltlist_next_token(int * const list_end){ {SPC}*EigSolverPolyMin{EQL}{FLT} { sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); mnl->eig_amin = c; - if(myverbose!=0) printf(" min for polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + if(myverbose!=0) printf(" Minimum eigenvalue to exclude using polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, current_monomial); } {SPC}*EigSolverPolyMax{EQL}{FLT} { sscanf(yytext, " %[a-zA-Z] = %lf", name, &c); mnl->eig_amax = c; - if(myverbose!=0) printf(" max for polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, current_monomial); + if(myverbose!=0) printf(" Maximum eigenvalues to exclude using polynomial acceleration in eigensolver set to %e line %d monomial %d\n", c, line_of_file, current_monomial); } 
{SPC}*EigSolverPolynomialDegree{EQL}{DIGIT}+ { sscanf(yytext, " %[a-zA-Z] = %d", name, &a); mnl->eig_polydeg = a; - if(myverbose!=0) printf(" degree of polynomial acceleration in eigensolver set to %d line %d monomial %d\n", a, line_of_file, current_monomial); + if(myverbose!=0) printf(" Degree of polynomial acceleration in eigensolver set to %d line %d monomial %d\n", a, line_of_file, current_monomial); } {SPC}*EigSolverKrylovSubspaceSize{EQL}{DIGIT}+ { sscanf(yytext, " %[a-zA-Z] = %d", name, &a); From c5e8a138af09c53c63f04156935f048d84f39880 Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Wed, 19 Jul 2023 19:48:22 +0200 Subject: [PATCH 24/27] the eigenvalues computed by QUDA are complex numbers, of course --- phmc.c | 18 +++++++++--------- quda_interface.c | 2 +- quda_interface.h | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/phmc.c b/phmc.c index 550835513..a0f072e80 100644 --- a/phmc.c +++ b/phmc.c @@ -212,8 +212,8 @@ void phmc_compute_ev(const int trajectory_counter, const int id, matrix_mult_bi Qsq) { double atime, etime; - double eval_min = 0.; - double eval_max = 0.; + _Complex double eval_min = 0.0; + _Complex double eval_max = 0.0; int max_iter_ev, no_eigenvalues; char buf[100]; char * phmcfilename = buf; @@ -285,20 +285,20 @@ void phmc_compute_ev(const int trajectory_counter, if((g_proc_id == 0) && (g_debug_level > 1)) { printf("# %s: lowest eigenvalue end of trajectory %d = %e\n", - mnl->name, trajectory_counter, eval_min); + mnl->name, trajectory_counter, creal(eval_min)); printf("# %s: maximal eigenvalue end of trajectory %d = %e\n", - mnl->name, trajectory_counter, eval_max); + mnl->name, trajectory_counter, creal(eval_max)); } if(g_proc_id == 0) { - if(eval_max > mnl->EVMax) { - fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s: %.6f is larger than upper bound: %.6f\n\n", mnl->name, eval_max, mnl->EVMax); + if(creal(eval_max) > mnl->EVMax) { + fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s: 
%.6f is larger than upper bound: %.6f\n\n", mnl->name, creal(eval_max), mnl->EVMax); } - if(eval_min < mnl->EVMin) { - fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s: %.6f is smaller than lower bound: %.6f\n\n", mnl->name, eval_min, mnl->EVMin); + if(creal(eval_min) < mnl->EVMin) { + fprintf(stderr, "\nWarning: smallest eigenvalue for monomial %s: %.6f is smaller than lower bound: %.6f\n\n", mnl->name, creal(eval_min), mnl->EVMin); } countfile = fopen(phmcfilename, "a"); fprintf(countfile, "%.8d %1.5e %1.5e %1.5e %1.5e\n", - trajectory_counter, eval_min, eval_max, mnl->EVMin, mnl->EVMax); + trajectory_counter, creal(eval_min), creal(eval_max), mnl->EVMin, mnl->EVMax); fclose(countfile); } etime = gettime(); diff --git a/quda_interface.c b/quda_interface.c index 0856b695c..2438ffd38 100644 --- a/quda_interface.c +++ b/quda_interface.c @@ -2611,7 +2611,7 @@ Interface function for Eigensolver on Quda *********************************************************/ -void eigsolveQuda(double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin, +void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin, const double precision, const int max_iter, const int polydeg, const double amin, const double amax, const int n_kr, const int solver_flag, const int rel_prec, const int even_odd_flag, const SloppyPrecision refinement_precision, diff --git a/quda_interface.h b/quda_interface.h index d133f54d6..05fc5444a 100644 --- a/quda_interface.h +++ b/quda_interface.h @@ -175,7 +175,7 @@ void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * c void compute_WFlow_quda(const double eps ,const double tmax, const int traj, FILE* outfile); -void eigsolveQuda(double * evals, int n_evals, double tol, int blksize, int blkwise, int max_iterations, int maxmin, +void eigsolveQuda(_Complex double * evals, int n_evals, double tol, int blksize, int blkwise, 
int max_iterations, int maxmin, const double precision, const int max_iter, const int polydeg, const double amin, const double amax, const int n_kr, const int solver_flag, const int rel_prec, const int even_odd_flag, const SloppyPrecision refinement_precision, From 4fe78b707ba986eeb178e8c5ba308cb87f479fe0 Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Wed, 19 Jul 2023 19:48:40 +0200 Subject: [PATCH 25/27] document QUDA eigensolver for the HMC --- doc/quda.tex | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/quda.tex b/doc/quda.tex index 81b7b6b83..b034b1a16 100644 --- a/doc/quda.tex +++ b/doc/quda.tex @@ -444,3 +444,18 @@ \subsubsection{QUDA-MG interface} In other words, if the largest of these smallest eigenvalues is $4\cdot10^{-3}$, for example, then \texttt{MGEigSolverPolyMin} can be set to 0.01. This ensures that the desired (smallest) part of the spectrum is smaller than \texttt{MGEigSolverPolyMin} and that the entire spectrum is contained in the range up to \texttt{MGEigSolverPolyMax}. After this, polynomial acceleration can be enabled, which should reduce setup time significantly. + +\subsubsection{Using the QUDA eigensolver in the HMC} + +When employing the rational approximation, in order to make sure that the eigenvalue bounds are chosen appropriately, it is necessary to measure the maximal and minimal eigenvalues of the operator involved in the given monomial. +For the monomials \texttt{NDRAT, NDRATCOR, NDCLOVERRAT} and \texttt{NDCLOVERRATCOR}, this can be done using QUDA's eigensolver when, in addition to a non-zero setting for \texttt{ComputeEVFreq}, \texttt{UseExternalEigSolver = quda} is set. 
+ +The eigensolver further offers the following parameters: +\begin{itemize} + \item{ \texttt{EigSolverPolynomialDegree}: Once appropriate parameters for the polynomial filter have been determined (see \texttt{EigSolverPolyMin} and \texttt{EigSolverPolyMax} below), when \texttt{EigSolverPolynomialDegree} is set to a non-zero value, polynomial acceleration will be used in the measurement of the smallest eigenvalue. (integer, default: \texttt{128}) } + \item{ \texttt{EigSolverPolyMin}: Smallest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. A good value for this should be determined by first running the eigensolver without acceleration (\texttt{EigSolverPolynomialDegree = 0}). \texttt{EigSolverPolyMin} should then be set to about $3\lambda_\mathrm{min}$. Note that this is specified in the operator normalisation, such that $\lambda_\mathrm{min}$ obtained from the measurement should be multiplied by \texttt{StildeMax} to get an appropriate value for \texttt{EigSolverPolyMin}. (positive real number, default: \texttt{0.001})} + \item{ \texttt{EigSolverPolyMax}: Largest eigenvalue to be excluded by the polynomial filter when polynomial acceleration is used. This should be set to a value in excess of the measured largest eigenvalue, $1.5\lambda_\mathrm{max}$, say. Note that this is specified in the operator normalisation such that the measured $\lambda_\mathrm{max}$ should be multiplied by \texttt{StildeMax} to obtain an appropriate value for \texttt{EigSolverPolyMax}. (positive real number, default: \texttt{4.0})} + \item{ \texttt{EigSolverKrylovSubspaceSize}: Size of the Krylov space used for the determination of the smallest and largest eigenvalues. The default seems to work well even for large lattices. 
(integer, default: \texttt{96})} +\end{itemize} + + From a46c4c654b3275ecf06f73448b2b0612d3375c9d Mon Sep 17 00:00:00 2001 From: Aniket Sen Date: Wed, 2 Aug 2023 13:28:44 +0200 Subject: [PATCH 26/27] script for testing eigsolveQuda --- test_eigsolveQuda.c | 558 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 558 insertions(+) create mode 100644 test_eigsolveQuda.c diff --git a/test_eigsolveQuda.c b/test_eigsolveQuda.c new file mode 100644 index 000000000..e7baf3090 --- /dev/null +++ b/test_eigsolveQuda.c @@ -0,0 +1,558 @@ +#include "lime.h" +#if HAVE_CONFIG_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef TM_USE_MPI +#include +#endif +#ifdef TM_USE_OMP +#include +#endif +#include "global.h" +#include "git_hash.h" +#include "io/params.h" +#include "io/gauge.h" +#include "getopt.h" +#include "ranlxd.h" +#include "geometry_eo.h" +#include "start.h" +#include "measure_gauge_action.h" +#include "measure_rectangles.h" +#ifdef TM_USE_MPI +#include "xchange/xchange.h" +#endif +#include "read_input.h" +#include "mpi_init.h" +#include "sighandler.h" +#include "update_tm.h" +#include "init/init.h" +#include "test/check_geometry.h" +#include "boundary.h" +#include "phmc.h" +#include "solver/solver.h" +#include "monomial/monomial.h" +#include "integrator.h" +#include "sighandler.h" +#include "meas/measurements.h" +#include "operator/tm_operators_nd.h" +#ifdef DDalphaAMG +#include "DDalphaAMG_interface.h" +#endif +#ifdef TM_USE_QUDA +# include "quda_interface.h" +#endif + +extern int nstore; + +int const rlxdsize = 105; + +static void usage(const tm_ExitCode_t exit_code); +static void process_args(int argc, char *argv[], char ** input_filename, char ** filename); +static void set_default_filenames(char ** input_filename, char ** filename); + +int main(int argc,char *argv[]) { + + FILE *parameterfile=NULL, *countfile=NULL; + char *filename = NULL; + char datafilename[206]; + char 
parameterfilename[206]; + char gauge_filename[50]; + char nstore_filename[50]; + char tmp_filename[50]; + char *input_filename = NULL; + int status = 0, accept = 0; + int j,ix,mu, trajectory_counter=0; + unsigned int const io_max_attempts = 5; /* Make this configurable? */ + unsigned int const io_timeout = 5; /* Make this configurable? */ + struct timeval t1; + + _Complex double eval_min = 0.0; + _Complex double eval_max = 0.0; + + /* Energy corresponding to the Gauge part */ + double plaquette_energy = 0., rectangle_energy = 0.; + /* Acceptance rate */ + int Rate=0; + /* Do we want to perform reversibility checks */ + /* See also return_check_flag in read_input.h */ + int return_check = 0; + + paramsXlfInfo *xlfInfo; + +/* For online measurements */ + measurement * meas; + int imeas; + + init_critical_globals(TM_PROGRAM_HMC_TM); + +#ifdef _KOJAK_INST +#pragma pomp inst init +#pragma pomp inst begin(main) +#endif + + #if (defined SSE || defined SSE2 || SSE3) + signal(SIGILL,&catch_ill_inst); +#endif + + strcpy(gauge_filename,"conf.save"); + strcpy(nstore_filename,"nstore_counter"); + strcpy(tmp_filename, ".conf.tmp"); + + verbose = 1; + g_use_clover_flag = 0; + + process_args(argc,argv,&input_filename,&filename); + set_default_filenames(&input_filename,&filename); + + init_parallel_and_read_input(argc, argv, input_filename); + + DUM_DERI = 4; + DUM_MATRIX = DUM_DERI+7; + if(g_running_phmc) { + NO_OF_SPINORFIELDS = DUM_MATRIX+8; + } + else { + NO_OF_SPINORFIELDS = DUM_MATRIX+6; + } + DUM_BI_DERI = 6; + DUM_BI_SOLVER = DUM_BI_DERI+7; + + DUM_BI_MATRIX = DUM_BI_SOLVER+6; + NO_OF_BISPINORFIELDS = DUM_BI_MATRIX+6; + + //4 extra fields (corresponding to DUM_MATRIX+0..5) for deg. and ND matrix mult. 
+ NO_OF_SPINORFIELDS_32 = 6; + + tmlqcd_mpi_init(argc, argv); + tm_stopwatch_push(&g_timers, "HMC", ""); + + if(nstore == -1) { + countfile = fopen(nstore_filename, "r"); + if(countfile != NULL) { + j = fscanf(countfile, "%d %d %s\n", &nstore, &trajectory_counter, gauge_input_filename); + if(j < 1) nstore = 0; + if(j < 2) trajectory_counter = 0; + fclose(countfile); + } + else { + nstore = 0; + trajectory_counter = 0; + } + } + +#ifndef TM_USE_MPI + g_dbw2rand = 0; +#endif + + + g_mu = g_mu1; + +#ifdef _GAUGE_COPY + status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); + status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1); +#else + status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); + status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 0); +#endif + /* need temporary gauge field for gauge reread checks and in update_tm */ + status += init_gauge_tmp(VOLUME); + + status += init_gauge_fg(VOLUME); + + if (status != 0) { + fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); + exit(0); + } + j = init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); + if (j != 0) { + fprintf(stderr, "Not enough memory for geometry_indices! Aborting...\n"); + exit(0); + } + if(even_odd_flag) { + j = init_spinor_field(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS); + j += init_spinor_field_32(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS_32); + } + else { + j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); + j += init_spinor_field_32(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS_32); + } + if (j != 0) { + fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); + exit(0); + } + if(even_odd_flag) { + j = init_csg_field(VOLUMEPLUSRAND/2); + } + else { + j = init_csg_field(VOLUMEPLUSRAND); + } + if (j != 0) { + fprintf(stderr, "Not enough memory for csg fields! Aborting...\n"); + exit(0); + } + j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); + if (j != 0) { + fprintf(stderr, "Not enough memory for moment fields! 
Aborting...\n"); + exit(0); + } + + if(g_running_phmc) { + j = init_bispinor_field(VOLUME/2, NO_OF_BISPINORFIELDS); + if (j!= 0) { + fprintf(stderr, "Not enough memory for bi-spinor fields! Aborting...\n"); + exit(0); + } + } + + /* list and initialize measurements*/ + if(g_proc_id == 0) { + printf("\n"); + for(j = 0; j < no_measurements; j++) { + printf("# measurement id %d, type = %d: Frequency %d\n", j, measurement_list[j].type, measurement_list[j].freq); + } + } + init_measurements(); + + /*construct the filenames for the observables and the parameters*/ + strncpy(datafilename,filename,200); + strcat(datafilename,".data"); + strncpy(parameterfilename,filename,200); + strcat(parameterfilename,".para"); + + if(g_proc_id == 0){ + parameterfile = fopen(parameterfilename, "a"); + write_first_messages(parameterfile, "hmc", git_hash); + } + + /* define the geometry */ + geometry(); + + /* define the boundary conditions for the fermion fields */ + boundary(g_kappa); + + status = check_geometry(); + + if (status != 0) { + fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); + exit(1); + } + + +#ifdef _USE_HALFSPINOR + j = init_dirac_halfspinor(); + if (j!= 0) { + fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); + exit(-1); + } + + j = init_dirac_halfspinor32(); + if (j != 0) + { + fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n"); + exit(-1); + } + +# if (defined _PERSISTENT) + init_xchange_halffield(); +# endif +#endif + + /* Initialise random number generator */ + start_ranlux(rlxd_level, random_seed^trajectory_counter); + + /* Set up the gauge field */ + /* continue and restart */ + if(startoption==3 || startoption == 2) { + if(g_proc_id == 0) { + printf("# Trying to read gauge field from file %s in %s precision.\n", + gauge_input_filename, (gauge_precision_read_flag == 32 ? 
"single" : "double")); + fflush(stdout); + } + if( (status = read_gauge_field(gauge_input_filename,g_gauge_field)) != 0) { + fprintf(stderr, "Error %d while reading gauge field from %s\nAborting...\n", status, gauge_input_filename); + exit(-2); + } + + if (g_proc_id == 0){ + printf("# Finished reading gauge field.\n"); + fflush(stdout); + } + } + else if (startoption == 1) { + /* hot */ + random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); + } + else if(startoption == 0) { + /* cold */ + unit_g_gauge_field(); + } + + /*For parallelization: exchange the gaugefield */ +#ifdef TM_USE_MPI + xchange_gauge(g_gauge_field); + update_tm_gauge_exchange(&g_gauge_state); +#endif + + /*Convert to a 32 bit gauge field, after xchange*/ + convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND + g_dbw2rand); +#ifdef TM_USE_MPI + update_tm_gauge_exchange(&g_gauge_state_32); +#endif + + + if(even_odd_flag) { + j = init_monomials(VOLUMEPLUSRAND/2, even_odd_flag); + } + else { + j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); + } + if (j != 0) { + fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); + exit(0); + } + + init_integrator(); + + if(g_proc_id == 0) { + for(j = 0; j < no_monomials; j++) { + printf("# monomial id %d type = %d timescale %d\n", j, monomial_list[j].type, monomial_list[j].timescale); + } + } + + plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); + if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) 
{ + rectangle_energy = measure_rectangles( (const su3**) g_gauge_field); + if(g_proc_id == 0){ + fprintf(parameterfile,"# Computed rectangle value: %14.12f.\n",rectangle_energy/(12.*VOLUME*g_nproc)); + } + } + //eneg = g_rgi_C0 * plaquette_energy + g_rgi_C1 * rectangle_energy; + + if(g_proc_id == 0) { + fprintf(parameterfile,"# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc)); + printf("# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc)); + fclose(parameterfile); + } + + /* set ddummy to zero */ + for(ix = 0; ix < VOLUMEPLUSRAND; ix++){ + for(mu=0; mu<4; mu++){ + ddummy[ix][mu].d1=0.; + ddummy[ix][mu].d2=0.; + ddummy[ix][mu].d3=0.; + ddummy[ix][mu].d4=0.; + ddummy[ix][mu].d5=0.; + ddummy[ix][mu].d6=0.; + ddummy[ix][mu].d7=0.; + ddummy[ix][mu].d8=0.; + } + } + + + for(j = 0; j < no_monomials; j++) { + if( (monomial_list[j].type == NDPOLY) || (monomial_list[j].type == NDDETRATIO) + || (monomial_list[j].type == NDCLOVER) || (monomial_list[j].type == NDRAT) + || (monomial_list[j].type == NDCLOVERRAT) || (monomial_list[j].type == NDRATCOR) + || (monomial_list[j].type == NDCLOVERRATCOR) || (monomial_list[j].type == NDCLOVERDETRATIO) ) { + if( (monomial_list[j].rec_ev != 0) ) { + monomial * mnl = &monomial_list[j]; +#ifdef TM_USE_QUDA + eigsolveQuda(&eval_max, 1, eigenvalue_precision, 1, 0, 1000, 1, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 0); + if( fabs(mnl->EVMax - 1.) 
< 2*DBL_EPSILON ) { + eval_max /= mnl->StildeMax; + } + if(g_proc_id == 0){ + printf("monomial name: %s , id: %d, maximal eigenvalue = %e\n",mnl->name,j,creal(eval_max)); + } + eigsolveQuda(&eval_min, 1, eigenvalue_precision, 1, 0, 1000, 0, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 0); + if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) { + eval_min /= mnl->StildeMax; + } + if(g_proc_id == 0){ + printf("monomial name: %s , id: %d, lowest eigenvalue = %e\n",mnl->name,j,creal(eval_min)); + } +#else + if(g_proc_id == 0) { + fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n"); + #ifdef TM_USE_MPI + MPI_Finalize(); + #endif + exit(-2); + } +#endif + } + }else if( (monomial_list[j].type == CLOVERTRLOG) || (monomial_list[j].type == CLOVERDET) + || (monomial_list[j].type == CLOVERDETRATIO) || (monomial_list[j].type == CLOVERNDTRLOG) + || (monomial_list[j].type == CLOVERRAT) || (monomial_list[j].type == CLOVERRATCOR) + || (monomial_list[j].type == CLOVERDETRATIORW) || (monomial_list[j].type == POLY) + || (monomial_list[j].type == POLYDETRATIO) || (monomial_list[j].type == RAT) + || (monomial_list[j].type == RATCOR) ) { + if( (monomial_list[j].rec_ev != 0) ) { + monomial * mnl = &monomial_list[j]; +#ifdef TM_USE_QUDA + eigsolveQuda(&eval_max, 1, eigenvalue_precision, 1, 0, 1000, 1, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 1); + if( fabs(mnl->EVMax - 1.) 
< 2*DBL_EPSILON ) { + eval_max /= mnl->StildeMax; + } + if(g_proc_id == 0){ + printf("monomial name: %s , id: %d, maximal eigenvalue = %e\n",mnl->name,j,creal(eval_max)); + } + eigsolveQuda(&eval_min, 1, eigenvalue_precision, 1, 0, 1000, 0, + mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, + mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, + 1, // we only support even-odd here + mnl->solver_params.refinement_precision, + mnl->solver_params.sloppy_precision, + mnl->solver_params.compression_type, 1); + if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) { + eval_min /= mnl->StildeMax; + } + if(g_proc_id == 0){ + printf("monomial name: %s , id: %d, lowest eigenvalue = %e\n",mnl->name,j,creal(eval_min)); + } +#else + if(g_proc_id == 0) { + fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n"); + #ifdef TM_USE_MPI + MPI_Finalize(); + #endif + exit(-2); + } +#endif + } + } + } + + #ifdef TM_USE_OMP + free_omp_accumulators(); +#endif + free_gauge_tmp(); + free_gauge_field(); + free_gauge_field_32(); + free_geometry_indices(); + free_spinor_field(); + free_spinor_field_32(); + free_moment_field(); + free_monomials(); + if(g_running_phmc) { + free_bispinor_field(); + free_chi_spinor_field(); + } + free(input_filename); + free(filename); + free(SourceInfo.basename); + free(PropInfo.basename); + + tm_stopwatch_pop(&g_timers, 0, 1, ""); + +#ifdef TM_USE_QUDA + _endQuda(); +#endif +#ifdef TM_USE_MPI + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +#endif + + return(0); +#ifdef _KOJAK_INST +#pragma pomp inst end(main) +#endif +} + +static void usage(const tm_ExitCode_t exit_code){ + if(g_proc_id == 0){ + fprintf(stdout, "QUDA eigensolver for finding largest and lowest eigenvalues\n"); + fprintf(stdout, "Use exactly same input as hmc_tm\n"); + fprintf(stdout, "Set `ComputeEVFreq` to non-zero for the operators for which eigenvalues need to calculated\n"); + fprintf(stdout, "Version %s \n\n", 
TMLQCD_PACKAGE_VERSION); + fprintf(stdout, "Please send bug reports to %s\n", TMLQCD_PACKAGE_BUGREPORT); + fprintf(stdout, "Usage: hmc_tm [options]\n"); + fprintf(stdout, "Options: [-f input-filename] default: hmc.input\n"); + fprintf(stdout, " [-o output-filename] default: output\n"); + fprintf(stdout, " [-v] more verbosity\n"); + fprintf(stdout, " [-V] print version information and exit\n"); + fprintf(stdout, " [-m level] request MPI thread level 'single' or 'multiple' (default: 'single')\n"); + fprintf(stdout, " [-h|-? this help]\n"); + } + exit(exit_code); +} + +static void process_args(int argc, char *argv[], char ** input_filename, char ** filename) { + int c; + while ((c = getopt(argc, argv, "h?vVf:o:m:")) != -1) { + switch (c) { + case 'f': + *input_filename = calloc(200, sizeof(char)); + strncpy(*input_filename, optarg, 200); + break; + case 'o': + *filename = calloc(200, sizeof(char)); + strncpy(*filename, optarg, 200); + break; + case 'v': + verbose = 1; + break; + case 'V': + if(g_proc_id == 0) { + fprintf(stdout,"%s %s\n",TMLQCD_PACKAGE_STRING,git_hash); + } + exit(TM_EXIT_SUCCESS); + break; + case 'm': + if( !strcmp(optarg, "single") ){ + g_mpi_thread_level = TM_MPI_THREAD_SINGLE; + } else if ( !strcmp(optarg, "multiple") ) { + g_mpi_thread_level = TM_MPI_THREAD_MULTIPLE; + } else { + tm_debug_printf(0, 0, "[hmc_tm process_args]: invalid input for -m command line argument\n"); + usage(TM_EXIT_INVALID_CMDLINE_ARG); + } + break; + case 'h': + case '?': + default: + usage(TM_EXIT_SUCCESS); + break; + } + } +} + +static void set_default_filenames(char ** input_filename, char ** filename) { + if( *input_filename == NULL ) { + *input_filename = calloc(13, sizeof(char)); + strcpy(*input_filename,"hmc.input"); + } + + if( *filename == NULL ) { + *filename = calloc(7, sizeof(char)); + strcpy(*filename,"output"); + } +} + From 3bb66397238102f43b4cd58eba895eee91c6bd40 Mon Sep 17 00:00:00 2001 From: Bartosz Kostrzewa Date: Wed, 4 Oct 2023 13:02:32 +0200 Subject: 
[PATCH 27/27] Revert "script for testing eigsolveQuda" This reverts commit a46c4c654b3275ecf06f73448b2b0612d3375c9d. --- test_eigsolveQuda.c | 558 -------------------------------------------- 1 file changed, 558 deletions(-) delete mode 100644 test_eigsolveQuda.c diff --git a/test_eigsolveQuda.c b/test_eigsolveQuda.c deleted file mode 100644 index e7baf3090..000000000 --- a/test_eigsolveQuda.c +++ /dev/null @@ -1,558 +0,0 @@ -#include "lime.h" -#if HAVE_CONFIG_H -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef TM_USE_MPI -#include -#endif -#ifdef TM_USE_OMP -#include -#endif -#include "global.h" -#include "git_hash.h" -#include "io/params.h" -#include "io/gauge.h" -#include "getopt.h" -#include "ranlxd.h" -#include "geometry_eo.h" -#include "start.h" -#include "measure_gauge_action.h" -#include "measure_rectangles.h" -#ifdef TM_USE_MPI -#include "xchange/xchange.h" -#endif -#include "read_input.h" -#include "mpi_init.h" -#include "sighandler.h" -#include "update_tm.h" -#include "init/init.h" -#include "test/check_geometry.h" -#include "boundary.h" -#include "phmc.h" -#include "solver/solver.h" -#include "monomial/monomial.h" -#include "integrator.h" -#include "sighandler.h" -#include "meas/measurements.h" -#include "operator/tm_operators_nd.h" -#ifdef DDalphaAMG -#include "DDalphaAMG_interface.h" -#endif -#ifdef TM_USE_QUDA -# include "quda_interface.h" -#endif - -extern int nstore; - -int const rlxdsize = 105; - -static void usage(const tm_ExitCode_t exit_code); -static void process_args(int argc, char *argv[], char ** input_filename, char ** filename); -static void set_default_filenames(char ** input_filename, char ** filename); - -int main(int argc,char *argv[]) { - - FILE *parameterfile=NULL, *countfile=NULL; - char *filename = NULL; - char datafilename[206]; - char parameterfilename[206]; - char gauge_filename[50]; - char nstore_filename[50]; - char tmp_filename[50]; - char *input_filename = 
NULL; - int status = 0, accept = 0; - int j,ix,mu, trajectory_counter=0; - unsigned int const io_max_attempts = 5; /* Make this configurable? */ - unsigned int const io_timeout = 5; /* Make this configurable? */ - struct timeval t1; - - _Complex double eval_min = 0.0; - _Complex double eval_max = 0.0; - - /* Energy corresponding to the Gauge part */ - double plaquette_energy = 0., rectangle_energy = 0.; - /* Acceptance rate */ - int Rate=0; - /* Do we want to perform reversibility checks */ - /* See also return_check_flag in read_input.h */ - int return_check = 0; - - paramsXlfInfo *xlfInfo; - -/* For online measurements */ - measurement * meas; - int imeas; - - init_critical_globals(TM_PROGRAM_HMC_TM); - -#ifdef _KOJAK_INST -#pragma pomp inst init -#pragma pomp inst begin(main) -#endif - - #if (defined SSE || defined SSE2 || SSE3) - signal(SIGILL,&catch_ill_inst); -#endif - - strcpy(gauge_filename,"conf.save"); - strcpy(nstore_filename,"nstore_counter"); - strcpy(tmp_filename, ".conf.tmp"); - - verbose = 1; - g_use_clover_flag = 0; - - process_args(argc,argv,&input_filename,&filename); - set_default_filenames(&input_filename,&filename); - - init_parallel_and_read_input(argc, argv, input_filename); - - DUM_DERI = 4; - DUM_MATRIX = DUM_DERI+7; - if(g_running_phmc) { - NO_OF_SPINORFIELDS = DUM_MATRIX+8; - } - else { - NO_OF_SPINORFIELDS = DUM_MATRIX+6; - } - DUM_BI_DERI = 6; - DUM_BI_SOLVER = DUM_BI_DERI+7; - - DUM_BI_MATRIX = DUM_BI_SOLVER+6; - NO_OF_BISPINORFIELDS = DUM_BI_MATRIX+6; - - //4 extra fields (corresponding to DUM_MATRIX+0..5) for deg. and ND matrix mult. 
- NO_OF_SPINORFIELDS_32 = 6; - - tmlqcd_mpi_init(argc, argv); - tm_stopwatch_push(&g_timers, "HMC", ""); - - if(nstore == -1) { - countfile = fopen(nstore_filename, "r"); - if(countfile != NULL) { - j = fscanf(countfile, "%d %d %s\n", &nstore, &trajectory_counter, gauge_input_filename); - if(j < 1) nstore = 0; - if(j < 2) trajectory_counter = 0; - fclose(countfile); - } - else { - nstore = 0; - trajectory_counter = 0; - } - } - -#ifndef TM_USE_MPI - g_dbw2rand = 0; -#endif - - - g_mu = g_mu1; - -#ifdef _GAUGE_COPY - status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); - status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1); -#else - status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); - status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 0); -#endif - /* need temporary gauge field for gauge reread checks and in update_tm */ - status += init_gauge_tmp(VOLUME); - - status += init_gauge_fg(VOLUME); - - if (status != 0) { - fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n"); - exit(0); - } - j = init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); - if (j != 0) { - fprintf(stderr, "Not enough memory for geometry_indices! Aborting...\n"); - exit(0); - } - if(even_odd_flag) { - j = init_spinor_field(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS); - j += init_spinor_field_32(VOLUMEPLUSRAND/2, NO_OF_SPINORFIELDS_32); - } - else { - j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS); - j += init_spinor_field_32(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS_32); - } - if (j != 0) { - fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); - exit(0); - } - if(even_odd_flag) { - j = init_csg_field(VOLUMEPLUSRAND/2); - } - else { - j = init_csg_field(VOLUMEPLUSRAND); - } - if (j != 0) { - fprintf(stderr, "Not enough memory for csg fields! Aborting...\n"); - exit(0); - } - j = init_moment_field(VOLUME, VOLUMEPLUSRAND + g_dbw2rand); - if (j != 0) { - fprintf(stderr, "Not enough memory for moment fields! 
Aborting...\n"); - exit(0); - } - - if(g_running_phmc) { - j = init_bispinor_field(VOLUME/2, NO_OF_BISPINORFIELDS); - if (j!= 0) { - fprintf(stderr, "Not enough memory for bi-spinor fields! Aborting...\n"); - exit(0); - } - } - - /* list and initialize measurements*/ - if(g_proc_id == 0) { - printf("\n"); - for(j = 0; j < no_measurements; j++) { - printf("# measurement id %d, type = %d: Frequency %d\n", j, measurement_list[j].type, measurement_list[j].freq); - } - } - init_measurements(); - - /*construct the filenames for the observables and the parameters*/ - strncpy(datafilename,filename,200); - strcat(datafilename,".data"); - strncpy(parameterfilename,filename,200); - strcat(parameterfilename,".para"); - - if(g_proc_id == 0){ - parameterfile = fopen(parameterfilename, "a"); - write_first_messages(parameterfile, "hmc", git_hash); - } - - /* define the geometry */ - geometry(); - - /* define the boundary conditions for the fermion fields */ - boundary(g_kappa); - - status = check_geometry(); - - if (status != 0) { - fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); - exit(1); - } - - -#ifdef _USE_HALFSPINOR - j = init_dirac_halfspinor(); - if (j!= 0) { - fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); - exit(-1); - } - - j = init_dirac_halfspinor32(); - if (j != 0) - { - fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n"); - exit(-1); - } - -# if (defined _PERSISTENT) - init_xchange_halffield(); -# endif -#endif - - /* Initialise random number generator */ - start_ranlux(rlxd_level, random_seed^trajectory_counter); - - /* Set up the gauge field */ - /* continue and restart */ - if(startoption==3 || startoption == 2) { - if(g_proc_id == 0) { - printf("# Trying to read gauge field from file %s in %s precision.\n", - gauge_input_filename, (gauge_precision_read_flag == 32 ? 
"single" : "double")); - fflush(stdout); - } - if( (status = read_gauge_field(gauge_input_filename,g_gauge_field)) != 0) { - fprintf(stderr, "Error %d while reading gauge field from %s\nAborting...\n", status, gauge_input_filename); - exit(-2); - } - - if (g_proc_id == 0){ - printf("# Finished reading gauge field.\n"); - fflush(stdout); - } - } - else if (startoption == 1) { - /* hot */ - random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); - } - else if(startoption == 0) { - /* cold */ - unit_g_gauge_field(); - } - - /*For parallelization: exchange the gaugefield */ -#ifdef TM_USE_MPI - xchange_gauge(g_gauge_field); - update_tm_gauge_exchange(&g_gauge_state); -#endif - - /*Convert to a 32 bit gauge field, after xchange*/ - convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND + g_dbw2rand); -#ifdef TM_USE_MPI - update_tm_gauge_exchange(&g_gauge_state_32); -#endif - - - if(even_odd_flag) { - j = init_monomials(VOLUMEPLUSRAND/2, even_odd_flag); - } - else { - j = init_monomials(VOLUMEPLUSRAND, even_odd_flag); - } - if (j != 0) { - fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n"); - exit(0); - } - - init_integrator(); - - if(g_proc_id == 0) { - for(j = 0; j < no_monomials; j++) { - printf("# monomial id %d type = %d timescale %d\n", j, monomial_list[j].type, monomial_list[j].timescale); - } - } - - plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); - if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) 
{ - rectangle_energy = measure_rectangles( (const su3**) g_gauge_field); - if(g_proc_id == 0){ - fprintf(parameterfile,"# Computed rectangle value: %14.12f.\n",rectangle_energy/(12.*VOLUME*g_nproc)); - } - } - //eneg = g_rgi_C0 * plaquette_energy + g_rgi_C1 * rectangle_energy; - - if(g_proc_id == 0) { - fprintf(parameterfile,"# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc)); - printf("# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc)); - fclose(parameterfile); - } - - /* set ddummy to zero */ - for(ix = 0; ix < VOLUMEPLUSRAND; ix++){ - for(mu=0; mu<4; mu++){ - ddummy[ix][mu].d1=0.; - ddummy[ix][mu].d2=0.; - ddummy[ix][mu].d3=0.; - ddummy[ix][mu].d4=0.; - ddummy[ix][mu].d5=0.; - ddummy[ix][mu].d6=0.; - ddummy[ix][mu].d7=0.; - ddummy[ix][mu].d8=0.; - } - } - - - for(j = 0; j < no_monomials; j++) { - if( (monomial_list[j].type == NDPOLY) || (monomial_list[j].type == NDDETRATIO) - || (monomial_list[j].type == NDCLOVER) || (monomial_list[j].type == NDRAT) - || (monomial_list[j].type == NDCLOVERRAT) || (monomial_list[j].type == NDRATCOR) - || (monomial_list[j].type == NDCLOVERRATCOR) || (monomial_list[j].type == NDCLOVERDETRATIO) ) { - if( (monomial_list[j].rec_ev != 0) ) { - monomial * mnl = &monomial_list[j]; -#ifdef TM_USE_QUDA - eigsolveQuda(&eval_max, 1, eigenvalue_precision, 1, 0, 1000, 1, - mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, - mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, - 1, // we only support even-odd here - mnl->solver_params.refinement_precision, - mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type, 0); - if( fabs(mnl->EVMax - 1.) 
< 2*DBL_EPSILON ) { - eval_max /= mnl->StildeMax; - } - if(g_proc_id == 0){ - printf("monomial name: %s , id: %d, maximal eigenvalue = %e\n",mnl->name,j,creal(eval_max)); - } - eigsolveQuda(&eval_min, 1, eigenvalue_precision, 1, 0, 1000, 0, - mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, - mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, - 1, // we only support even-odd here - mnl->solver_params.refinement_precision, - mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type, 0); - if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) { - eval_min /= mnl->StildeMax; - } - if(g_proc_id == 0){ - printf("monomial name: %s , id: %d, lowest eigenvalue = %e\n",mnl->name,j,creal(eval_min)); - } -#else - if(g_proc_id == 0) { - fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n"); - #ifdef TM_USE_MPI - MPI_Finalize(); - #endif - exit(-2); - } -#endif - } - }else if( (monomial_list[j].type == CLOVERTRLOG) || (monomial_list[j].type == CLOVERDET) - || (monomial_list[j].type == CLOVERDETRATIO) || (monomial_list[j].type == CLOVERNDTRLOG) - || (monomial_list[j].type == CLOVERRAT) || (monomial_list[j].type == CLOVERRATCOR) - || (monomial_list[j].type == CLOVERDETRATIORW) || (monomial_list[j].type == POLY) - || (monomial_list[j].type == POLYDETRATIO) || (monomial_list[j].type == RAT) - || (monomial_list[j].type == RATCOR) ) { - if( (monomial_list[j].rec_ev != 0) ) { - monomial * mnl = &monomial_list[j]; -#ifdef TM_USE_QUDA - eigsolveQuda(&eval_max, 1, eigenvalue_precision, 1, 0, 1000, 1, - mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, - mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, - 1, // we only support even-odd here - mnl->solver_params.refinement_precision, - mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type, 1); - if( fabs(mnl->EVMax - 1.) 
< 2*DBL_EPSILON ) { - eval_max /= mnl->StildeMax; - } - if(g_proc_id == 0){ - printf("monomial name: %s , id: %d, maximal eigenvalue = %e\n",mnl->name,j,creal(eval_max)); - } - eigsolveQuda(&eval_min, 1, eigenvalue_precision, 1, 0, 1000, 0, - mnl->accprec, mnl->maxiter, mnl->eig_polydeg, mnl->eig_amin, - mnl->eig_amax, mnl->eig_n_kr, mnl->solver, g_relative_precision_flag, - 1, // we only support even-odd here - mnl->solver_params.refinement_precision, - mnl->solver_params.sloppy_precision, - mnl->solver_params.compression_type, 1); - if( fabs(mnl->EVMax - 1.) < 2*DBL_EPSILON ) { - eval_min /= mnl->StildeMax; - } - if(g_proc_id == 0){ - printf("monomial name: %s , id: %d, lowest eigenvalue = %e\n",mnl->name,j,creal(eval_min)); - } -#else - if(g_proc_id == 0) { - fprintf(stderr, "Error: Attempted to use QUDA eigensolver but this build was not configured for QUDA usage.\n"); - #ifdef TM_USE_MPI - MPI_Finalize(); - #endif - exit(-2); - } -#endif - } - } - } - - #ifdef TM_USE_OMP - free_omp_accumulators(); -#endif - free_gauge_tmp(); - free_gauge_field(); - free_gauge_field_32(); - free_geometry_indices(); - free_spinor_field(); - free_spinor_field_32(); - free_moment_field(); - free_monomials(); - if(g_running_phmc) { - free_bispinor_field(); - free_chi_spinor_field(); - } - free(input_filename); - free(filename); - free(SourceInfo.basename); - free(PropInfo.basename); - - tm_stopwatch_pop(&g_timers, 0, 1, ""); - -#ifdef TM_USE_QUDA - _endQuda(); -#endif -#ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); -#endif - - return(0); -#ifdef _KOJAK_INST -#pragma pomp inst end(main) -#endif -} - -static void usage(const tm_ExitCode_t exit_code){ - if(g_proc_id == 0){ - fprintf(stdout, "QUDA eigensolver for finding largest and lowest eigenvalues\n"); - fprintf(stdout, "Use exactly same input as hmc_tm\n"); - fprintf(stdout, "Set `ComputeEVFreq` to non-zero for the operators for which eigenvalues need to calculated\n"); - fprintf(stdout, "Version %s \n\n", 
TMLQCD_PACKAGE_VERSION); - fprintf(stdout, "Please send bug reports to %s\n", TMLQCD_PACKAGE_BUGREPORT); - fprintf(stdout, "Usage: hmc_tm [options]\n"); - fprintf(stdout, "Options: [-f input-filename] default: hmc.input\n"); - fprintf(stdout, " [-o output-filename] default: output\n"); - fprintf(stdout, " [-v] more verbosity\n"); - fprintf(stdout, " [-V] print version information and exit\n"); - fprintf(stdout, " [-m level] request MPI thread level 'single' or 'multiple' (default: 'single')\n"); - fprintf(stdout, " [-h|-? this help]\n"); - } - exit(exit_code); -} - -static void process_args(int argc, char *argv[], char ** input_filename, char ** filename) { - int c; - while ((c = getopt(argc, argv, "h?vVf:o:m:")) != -1) { - switch (c) { - case 'f': - *input_filename = calloc(200, sizeof(char)); - strncpy(*input_filename, optarg, 200); - break; - case 'o': - *filename = calloc(200, sizeof(char)); - strncpy(*filename, optarg, 200); - break; - case 'v': - verbose = 1; - break; - case 'V': - if(g_proc_id == 0) { - fprintf(stdout,"%s %s\n",TMLQCD_PACKAGE_STRING,git_hash); - } - exit(TM_EXIT_SUCCESS); - break; - case 'm': - if( !strcmp(optarg, "single") ){ - g_mpi_thread_level = TM_MPI_THREAD_SINGLE; - } else if ( !strcmp(optarg, "multiple") ) { - g_mpi_thread_level = TM_MPI_THREAD_MULTIPLE; - } else { - tm_debug_printf(0, 0, "[hmc_tm process_args]: invalid input for -m command line argument\n"); - usage(TM_EXIT_INVALID_CMDLINE_ARG); - } - break; - case 'h': - case '?': - default: - usage(TM_EXIT_SUCCESS); - break; - } - } -} - -static void set_default_filenames(char ** input_filename, char ** filename) { - if( *input_filename == NULL ) { - *input_filename = calloc(13, sizeof(char)); - strcpy(*input_filename,"hmc.input"); - } - - if( *filename == NULL ) { - *filename = calloc(7, sizeof(char)); - strcpy(*filename,"output"); - } -} -