Merge remote-tracking branch 'origin/quda_work' into deriv_mg_tune

etmc · Dec 28, 2023 · 2fed63c · 2fed63c
2 parents a1971be + 863ed0f
commit 2fed63c
Show file tree

Hide file tree

Showing 6 changed files with 65 additions and 17 deletions.
diff --git a/doc/quda.tex b/doc/quda.tex
@@ -121,8 +121,8 @@ \subsubsection{General settings}
 \begin{itemize}
   \item \texttt{FermionBC} Forces twisted ({\ttfamily theta}), periodic ({\ttfamily pbc}) or antiperiodic ({\ttfamily apbc}) temporal quark field boundary conditions irrespective of what has been set for \texttt{ThetaT}. This setting exists because at the time of writing (2017.12.28), there seems to be a bug or incompatibility in QUDA which causes (anti-)periodic boundary conditions with gauge compression to produce incorrect propagators. Use with care as the residual check using tmLQCD operators will suggest a non-converged residual.
   \item \texttt{Pipeline} The pipeline length for fused operations in some solvers (for the tmLQCD QUDA interface, at the time of writing in May 2023, this is just GCR). (positive integer, default: $0$)
-  \item \texttt{gcrNkrylov} 
-  \item \texttt{ReliableDelta}
+  \item \texttt{gcrNkrylov} Maximum size of Krylov space used by the solver. (positive integer, default: 10)
+  \item \texttt{ReliableDelta} Reliable update tolerance. (positive float, default: 0.001)
 \end{itemize}
 
 \subsubsection{More advanced settings}
@@ -251,7 +251,7 @@ \subsubsection{QUDA-MG interface}
   \item{ \texttt{MGSmootherPreIterations}: number of smoothing steps before coarse grid correction on a per-level basis. (comma-separated list of zero or positive integers, default: $0$ on all levels)}
   \item{ \texttt{MGSmootherPostIterations}: number of smoothing steps after prolongation on a per-level basis. (comma-separated list of zero or positive integers, default: $4$ on all levels)}
   \item{ \texttt{MGOverUnderRelaxationFactor}: Over- or under-relaxation factor on a per-level basis. (comma-separated list of positive floats, default: $0.85$ on all levels)}
-  \item{ \texttt{MGCoarseMuFactor}: Scaling factor for twisted mass on a per-level basis, accelerates convergence and reduces condition numer of coarse grid solves. From experience it seems that it's reasonable to set this $>1.0$ only on the coarsest level, but sometimes it might also help on intermediate levels. If running with twisted mass, this should always be set and tuned for maximum efficiency. When using coarse-grid deflation (see \texttt{MGEigSolverRequireConvergence}), this should usually be set to $1.0$ on all levels. (comma-separated list of positive floats, usually $ > 1.0$, default $8.0$ from the second level upwards).}
+  \item{ \texttt{MGCoarseMuFactor}: Scaling factor for twisted mass on a per-level basis, accelerates convergence and reduces condition number of coarse grid solves. From experience it seems that it's reasonable to set this $>1.0$ only on the coarsest level, but sometimes it might also help on intermediate levels. If running with twisted mass, this should always be set and tuned for maximum efficiency. When using coarse-grid deflation (see \texttt{MGEigSolverRequireConvergence}), this should usually be set to $1.0$ on all levels. (comma-separated list of positive floats, usually $ > 1.0$, default: $1.0$ on all levels).}
   \item{ \texttt{MGSetup2KappaMu}: The value of $2\kappa\mu$ which should be used during the MG setup process. This is important in the HMC for standard twisted mass fermions, for example, because the setup should always be performed with the smallest quark mass to be employed in a simulation and it might be that a monomial with a heavier twisted quark mass is the first to call the MG and thus to trigger the setup. Generally this is set to the target light twisted quark mass. Setting this to $0.0$ implies that it is ignored. (float, default: $0.0$) }
   \item{ \texttt{MGReuseSetupMuThreshold}: When the twisted quark mass is changed between solves using the MG solver, the MG setup is usually \emph{updated} for this new $\mu$ value. One can attempt to reuse the MG setup for solves with different $\mu$ values up to this threshold, i.e., the setup is reused as long as the condition $2\kappa\cdot|\mu_\mathrm{old} - \mu_\mathrm{new}| < x$ holds, where $x$ is the value given for this parameter. (positive float, default: \texttt{2*DBL\_EPSILON})}
   \item{ \texttt{MGRefreshSetupMDUThreshold}: When the MG is used in the HMC, the MG setup must be regularly refreshed by running a few iterations of the setup solver on the current set of approximate null vectors in order to evolve these with the changing gauge field. A good rule of thumb is to perform this setup refresh about twice per coarsest time step. In other words: for a trajectory length $\tau$ and $N$ integration steps on the coarsest time scale, the refresh should be performed at intervals of $(\tau/(2N)-\epsilon)$ MDUs, where $\epsilon$ is a small number to make sure that the threshold is hit at every half-step of the integrator. (positive float, default: \texttt{2*DBL\_EPSILON})}

diff --git a/include/tmLQCD.h b/include/tmLQCD.h
@@ -83,6 +83,12 @@ int tmLQCD_read_gauge(const int nconfig);
 int tmLQCD_invert(double *const propagator, double *const source, const int op_id,
                   const int write_prop);
 
+// invert with source and propagator provided in TXYZ spin colour complex lexicographic order
+// propagator has kappa normalisation
+// the two propagators and sources correspond to two flavours
+int tmLQCD_invert_doublet(double* const propagator0, double* const propagator1, double* const source0, 
+                  double* const source1, const int op_id, const int write_prop);
+
 // invert on odd part of lattice with prepared source
 int tmLQCD_invert_eo(double *const Odd_out, double *const Odd_in, const int op_id);
 

diff --git a/operator.c b/operator.c
@@ -410,6 +410,12 @@ void op_invert(const int op_id, const int index_start, const int write_prop) {
         break;
     }
   } else if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
+    // there is no default for this set anywhere other than prepare_source.c
+    // such that calling this branch from an external program via tmLQCD_invert_doublet
+    // would result in undefined behaviour
+    // we thus set a default here unless it's explicitly set to 2
+    if( SourceInfo.no_flavours != 2 ) SourceInfo.no_flavours = 1;
+
     if(optr->type == DBCLOVER) {
       if (g_cart_id == 0 && g_debug_level > 1) {
         printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);

diff --git a/quda_interface.c b/quda_interface.c
@@ -678,11 +678,6 @@ void reorder_mom_fromQuda() {
   // mom_quda -> mom_quda_reordered
   tm_stopwatch_push(&g_timers, __func__, "");
 
-#ifdef TM_USE_OMP
-#pragma omp parallel
-  {
-#endif
-
 #ifdef TM_USE_OMP
   #pragma omp parallel for collapse(4)
 #endif
@@ -714,9 +709,6 @@ void reorder_mom_fromQuda() {
 #endif
         }
 
-#ifdef TM_USE_OMP
-  }
-#endif
   tm_stopwatch_pop(&g_timers, 0, 0, "TM_QUDA");
 }
 
@@ -2526,11 +2518,6 @@ void compute_gauge_derivative_quda(monomial * const mnl, hamiltonian_field_t * c
     else
       loop_coeff[i] = -0.66666666666 * g_beta * mnl->c1;
   }
-
-  #pragma omp parallel for
-  for(int i = 0; i < 4; i++){
-    memset(mom_quda[i], 0, VOLUME*10*sizeof(double));
-  }
 
   reorder_gauge_toQuda(hf->gaugefield, NO_COMPRESSION);
   // the reordering above overwrites gauge_quda

diff --git a/read_input.l b/read_input.l
@@ -3980,7 +3980,7 @@ int read_input(char * conf_file){
     quda_input.mg_smoother_tol[level] = _default_quda_mg_smoother_tol;
 
     quda_input.mg_n_vec[level] = _default_quda_mg_n_vec;
-    quda_input.mg_mu_factor[level] = 1.0;
+    quda_input.mg_mu_factor[level] = _default_quda_mg_mu_factor;
     quda_input.mg_coarse_solver_type[level] = QUDA_GCR_INVERTER;
     quda_input.mg_smoother_type[level] = QUDA_CA_GCR_INVERTER;
 

diff --git a/wrapper/lib_wrapper.c b/wrapper/lib_wrapper.c
@@ -278,6 +278,55 @@ int tmLQCD_invert(double* const propagator, double* const source, const int op_i
   return (0);
 }
 
+// Invert the operator with index op_id on a pair of sources, one per flavour.
+//
+// source0/source1 and propagator0/propagator1 are full-lattice fields in
+// lexicographic order; they are reinterpreted as spinor arrays and converted
+// to/from the even/odd layout used internally. The first eight entries of
+// g_spinor_field serve as work space for the operator's source and
+// propagator fields, so their previous contents are overwritten.
+//
+// Side effects: g_mu is reset to 0 on entry (also on the error paths below)
+// and operator_list[op_id].sr0..sr3 / prop0..prop3 are re-pointed.
+//
+// Returns 0 on success, -1 if the inversion interface has not been
+// initialised or if op_id is outside [0, no_operators).
+int tmLQCD_invert_doublet(double* const propagator0, double* const propagator1, double* const source0, 
+                  double* const source1, const int op_id, const int write_prop) {
+  unsigned int index_start = 0;
+  g_mu = 0.;
+
+  // only one process (g_proc_id == 0) emits the warning
+  if (lowmem_flag && g_proc_id == 0) {
+    printf(
+        "!!! WARNING: you are calling tmLQCD_invert_doublet in \'lowmem\' mode.\n Did you make sure that "
+        "all required fields are allocated and initialised??\n");
+  }
+
+  if (!tmLQCD_invert_initialised) {
+    // the message names the initialisation routine the caller has to invoke
+    fprintf(stderr, "tmLQCD_invert_doublet: tmLQCD_invert_init must be called first. Aborting...\n");
+    return (-1);
+  }
+
+  if (op_id < 0 || op_id >= no_operators) {
+    fprintf(stderr, "tmLQCD_invert_doublet: op_id=%d not in valid range. Aborting...\n", op_id);
+    return (-1);
+  }
+
+  // work space: fields 0-3 hold the even/odd halves of the two sources,
+  // fields 4-7 the even/odd halves of the two propagators
+  operator_list[op_id].sr0 = g_spinor_field[0];
+  operator_list[op_id].sr1 = g_spinor_field[1];
+  operator_list[op_id].sr2 = g_spinor_field[2];
+  operator_list[op_id].sr3 = g_spinor_field[3];
+  operator_list[op_id].prop0 = g_spinor_field[4];
+  operator_list[op_id].prop1 = g_spinor_field[5];
+  operator_list[op_id].prop2 = g_spinor_field[6];
+  operator_list[op_id].prop3 = g_spinor_field[7];
+
+  // clear the propagator halves (VOLUME / 2 sites each) before the inversion
+  zero_spinor_field(operator_list[op_id].prop0, VOLUME / 2);
+  zero_spinor_field(operator_list[op_id].prop1, VOLUME / 2);
+  zero_spinor_field(operator_list[op_id].prop2, VOLUME / 2);
+  zero_spinor_field(operator_list[op_id].prop3, VOLUME / 2);
+
+  // convert to even/odd order
+  convert_lexic_to_eo(operator_list[op_id].sr0, operator_list[op_id].sr1, (spinor*)source0);
+  convert_lexic_to_eo(operator_list[op_id].sr2, operator_list[op_id].sr3, (spinor*)source1);
+
+  // invert
+  operator_list[op_id].inverter(op_id, index_start, write_prop);
+
+  // convert back to lexicographic order
+  convert_eo_to_lexic((spinor*)propagator0, operator_list[op_id].prop0, operator_list[op_id].prop1);
+  convert_eo_to_lexic((spinor*)propagator1, operator_list[op_id].prop2, operator_list[op_id].prop3);
+
+  return (0);
+}
+
 int tmLQCD_invert_eo(double* const Odd_out, double* const Odd_in, const int op_id){
   unsigned int index_start = 0;
   if (!tmLQCD_invert_initialised) {