From 5b0ba741b01898948215c79993743bae520eccf4 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 27 Oct 2016 14:59:48 +0300
Subject: [PATCH 01/85] Working interface to DDalphaAMG TM2p1p1

---
 DDalphaAMG_interface.c        | 242 ++++++++++++++++++++++++++++++++--
 DDalphaAMG_interface.h        |   7 +
 default_input_values.h        |   1 +
 global.h                      |   2 +-
 linalg/assign_add_mul_r_32.c  |  30 ++---
 operator/tm_operators_nd.c    |  17 +++
 operator/tm_operators_nd.h    |  39 +++---
 operator/tm_operators_nd_32.c |  18 +++
 operator/tm_operators_nd_32.h |   3 +
 read_input.l                  |  12 +-
 solver/monomial_solve.c       |  81 +++++++++---
 11 files changed, 386 insertions(+), 66 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index e7283e64a..2dc930f07 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -33,8 +33,10 @@
 #include "read_input.h"
 #include "DDalphaAMG.h"
 #include "linalg_eo.h"
+#include "phmc.h"
 #include "operator/D_psi.h"
 #include "operator/tm_operators.h"
+#include "operator/tm_operators_nd.h"
 #include "operator/clovertm_operators.h"
 
 //Enable to test the solution. It cost an application more of the operator. 
@@ -139,6 +141,42 @@ static int MG_check(spinor * const phi_new, spinor * const phi_old, const int N,
   
 }
 
+static int MG_check_nd( spinor * const up_new, spinor * const dn_new, spinor * const up_old, spinor * const dn_old,
+			const int N, const double precision, matrix_mult_nd f) 
+{
+  double differ[2], residual;
+  spinor ** check_vect = NULL;
+  double acc_factor = 2;
+  
+  init_solver_field(&check_vect, VOLUMEPLUSRAND,2);
+  f( check_vect[0], check_vect[1], up_new, dn_new);
+  diff( check_vect[0], check_vect[0], up_old, N);
+  diff( check_vect[1], check_vect[1], dn_old, N);
+  differ[0] = sqrt(square_norm(check_vect[0], N, 1)+square_norm(check_vect[1], N, 1));
+  differ[1] = sqrt(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1));
+  finalize_solver(check_vect, 2);
+  
+  residual = differ[0]/differ[1];
+  
+  if( residual > precision && residual < acc_factor*precision ) {
+    if(g_proc_id == 0)
+      printf("WARNING: solution accepted even if the residual wasn't complitely acceptable (%e > %e) \n", residual, precision);
+  } else if( residual > acc_factor*precision ) {
+    if(g_proc_id == 0) {
+      printf("ERROR: something bad happened... MG converged giving the wrong solution!! Trying to restart... \n");
+      printf("ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e \n", differ[0],differ[1],differ[0]/differ[1],precision);
+    }
+    return 0;
+  } 
+
+  if (g_debug_level > 0 && g_proc_id == 0)
+    printf("MGTEST:  || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", differ[0],differ[1],differ[0]/differ[1]);
+  
+  return 1;
+  
+}
+
+
 static int MG_pre_solve( su3 **gf )
 {
   
@@ -216,7 +254,7 @@ static int MG_pre_solve( su3 **gf )
 }
 
 static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double precision,
-						  const int N, matrix_mult f)
+		    const int N, matrix_mult f)
 {
   
   // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> rescale by 1/4+m
@@ -304,9 +342,10 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 	    f == Qsw_psi ||       // Gamma5 - Schur complement with mu=0 on odd sites
 	    f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
 	    f == Q_minus_psi ) {  // Gamma5 - Full operator    with minus mu
-    mul_gamma5(old, VOLUME);
+    mul_gamma5((spinor *const) old, VOLUME);
     DDalphaAMG_solve( new, old, precision, &mg_status );
-    mul_gamma5(old, VOLUME);
+    if( N == VOLUME )
+      mul_gamma5((spinor *const) old, VOLUME);
   }
   else if ( f == Qtm_pm_psi ||    //          Schur complement squared
 	    f == Qsw_pm_psi ) {   //          Schur complement squared
@@ -336,6 +375,113 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
     finalize_solver(solver_field, 2);
   }
   
+  mul_r(phi_new ,mg_scale, phi_new, N);
+
+  if (g_proc_id == 0) {
+    printf("Solving time %.2f sec (%.1f %% on coarse grid)\n", mg_status.time,
+	   100.*(mg_status.coarse_time/mg_status.time));
+    printf("Total iterations on fine grid %d\n", mg_status.iter_count);
+    printf("Total iterations on coarse grids %d\n", mg_status.coarse_iter_count);
+    if (!mg_status.success) 
+      printf("ERROR: the solver did not converge!\n");
+  }
+  
+  return mg_status.success;
+}
+
+static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * const up_old, spinor * const dn_old,
+			const double precision, const int N, matrix_mult_nd f)
+{
+  
+  // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> rescale by 1/4+m
+  // moreover in the nd case, the tmLQCD is multiplied by phmc_invmaxev
+  double mg_scale=0.5/g_kappa/phmc_invmaxev;
+  double *old1 = (double*) up_old; 
+  double *old2 = (double*) dn_old; 
+  double *new1 = (double*) up_new;
+  double *new2 = (double*) dn_new;
+  spinor ** solver_field = NULL;
+  
+  //  if( N != VOLUME && N != VOLUME/2 ) {
+  if( N != VOLUME/2 ) { // no full VOLUME functions implemented at the moment 
+    if( g_proc_id == 0 )
+      printf("ERROR: N = %d in MG_solve. Expettected N == VOLUME (%d) or VOLUME/2 (%d)\n", N, VOLUME, VOLUME/2);
+    return 0;
+  }
+
+  if (N==VOLUME/2) {
+    init_solver_field(&solver_field, VOLUMEPLUSRAND,4);
+    old1 = (double*) solver_field[0];
+    old2 = (double*) solver_field[1];
+    new1 = (double*) solver_field[2];
+    new2 = (double*) solver_field[3];
+    convert_odd_to_lexic( (spinor*) old1, up_old);
+    convert_odd_to_lexic( (spinor*) old2, dn_old);
+  }
+  
+  // Checking if the operator is in the list and compatible with N
+  if (      f == Qtm_ndpsi ||           //  Gamma5 Dh    - Schur complement with csw = 0
+	    f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
+	    f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar and csw = 0
+	    f == Qsw_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar
+	    f == Qtm_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+    if( N != VOLUME/2 && g_proc_id == 0 )
+      printf("WARNING: expected N == VOLUME/2 for the required operator in MG_solve. Continuing with N == VOLUME\n");
+  }
+  else if ( 0 ) {                       // No full operator for nd
+    if( N != VOLUME && g_proc_id == 0 )
+      printf("WARNING: expected N == VOLUME for the required operator in MG_solve. Continuing with N == VOLUME/2\n");
+  }
+  else if( g_proc_id == 0 )
+    printf("WARNING: required operator unknown for MG_solve. Using standard operator: %s.\n",
+	   N==VOLUME?"":"Qsw_ndpsi");
+
+  // Setting mu and eps
+  if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh)^2 - Schur complement squared with shift
+    MG_update_mubar_epsbar( g_mubar, g_epsbar, sqrt(g_shift) );
+  else if ( f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
+	    f == Qsw_dagger_ndpsi )     //  Gamma5 Dh    - Schur complement with mu = -mubar
+    MG_update_mubar_epsbar( -g_mubar, g_epsbar, 0 );
+  else
+    MG_update_mubar_epsbar( g_mubar, g_epsbar, 0 );
+  
+  //Solving
+  if (      f == Qtm_ndpsi ||           //  Gamma5 Dh    - Schur complement with csw = 0
+	    f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
+	    f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
+	    f == Qsw_dagger_ndpsi ) {   //  Gamma5 Dh    - Schur complement with mu = -mubar
+    mul_gamma5((spinor *const) old1, VOLUME);
+    mul_gamma5((spinor *const) old2, VOLUME);
+    DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
+    if( N == VOLUME ) {
+      mul_gamma5((spinor *const) old1, VOLUME);
+      mul_gamma5((spinor *const) old2, VOLUME);
+    }
+  }
+  else if ( f == Qtm_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+    mg_scale *= mg_scale;
+    // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh
+    // tmLQCD:          gamma5 Dh tau1 gamma5 Dh tau1
+    DDalphaAMG_solve_doublet_squared_odd( new2, old2, new1, old1, precision, &mg_status );
+  }
+  else
+    DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
+  
+  if (N==VOLUME/2) {
+    convert_lexic_to_odd(up_new, (spinor*) new1);
+    convert_lexic_to_odd(dn_new, (spinor*) new2);
+    finalize_solver(solver_field, 4);
+  }
+  mul_r(up_new ,mg_scale, up_new, N);
+  mul_r(dn_new ,mg_scale, dn_new, N);
+  
   if (g_proc_id == 0) {
     printf("Solving time %.2f sec (%.1f %% on coarse grid)\n", mg_status.time,
 	   100.*(mg_status.coarse_time/mg_status.time));
@@ -344,11 +490,11 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
     if (!mg_status.success) 
       printf("ERROR: the solver did not converge!\n");
   }
-  mul_r(phi_new ,mg_scale, phi_new, N);
   
   return mg_status.success;
 }
 
+
 void MG_init()
 {
   mg_init.comm_cart=g_cart_grid;
@@ -461,15 +607,15 @@ void MG_update_gauge(double step)
   mg_update_gauge = 1;
 }
 
-void MG_update_mu(double mu_tmLQCD, double odd_tmLQCD)
+void MG_update_mu(double mu_tmLQCD, double shift_tmLQCD)
 {
-  double mu, odd_shift;
-  mu=0.5*mu_tmLQCD/g_kappa;
-  odd_shift=0.5*odd_tmLQCD/g_kappa;
+  double mu, shift;
+  mu    = 0.5 * mu_tmLQCD   /g_kappa;
+  shift = 0.5 * shift_tmLQCD/g_kappa;
   
   DDalphaAMG_get_parameters(&mg_params);
   
-  if (mu != mg_params.mu || odd_shift != mg_params.mu_odd_shift || mg_params.mu_even_shift != 0.0 ) {
+  if (mu != mg_params.mu || shift != mg_params.mu_odd_shift || mg_params.mu_even_shift != 0.0 ) {
     //Taking advantage of this function for updating printing in HMC
     if(g_debug_level > 0) 
       mg_params.print=1;
@@ -478,7 +624,39 @@ void MG_update_mu(double mu_tmLQCD, double odd_tmLQCD)
 
     mg_params.mu = mu;
     mg_params.mu_even_shift = 0.0;
-    mg_params.mu_odd_shift = odd_shift;
+    mg_params.mu_odd_shift = shift;
+    mg_params.mu_factor[mg_lvl-1] = mg_cmu_factor;
+    mg_params.epsbar = 0.0;
+    mg_params.epsbar_ig5_even_shift = 0.0;
+    mg_params.epsbar_ig5_odd_shift = 0.0;
+    DDalphaAMG_update_parameters(&mg_params, &mg_status);
+  }	 
+}
+
+void MG_update_mubar_epsbar(double mubar_tmLQCD, double epsbar_tmLQCD, double shift_tmLQCD)
+{
+  double mubar, epsbar, shift;
+  mubar  = 0.5 * mubar_tmLQCD /g_kappa;
+  epsbar = 0.5 * epsbar_tmLQCD/g_kappa;
+  shift  = 0.5 * shift_tmLQCD/g_kappa/phmc_invmaxev;
+  
+  DDalphaAMG_get_parameters(&mg_params);
+  
+  if ( mubar != mg_params.mu || mg_params.mu_odd_shift != 0.0 || mg_params.mu_even_shift != 0.0 ||
+       epsbar != mg_params.epsbar || shift != mg_params.epsbar_ig5_odd_shift || mg_params.epsbar_ig5_even_shift != 0.0 ) {
+    //Taking advantage of this function for updating printing in HMC
+    if(g_debug_level > 0) 
+      mg_params.print=1;
+    else
+      mg_params.print=0;
+
+    mg_params.mu = mubar;
+    mg_params.mu_even_shift = 0.0;
+    mg_params.mu_odd_shift = 0.0;
+    mg_params.mu_factor[mg_lvl-1] = 1.0;
+    mg_params.epsbar = epsbar;
+    mg_params.epsbar_ig5_even_shift = 0.0;
+    mg_params.epsbar_ig5_odd_shift = shift;
     DDalphaAMG_update_parameters(&mg_params, &mg_status);
   }	 
 }
@@ -512,7 +690,7 @@ int MG_solver(spinor * const phi_new, spinor * const phi_old,
   MG_pre_solve(gf);
 
   success = MG_solve( phi_new, phi_old, mg_prec, N, f );
-  
+
 #ifdef MGTEST
   if(success) 
     success = MG_check( phi_new, phi_old, N, mg_prec, f );
@@ -574,3 +752,45 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
   
   return iter_count;
 }
+
+int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
+		 spinor * const up_old, spinor * const dn_old,
+		 const double precision, const int max_iter, const int rel_prec,
+		 const int N, su3 **gf, matrix_mult_nd f)
+{
+  
+  int success=0;
+  double mg_prec = rel_prec?sqrt(precision):sqrt(precision/(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1)));
+  
+  MG_pre_solve(gf);
+
+  success = MG_solve_nd( up_new, dn_new, up_old, dn_old, mg_prec, N, f );
+  
+#ifdef MGTEST
+  if(success) 
+    success = MG_check_nd( up_new, dn_new, up_old, dn_old, N, mg_prec, f );
+#endif
+  
+  if(!success) {
+    MG_reset();
+    MG_pre_solve(gf);
+    success = MG_solve_nd( up_new, dn_new, up_old, dn_old, mg_prec, N, f);
+    
+#ifdef MGTEST
+    if(success) 
+      success = MG_check_nd( up_new, dn_new, up_old, dn_old, N, mg_prec, f );
+#endif
+  }
+  
+  if(!success) {
+    if( g_proc_id == 0 )
+      printf("ERROR: solver didn't converge after two trials!! Aborting... \n");
+    //TODO: handle abort
+    DDalphaAMG_finalize();
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+    exit(1);
+  } 
+  // mg_status should have been used last time for the inversion.
+  return mg_status.iter_count;
+}
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index 102cbf681..aa27ad091 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -26,6 +26,7 @@
 #include "global.h"
 #include "su3.h"
 #include"solver/matrix_mult_typedef.h"
+#include"solver/matrix_mult_typedef_nd.h"
 
 extern int mg_setup_iter;
 extern int mg_coarse_setup_iter;
@@ -44,6 +45,7 @@ void MG_init(void);
 void MG_update_gauge(double step);
 // Convention: mu_MG = 0.5 * mu_tmLQCD / g_kappa;
 void MG_update_mu(double mu_tmLQCD, double odd_tmLQCD);
+void MG_update_mubar_epsbar(double mubar_tmLQCD, double epsbar_tmLQCD, double shift_tmLQCD);
 void MG_reset(void);
 void MG_finalize(void);
 
@@ -56,4 +58,9 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
 		 const double precision, const int max_iter, const int rel_prec,
 		 const int N, su3 **gf, matrix_mult_full f_full);
 
+int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
+		 spinor * const up_old, spinor * const dn_old,
+		 const double precision, const int max_iter, const int rel_prec,
+		 const int N, su3 **gf, matrix_mult_nd f);
+
 #endif /* DDalphaAMG_INTERFACE_H_ */
diff --git a/default_input_values.h b/default_input_values.h
index 06210c47d..02a63bff2 100644
--- a/default_input_values.h
+++ b/default_input_values.h
@@ -50,6 +50,7 @@
 #define _default_g_mu1 0.0
 #define _default_g_mu2 0.0
 #define _default_g_mu3 0.0
+#define _default_g_shift 0.0
 #define _default_c_sw -1.0
 #define _default_g_beta 6.0
 #define _default_g_N_s 20
diff --git a/global.h b/global.h
index 6fd56ba1b..b24e42a1b 100644
--- a/global.h
+++ b/global.h
@@ -194,7 +194,7 @@ EXTERN su3adj ** ddummy;
 
 EXTERN int count00,count01,count10,count11,count20,count21;
 EXTERN double g_kappa, g_c_sw, g_beta;
-EXTERN double g_mu, g_mu1, g_mu2, g_mu3;
+EXTERN double g_mu, g_mu1, g_mu2, g_mu3, g_shift;
 EXTERN double g_rgi_C0, g_rgi_C1;
 
 /* Parameters for non-degenrate case */
diff --git a/linalg/assign_add_mul_r_32.c b/linalg/assign_add_mul_r_32.c
index e60706ea1..15b7626f1 100644
--- a/linalg/assign_add_mul_r_32.c
+++ b/linalg/assign_add_mul_r_32.c
@@ -37,13 +37,8 @@
 #include "su3.h"
 #include "assign_add_mul_r_32.h"
 
-
 #if (defined BGQ && defined XLC)
-void assign_add_mul_r_32(spinor32 * const R, spinor32 * const S, const float c, const int N) {
-#ifdef TM_USE_OMP
-#pragma omp parallel
-  {
-#endif
+void assign_add_mul_r_32_orphaned(spinor32 * const R, spinor32 * const S, const float c, const int N) {
   vector4double x0, x1, x2, x3, x4, x5, y0, y1, y2, y3, y4, y5;
   vector4double z0, z1, z2, z3, z4, z5, k;
   float *s, *r;
@@ -93,20 +88,13 @@ void assign_add_mul_r_32(spinor32 * const R, spinor32 * const S, const float c,
     vec_st(z4, 0, r+16);
     vec_st(z5, 0, r+20);
   }
-#ifdef TM_USE_OMP
-  } /* OpenMP closing brace */
-#endif
   return;
 }
 
 #else
 
-void assign_add_mul_r_32(spinor32 * const R, spinor32 * const S, const float c, const int N)
+void assign_add_mul_r_32_orphaned(spinor32 * const R, spinor32 * const S, const float c, const int N)
 {
-#ifdef TM_USE_OMP
-#pragma omp parallel
-  {
-#endif
   spinor32 *r,*s;
 
 #ifdef TM_USE_OMP
@@ -134,10 +122,20 @@ void assign_add_mul_r_32(spinor32 * const R, spinor32 * const S, const float c,
     r->s3.c2 += c * s->s3.c2;
   }
 
+}
+
+#endif
+
+void assign_add_mul_r_32(spinor32 * const R, spinor32 * const S, const float c, const int N)
+{
+#ifdef TM_USE_OMP
+#pragma omp parallel
+  {
+#endif
+  assign_add_mul_r_32_orphaned(R,S,c,N);
 #ifdef TM_USE_OMP
   } /* OpenMP closing brace */
 #endif
-
+return;
 }
 
-#endif
diff --git a/operator/tm_operators_nd.c b/operator/tm_operators_nd.c
index 3f437a17e..ad51aa9c9 100644
--- a/operator/tm_operators_nd.c
+++ b/operator/tm_operators_nd.c
@@ -237,6 +237,14 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   return;
 }
 
+void Qtm_pm_ndpsi_shift(spinor * const l_strange, spinor * const l_charm,
+                       spinor * const k_strange, spinor * const k_charm) {
+  Qtm_pm_ndpsi(l_strange,l_charm,k_strange,k_charm);  
+  assign_add_mul_r( l_strange, k_strange, g_shift, VOLUME/2 );
+  assign_add_mul_r( l_charm, k_charm, g_shift, VOLUME/2 );
+  return;
+}
+
 void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		  spinor * const k_strange, spinor * const k_charm) {
 
@@ -284,6 +292,15 @@ void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   return;
 }
 
+void Qsw_pm_ndpsi_shift(spinor * const l_strange, spinor * const l_charm,
+                       spinor * const k_strange, spinor * const k_charm) {
+  Qsw_pm_ndpsi(l_strange,l_charm,k_strange,k_charm);
+  
+  assign_add_mul_r( l_strange, k_strange, g_shift, VOLUME/2 );
+  assign_add_mul_r( l_charm, k_charm, g_shift, VOLUME/2 );
+
+  return;
+}
 
 
 /******************************************
diff --git a/operator/tm_operators_nd.h b/operator/tm_operators_nd.h
index 347f326a5..79aefb4f5 100644
--- a/operator/tm_operators_nd.h
+++ b/operator/tm_operators_nd.h
@@ -23,53 +23,58 @@
 #define _TM_OPERATTORS_ND_H
 
 void mul_one_pm_itau2(spinor * const p, spinor * const q,
-		      spinor * const r, spinor * const s,
-		      const double sign, const int N);
+          spinor * const r, spinor * const s,
+          const double sign, const int N);
 
 void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
-	       spinor * const k_strange,  spinor * const k_charm);
+         spinor * const k_strange,  spinor * const k_charm);
 void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
-	       spinor * const k_strange, spinor * const k_charm);
+         spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
-		      spinor * const k_strange, spinor * const k_charm);
+          spinor * const k_strange, spinor * const k_charm);
 void Qsw_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
-		      spinor * const k_strange, spinor * const k_charm);
+          spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                   spinor * const k_strange, spinor * const k_charm);
+void Qtm_pm_ndpsi_shift(spinor * const l_strange, spinor * const l_charm,
+      spinor * const k_strange, spinor * const k_charm);
+
 void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
-		  spinor * const k_strange, spinor * const k_charm);
+      spinor * const k_strange, spinor * const k_charm);
+void Qsw_pm_ndpsi_shift(spinor * const l_strange, spinor * const l_charm,
+      spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 void Qsw_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 
 void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
-			    spinor * const k_strange, spinor * const k_charm, 
-			    const _Complex double z, const double Cpol, const double invev);
+          spinor * const k_strange, spinor * const k_charm, 
+          const _Complex double z, const double Cpol, const double invev);
 void Qsw_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
-			      spinor * const k_strange, spinor * const k_charm, 
-			      const _Complex double z, const double Cpol, const double invev);
+            spinor * const k_strange, spinor * const k_charm, 
+            const _Complex double z, const double Cpol, const double invev);
 
 void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm, 
              spinor * const k_strange, spinor * const k_charm, 
-	     const int ieo);
+       const int ieo);
 void H_eo_sw_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		   spinor * const k_strange, spinor * const k_charm);
+       spinor * const k_strange, spinor * const k_charm);
 
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		    spinor * const k_strange, spinor * const k_charm,
-		    const double mu, const double eps);
+        spinor * const k_strange, spinor * const k_charm,
+        const double mu, const double eps);
 
 void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		      spinor * const k_strange, spinor * const k_charm);
+          spinor * const k_strange, spinor * const k_charm);
 
 void Q_test_epsilon(spinor * const l_strange, spinor * const l_charm,
                     spinor * const k_strange, spinor * const k_charm);
 
 void Qtau1_P_ndpsi(spinor * const l_strange, spinor * const l_charm,
-		spinor * const k_strange, spinor * const k_charm);
+    spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_Ptm_pm_psi(spinor * const l, spinor * const k);
 
diff --git a/operator/tm_operators_nd_32.c b/operator/tm_operators_nd_32.c
index a0cdebb5c..54224b8c7 100644
--- a/operator/tm_operators_nd_32.c
+++ b/operator/tm_operators_nd_32.c
@@ -262,6 +262,14 @@ void Qtm_pm_ndpsi_32(spinor32 * const l_strange, spinor32 * const l_charm,
   return;
 }
 
+void Qtm_pm_ndpsi_shift_32(spinor32 * const l_strange, spinor32 * const l_charm,
+          spinor32 * const k_strange, spinor32 * const k_charm){
+  Qtm_pm_ndpsi_32(l_strange, l_charm, k_strange, k_charm);
+  assign_add_mul_r_32(l_strange, k_strange, (float)g_shift, VOLUME/2 );
+  assign_add_mul_r_32(l_charm, k_charm, (float)g_shift, VOLUME/2 );
+  return;
+}
+
 void Qsw_pm_ndpsi_32(spinor32 * const l_strange, spinor32 * const l_charm,
       spinor32 * const k_strange, spinor32 * const k_charm) {
 #ifdef TM_USE_OMP
@@ -316,3 +324,13 @@ void Qsw_pm_ndpsi_32(spinor32 * const l_strange, spinor32 * const l_charm,
 
   return;
 }
+
+void Qsw_pm_ndpsi_shift_32(spinor32* const l_strange, spinor32 * const l_charm,
+      spinor32 * const k_strange, spinor32 * const k_charm){
+  Qsw_pm_ndpsi_32(l_strange,l_charm,k_strange,k_charm);
+  assign_add_mul_r_32(l_strange, k_strange, (float)g_shift, VOLUME/2 );
+  assign_add_mul_r_32(l_charm, k_charm, (float)g_shift, VOLUME/2 );
+  return;
+}
+
+
diff --git a/operator/tm_operators_nd_32.h b/operator/tm_operators_nd_32.h
index fedc818f7..c9833bed6 100644
--- a/operator/tm_operators_nd_32.h
+++ b/operator/tm_operators_nd_32.h
@@ -25,6 +25,9 @@ void Q_pm_ndpsi_32(spinor32 * const l_strange, spinor32 * const l_charm, spinor3
 
 void Qtm_pm_ndpsi_32(spinor32 * const l_strange, spinor32 * const l_charm,
 		  spinor32 * const k_strange, spinor32 * const k_charm);
+void Qtm_pm_ndpsi_shift_32(spinor32 * const l_strange, spinor32 * const l_charm, spinor32 * const k_strange, spinor32 * const k_charm);
+
 void Qsw_pm_ndpsi_32(spinor32 * const l_strange, spinor32 * const l_charm,
       spinor32 * const k_strange, spinor32 * const k_charm);
+void Qsw_pm_ndpsi_shift_32(spinor32 * const l_strange, spinor32 * const l_charm, spinor32 * const k_strange, spinor32 * const k_charm);
 #endif
diff --git a/read_input.l b/read_input.l
index cd0715c97..4252a6b0f 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1697,6 +1697,11 @@ static inline void rmQuotes(char *str){
     mnl->solver = CGMMSND;
     BEGIN(solver_caller);
   }
+  rgmixedcg {
+    if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
+    mnl->solver = RGMIXEDCG;
+    BEGIN(solver_caller);
+  }
   mixedCGmmsnd {
     if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
     mnl->solver = MIXEDCGMMSND;
@@ -1707,7 +1712,11 @@ static inline void rmQuotes(char *str){
     mnl->solver = 14;
     BEGIN(solver_caller);
   }
-
+  DDalphaAMG {
+    if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
+    mnl->solver = MG;
+    BEGIN(solver_caller);
+  }
 }
 
 <GTYPE>{
@@ -2541,6 +2550,7 @@ int read_input(char * conf_file){
   g_mu1 = _default_g_mu1;
   g_mu2 = _default_g_mu2;
   g_mu3 = _default_g_mu3;
+  g_shift = _default_g_shift;
   g_dbw2rand = 0;
   g_running_phmc = 0;
   g_beta = _default_g_beta;
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index f530b1fbe..613035995 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -39,6 +39,7 @@
 #endif
 #include "global.h"
 #include "read_input.h"
+#include "default_input_values.h"
 #include "solver/solver.h"
 #include "solver/matrix_mult_typedef.h"
 #include "solver/solver_types.h"
@@ -60,8 +61,8 @@
 extern  int linsolve_eo_gpu (spinor * const P, spinor * const Q, const int max_iter, 
                             double eps, const int rel_prec, const int N, matrix_mult f);
 extern int dev_cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn, 
-		 spinor * const Qup, spinor * const Qdn, 
-		 solver_pm_t * solver_pm);
+     spinor * const Qup, spinor * const Qdn, 
+     solver_pm_t * solver_pm);
    #ifdef TEMPORALGAUGE
      #include "../temporalgauge.h" 
    #endif
@@ -86,10 +87,10 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
 
     if(usegpu_flag){   
       #ifdef HAVE_GPU     
-	      #ifdef TEMPORALGAUGE
+        #ifdef TEMPORALGAUGE
           to_temporalgauge(g_gauge_field, Q , P);
         #endif          
-        iteration_count = linsolve_eo_gpu(P, Q, max_iter, eps_sq, rel_prec, N, f);			     
+        iteration_count = linsolve_eo_gpu(P, Q, max_iter, eps_sq, rel_prec, N, f);           
         #ifdef TEMPORALGAUGE
           from_temporalgauge(Q, P);
         #endif
@@ -138,24 +139,64 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
   int iteration_count = 0; 
     if(solver_pm->type==MIXEDCGMMSND){
       if(usegpu_flag){
-	#ifdef HAVE_GPU      
-	  #ifdef TEMPORALGAUGE
-	    to_temporalgauge_mms(g_gauge_field , Qup, Qdn, Pup, Pdn, solver_pm->no_shifts);
-	  #endif        
-	  iteration_count = dev_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);  
-	  #ifdef TEMPORALGAUGE
-	    from_temporalgauge_mms(Qup, Qdn, Pup, Pdn, solver_pm->no_shifts);
-	  #endif 
-	#endif
-      }
-      else{
-	iteration_count = mixed_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
+  #ifdef HAVE_GPU      
+    #ifdef TEMPORALGAUGE
+      to_temporalgauge_mms(g_gauge_field , Qup, Qdn, Pup, Pdn, solver_pm->no_shifts);
+    #endif        
+    iteration_count = dev_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);  
+    #ifdef TEMPORALGAUGE
+      from_temporalgauge_mms(Qup, Qdn, Pup, Pdn, solver_pm->no_shifts);
+    #endif 
+  #endif
+      } else {
+        iteration_count = mixed_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
       }
-    }
-    else if (solver_pm->type==CGMMSND){
+    } else if (solver_pm->type == CGMMSND){
       iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
-    }
-    else{
+    } else if (solver_pm->type == RGMIXEDCG){
+      matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;
+      matrix_mult_nd32 f32  = Qtm_pm_ndpsi_shift_32;
+      if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ){ 
+        f    = Qsw_pm_ndpsi_shift;
+        f32  = Qsw_pm_ndpsi_shift_32;
+      }
+      iteration_count = 0;
+      // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
+      // and set the one relevant parameter
+      solver_params_t temp_params;
+      temp_params.mcg_delta = _default_mixcg_innereps;
+      double iter_local = 0;
+      for(int i = 0; i < solver_pm->no_shifts; ++i){
+        g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
+        iter_local = rg_mixed_cg_her_nd( Pup[i], Pdn[i], Qup, Qdn, temp_params, solver_pm->max_iter,
+					solver_pm->squared_solver_prec, solver_pm->rel_prec, solver_pm->sdim, f, f32);
+        g_shift = _default_g_shift;
+        if(iter_local == -1){
+          return(-1);
+        } else {
+          iteration_count += iter_local;
+        }
+      }
+    } else if (solver_pm->type == MG){
+      matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+      if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
+        f = Qsw_pm_ndpsi_shift;
+      iteration_count = 0;
+      // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
+      // and set the one relevant parameter
+      double iter_local = 0;
+      for(int i = 0; i < solver_pm->no_shifts; ++i){
+        g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
+        iter_local = MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_pm->squared_solver_prec, solver_pm->max_iter,
+				   solver_pm->rel_prec, solver_pm->sdim, g_gauge_field, f );
+        g_shift = _default_g_shift;
+        if(iter_local == -1){
+          return(-1);
+        } else {
+          iteration_count += iter_local;
+        }
+      }
+    } else {
       if(g_proc_id==0) printf("Error: solver not allowed for ND mms solve. Aborting...\n");
       exit(2);      
     }

From 2976c3a8f340ed76e99930c7d55470f73bb453ab Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 2 Nov 2016 11:57:16 +0200
Subject: [PATCH 02/85] Updating to MGMMSND

---
 DDalphaAMG_interface.c  | 203 +++++++++++++++++++++++++++++++++++++++-
 DDalphaAMG_interface.h  |   6 ++
 read_input.l            |   3 +-
 solver/monomial_solve.c |   7 ++
 solver/solver_types.h   |   3 +-
 5 files changed, 218 insertions(+), 4 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 255fb8fa1..387131a5c 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -109,7 +109,7 @@ static int vector_index_fct(int t, int z, int y, int x )
    return id;
 }
 
-static int MG_check(spinor * const phi_new, spinor * const phi_old, const int N, const double precision, matrix_mult f) 
+static inline int MG_check(spinor * const phi_new, spinor * const phi_old, const int N, const double precision, matrix_mult f) 
 {
   double differ[2], residual;
   spinor ** check_vect = NULL;
@@ -142,7 +142,7 @@ static int MG_check(spinor * const phi_new, spinor * const phi_old, const int N,
   
 }
 
-static int MG_check_nd( spinor * const up_new, spinor * const dn_new, spinor * const up_old, spinor * const dn_old,
+static inline int MG_check_nd( spinor * const up_new, spinor * const dn_new, spinor * const up_old, spinor * const dn_old,
 			const int N, const double precision, matrix_mult_nd f) 
 {
   double differ[2], residual;
@@ -177,6 +177,52 @@ static int MG_check_nd( spinor * const up_new, spinor * const dn_new, spinor * c
   
 }
 
+static inline int MG_mms_check_nd( spinor **const up_new, spinor **const dn_new, 
+                                   spinor * const up_old, spinor * const dn_old,
+                                   const double * shifts, const int no_shifts, 
+                                   const int N, const double precision, matrix_mult_nd f) 
+{
+  double differ[2], residual;
+  spinor ** check_vect = NULL;
+  double acc_factor = 2;
+  
+  init_solver_field(&check_vect, VOLUMEPLUSRAND,2);
+
+  for( int i = 0; i < no_shifts; i++ ) {
+
+    g_shift = shifts[i]*shifts[i]; 
+
+    f( check_vect[0], check_vect[1], up_new[i], dn_new[i]);
+    diff( check_vect[0], check_vect[0], up_old, N);
+    diff( check_vect[1], check_vect[1], dn_old, N);
+    differ[0] = sqrt(square_norm(check_vect[0], N, 1)+square_norm(check_vect[1], N, 1));
+    differ[1] = sqrt(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1));
+  
+    residual = differ[0]/differ[1];
+    
+    if( residual > precision && residual < acc_factor*precision ) {
+      if(g_proc_id == 0)
+        printf("WARNING: solution accepted even if the residual wasn't complitely acceptable (%e > %e) \n", residual, precision);
+    } else if( residual > acc_factor*precision ) {
+      if(g_proc_id == 0) {
+        printf("ERROR: something bad happened... MG converged giving the wrong solution!! Trying to restart... \n");
+        printf("ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e \n", differ[0],differ[1],differ[0]/differ[1],precision);
+      }
+      finalize_solver(check_vect, 2);
+      return 0;
+    } 
+    
+    if (g_debug_level > 0 && g_proc_id == 0)
+      printf("MGTEST:  || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", differ[0],differ[1],differ[0]/differ[1]);
+    
+  }
+
+  finalize_solver(check_vect, 2);
+
+  return 1;
+  
+}
+
 
 static int MG_pre_solve( su3 **gf )
 {
@@ -483,6 +529,116 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
   return mg_status.success;
 }
 
+static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new, 
+                            spinor * const up_old, spinor * const dn_old,
+                            const double * shifts, const int no_shifts,
+                            const double precision, const int N, matrix_mult_nd f)
+{
+  
+  // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> rescale by 1/4+m
+  // moreover in the nd case, the tmLQCD is multiplied by phmc_invmaxev
+  double mg_scale=0.5/g_kappa/phmc_invmaxev;
+  double *old1 = (double*) up_old; 
+  double *old2 = (double*) dn_old; 
+  double **new1, **new2, *mg_odd_shifts, *mg_even_shifts;
+  spinor ** solver_field = NULL;
+
+  //  if( N != VOLUME && N != VOLUME/2 ) {
+  if( N != VOLUME/2 ) { // no full VOLUME functions implemented at the moment 
+    if( g_proc_id == 0 )
+      printf("ERROR: N = %d in MG_solve. Expettected N == VOLUME (%d) or VOLUME/2 (%d)\n", N, VOLUME, VOLUME/2);
+    return 0;
+  }
+
+  new1 = (double**) malloc(no_shifts*sizeof(double*));
+  new2 = (double**) malloc(no_shifts*sizeof(double*));
+  mg_odd_shifts  = (double*) malloc(no_shifts*sizeof(double));
+  mg_even_shifts = (double*) malloc(no_shifts*sizeof(double));
+
+  if( N==VOLUME/2 ) {
+    init_solver_field(&solver_field, VOLUMEPLUSRAND,2+2*no_shifts);
+    old1 = (double*) solver_field[0];
+    old2 = (double*) solver_field[1];
+    convert_odd_to_lexic( (spinor*) old1, up_old);
+    convert_odd_to_lexic( (spinor*) old2, dn_old);
+
+    for( int i = 0; i < no_shifts; i++ ) {
+      new1[i] = (double*) solver_field[2+2*i];
+      new2[i] = (double*) solver_field[3+2*i];
+    }
+  } else {
+    for( int i = 0; i < no_shifts; i++ ) {
+      new1[i] = (double*) up_new[i];
+      new2[i] = (double*) dn_new[i];
+    }
+  }
+
+  MG_update_mubar_epsbar( g_mubar, g_epsbar, shifts[0] );
+  for( int i = 0; i < no_shifts; i++ ) {
+    mg_odd_shifts[i]  = shifts[i]*mg_scale;
+    mg_even_shifts[i] = 0;
+  }
+  // Checking if the operator is in the list and compatible with N
+  if (	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+    if( N != VOLUME/2 ) {
+      if( g_proc_id == 0 )
+        printf("ERROR: expected N == VOLUME/2 for the required operator in MG_mms_solve_nd.\n");
+      return 0;
+    }
+  }  else if( g_proc_id == 0 )
+    printf("WARNING: required operator unknown for MG_solve. Using standard operator: %s.\n",
+	   N==VOLUME?"":"Qsw_pm_ndpsi_shift");
+
+  //Solving
+  /* TODO: Qtm_ndpsi_shift, Qsw_ndpsi_shift
+  if (      f == Qtm_ndpsi_shift ||     //  Gamma5 Dh    - Schur complement with csw = 0 and shift
+	    f == Qsw_ndpsi_shift ) {    //  Gamma5 Dh    - Schur complement with shift
+    mul_gamma5((spinor *const) old1, VOLUME);
+    mul_gamma5((spinor *const) old2, VOLUME);
+    DDalphaAMG_solve_ms_doublet( new1, old1, new2, old2, mg_even_shifts, mg_odd_shifts, no_shifts, 
+                                 precision, &mg_status );
+    if( N == VOLUME ) {
+      mul_gamma5((spinor *const) old1, VOLUME);
+      mul_gamma5((spinor *const) old2, VOLUME);
+    }
+  }
+ else*/if ( f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+    mg_scale *= mg_scale;
+    // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh
+    // tmLQCD:          gamma5 Dh tau1 gamma5 Dh tau1
+    DDalphaAMG_solve_ms_doublet_squared_odd( new2, old2, new1, old1, mg_even_shifts, mg_odd_shifts, no_shifts,
+                                             precision, &mg_status );
+  }
+  else
+    DDalphaAMG_solve_ms_doublet( new1, old1, new2, old2, mg_even_shifts, mg_odd_shifts, no_shifts, 
+                                 precision, &mg_status );
+
+  if (N==VOLUME/2) {
+    for( int i = 0; i < no_shifts; i++ ) {
+      convert_lexic_to_odd(up_new[i], (spinor*) new1[i]);
+      convert_lexic_to_odd(dn_new[i], (spinor*) new2[i]);
+    }
+    finalize_solver(solver_field, 2+2*no_shifts);
+  }
+
+  for( int i = 0; i < no_shifts; i++ ) {
+    mul_r(up_new[i], mg_scale, up_new[i], N);
+    mul_r(dn_new[i], mg_scale, dn_new[i], N);
+  }
+
+  if (g_proc_id == 0) {
+    printf("Solving time %.2f sec (%.1f %% on coarse grid)\n", mg_status.time,
+	   100.*(mg_status.coarse_time/mg_status.time));
+    printf("Total iterations on fine grid %d\n", mg_status.iter_count);
+    printf("Total iterations on coarse grids %d\n", mg_status.coarse_iter_count);
+    if (!mg_status.success) 
+      printf("ERROR: the solver did not converge!\n");
+  }
+  
+  return mg_status.success;
+}
 
 void MG_init()
 {
@@ -783,3 +939,46 @@ int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
   // mg_status should have been used last time for the inversion.
   return mg_status.iter_count;
 }
+
+int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
+                     spinor * const up_old, spinor * const dn_old,
+                     const double * shifts, const int no_shifts,
+                     const double precision, const int max_iter, const int rel_prec,
+                     const int N, su3 **gf, matrix_mult_nd f)
+{
+  
+  int success=0;
+  double mg_prec = rel_prec?sqrt(precision):sqrt(precision/(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1)));
+  
+  MG_pre_solve(gf);
+
+  success = MG_mms_solve_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, mg_prec, N, f );
+  
+#ifdef MGTEST
+  if(success) 
+    success = MG_mms_check_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, N, mg_prec, f );
+#endif
+  
+  if(!success) {
+    MG_reset();
+    MG_pre_solve(gf);
+    success = MG_mms_solve_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, mg_prec, N, f);
+    
+#ifdef MGTEST
+    if(success) 
+      success = MG_mms_check_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, N, mg_prec, f );
+#endif
+  }
+  
+  if(!success) {
+    if( g_proc_id == 0 )
+      printf("ERROR: solver didn't converge after two trials!! Aborting... \n");
+    //TODO: handle abort
+    DDalphaAMG_finalize();
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+    exit(1);
+  } 
+  // mg_status should have been used last time for the inversion.
+  return mg_status.iter_count;
+}
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index 516e8e7d7..75fffbc58 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -64,4 +64,10 @@ int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
 		 const double precision, const int max_iter, const int rel_prec,
 		 const int N, su3 **gf, matrix_mult_nd f);
 
+int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
+                     spinor * const up_old, spinor * const dn_old,
+                     const double * shifts, const int no_shifts,
+                     const double precision, const int max_iter, const int rel_prec,
+                     const int N, su3 **gf, matrix_mult_nd f);
+
 #endif /* DDalphaAMG_INTERFACE_H_ */
diff --git a/read_input.l b/read_input.l
index c75e7165d..f1a9eb86f 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1715,7 +1715,8 @@ static inline void rmQuotes(char *str){
   }
   DDalphaAMG {
     if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
-    mnl->solver = MG;
+    // mnl->solver = MG;
+    mnl->solver = MGMMSND;
     BEGIN(solver_caller);
   }
 }
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 613035995..c29d01162 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -153,6 +153,13 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       }
     } else if (solver_pm->type == CGMMSND){
       iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
+    } else if (solver_pm->type == MGMMSND){
+      matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+      if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
+        f = Qsw_pm_ndpsi_shift;
+      iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, solver_pm->no_shifts,
+                                          solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
+                                          solver_pm->sdim, g_gauge_field, f );
     } else if (solver_pm->type == RGMIXEDCG){
       matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;
       matrix_mult_nd32 f32  = Qtm_pm_ndpsi_shift_32;
diff --git a/solver/solver_types.h b/solver/solver_types.h
index d16491580..45addb0b6 100644
--- a/solver/solver_types.h
+++ b/solver/solver_types.h
@@ -24,7 +24,8 @@ typedef enum SOLVER_TYPE {
  MCR,
  CR,
  BICG,
- MG
+ MG,
+ MGMMSND
 } SOLVER_TYPE;
 
 #endif

From 4a6a69d3b30aa1672f00cb3eb271c4056a6bbb5e Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 4 Nov 2016 11:56:13 +0200
Subject: [PATCH 03/85] Bug fix: mg_rho_update check is now OR-ed in MG_pre_solve

---
 DDalphaAMG_interface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index a6e184a7c..12a9be2dd 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -151,8 +151,8 @@ static int MG_pre_solve( su3 **gf )
   //  mg_dtau_update == 0.0  : updating at every change of configuration -> valid as well if configuration changed outside the HMC
   //  mg_rho_update < 0.0    : parameter ignore
   //  mg_rho_update == rho   : updating only if this condition and the others are satisfied
-  if ( mg_do_setup == 0 && mg_update_setup < mg_update_setup_iter && ( mg_dtau_update < dtau+1e-6 || (mg_dtau_update==0.0 && mg_update_gauge==1)) &&
-       (mg_rho_update >= 0.0 && mg_rho_update == g_mu3)) 
+  if ( mg_do_setup == 0 && mg_update_setup < mg_update_setup_iter && ( mg_dtau_update < dtau+1e-6 || (mg_dtau_update==0.0 && mg_update_gauge==1) ||
+                                                                       (mg_rho_update >= 0.0 && mg_rho_update == g_mu3) )) 
     mg_update_setup = mg_update_setup_iter;
   
   if(g_debug_level > 0 && g_proc_id == 0)

From 8ba0bd3ff522cb5893465026a359133a392310b8 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Sun, 6 Nov 2016 17:09:40 +0200
Subject: [PATCH 04/85] Optimized smoother iterations for MG 1+1

---
 DDalphaAMG_interface.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 1a9e8807d..6559ead22 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -760,7 +760,7 @@ void MG_update_mu(double mu_tmLQCD, double shift_tmLQCD)
   
   DDalphaAMG_get_parameters(&mg_params);
   
-  if (mu != mg_params.mu || shift != mg_params.mu_odd_shift || mg_params.mu_even_shift != 0.0 ) {
+  if (mu != mg_params.mu || shift != mg_params.mu_odd_shift || mg_params.mu_even_shift != 0.0 || mg_params.smoother_iterations != 4 ) {
     //Taking advantage of this function for updating printing in HMC
     if(g_debug_level > 0) 
       mg_params.print=1;
@@ -774,6 +774,7 @@ void MG_update_mu(double mu_tmLQCD, double shift_tmLQCD)
     mg_params.epsbar = 0.0;
     mg_params.epsbar_ig5_even_shift = 0.0;
     mg_params.epsbar_ig5_odd_shift = 0.0;
+    mg_params.smoother_iterations = 4;
     DDalphaAMG_update_parameters(&mg_params, &mg_status);
   }	 
 }
@@ -788,7 +789,7 @@ void MG_update_mubar_epsbar(double mubar_tmLQCD, double epsbar_tmLQCD, double sh
   DDalphaAMG_get_parameters(&mg_params);
   
   if ( mubar != mg_params.mu || mg_params.mu_odd_shift != 0.0 || mg_params.mu_even_shift != 0.0 ||
-       epsbar != mg_params.epsbar || shift != mg_params.epsbar_ig5_odd_shift || mg_params.epsbar_ig5_even_shift != 0.0 ) {
+       epsbar != mg_params.epsbar || shift != mg_params.epsbar_ig5_odd_shift || mg_params.epsbar_ig5_even_shift != 0.0 || mg_params.smoother_iterations != 2 ) {
     //Taking advantage of this function for updating printing in HMC
     if(g_debug_level > 0) 
       mg_params.print=1;
@@ -802,6 +803,7 @@ void MG_update_mubar_epsbar(double mubar_tmLQCD, double epsbar_tmLQCD, double sh
     mg_params.epsbar = epsbar;
     mg_params.epsbar_ig5_even_shift = 0.0;
     mg_params.epsbar_ig5_odd_shift = shift;
+    mg_params.smoother_iterations = 2;
     DDalphaAMG_update_parameters(&mg_params, &mg_status);
   }	 
 }

From ccad26e5f87201e8f9ec2f941cde77a96b78f7d5 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Jan 2017 13:49:20 +0200
Subject: [PATCH 05/85] Added mixed approach with DDalphaAMG and CGMMS. Use the
 parameter MGNumberOfShifts for setting how many shifts have to be done by
 DDalphaAMG.

---
 DDalphaAMG_interface.c  |  3 ++-
 DDalphaAMG_interface.h  |  1 +
 read_input.h            |  1 +
 read_input.l            |  5 +++++
 solver/monomial_solve.c | 17 ++++++++++++++---
 5 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 6559ead22..6f027d91a 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -59,6 +59,7 @@ int mg_lvl=3;
 int mg_blk[4] = {0, 0, 0, 0};
 int mg_mixed_prec=0;
 int mg_setup_mu_set = 0; //flag that enable the use of mg_setup_mu in the setup phase
+int mg_no_shifts = -1;
 double mg_setup_mu = 0.; 
 double mg_cmu_factor = 1.0;
 double mg_dtau_update = 0.0;
@@ -184,7 +185,7 @@ static inline int MG_mms_check_nd( spinor **const up_new, spinor **const dn_new,
 {
   double differ[2], residual;
   spinor ** check_vect = NULL;
-  double acc_factor = 2;
+  double acc_factor = 20;
   
   init_solver_field(&check_vect, VOLUMEPLUSRAND,2);
 
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index 75fffbc58..2335c1707 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -37,6 +37,7 @@ extern int mg_lvl;
 extern int mg_blk[4];
 extern int mg_mixed_prec;
 extern int mg_setup_mu_set;
+extern int mg_no_shifts; // Number of shifts to solve with MG in solve_mms_nd
 extern double mg_setup_mu;
 extern double mg_cmu_factor;
 extern double mg_dtau_update;
diff --git a/read_input.h b/read_input.h
index ffe325b7c..6bbd7c159 100644
--- a/read_input.h
+++ b/read_input.h
@@ -131,6 +131,7 @@ extern "C"
   extern int mg_blk[4];
   extern int mg_mixed_prec;
   extern int mg_setup_mu_set;
+  extern int mg_no_shifts;
   extern double mg_setup_mu;
   extern double mg_cmu_factor;
   extern double mg_dtau_update;
diff --git a/read_input.l b/read_input.l
index f1a9eb86f..cddce0ac1 100644
--- a/read_input.l
+++ b/read_input.l
@@ -714,6 +714,11 @@ static inline void rmQuotes(char *str){
     mg_omp_num_threads=a;
     if(myverbose) printf("  MG_omp_num_threads set to %d line %d operator %d\n", mg_omp_num_threads, line_of_file, current_operator);
   }
+  {SPC}*MGNumberOfShifts{EQL}{DIGIT}+ {
+    sscanf(yytext, " %[a-zA-Z] = %d", name, &a);
+    mg_no_shifts=a;
+    if(myverbose) printf("  MGNumberOfShifts set to %d line %d operator %d\n", mg_no_shifts, line_of_file, current_operator);
+  }
   EndDDalphaAMG{SPC}* {
   if(myverbose) printf("DDalphaAMG parsed in line %d\n\n", line_of_file);
   BEGIN(0);
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index c29d01162..971dddf45 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -157,9 +157,20 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
       if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
         f = Qsw_pm_ndpsi_shift;
-      iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, solver_pm->no_shifts,
-                                          solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
-                                          solver_pm->sdim, g_gauge_field, f );
+      if( mg_no_shifts > 0 && mg_no_shifts < solver_pm->no_shifts ) {
+        iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, mg_no_shifts,
+                                            solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
+                                            solver_pm->sdim, g_gauge_field, f );
+        solver_pm->no_shifts -= mg_no_shifts;
+        solver_pm->shifts += mg_no_shifts;
+        iteration_count += cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_pm );
+        // Restoring solver_pm
+        solver_pm->no_shifts += mg_no_shifts;
+        solver_pm->shifts -= mg_no_shifts;
+      } else
+        iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, solver_pm->no_shifts,
+                                            solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
+                                            solver_pm->sdim, g_gauge_field, f );
     } else if (solver_pm->type == RGMIXEDCG){
       matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;
       matrix_mult_nd32 f32  = Qtm_pm_ndpsi_shift_32;

From e6bab07b8a140360debf704f6d914c8cdc3f2f73 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Jan 2017 13:51:25 +0200
Subject: [PATCH 06/85] Checked correction term for the rational approximation.
 Improved the acceptance step saving half of the applications of Z. Fixed the
 checking of the residual (a note will follow).

---
 doc/rational.tex             |  2 +-
 monomial/ndratcor_monomial.c | 48 +++++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/doc/rational.tex b/doc/rational.tex
index 032015b55..cf1c1fc9a 100644
--- a/doc/rational.tex
+++ b/doc/rational.tex
@@ -156,7 +156,7 @@ \subsubsection{Correction Monomial}
   which, again expanding in $Z$ is obtained by
   \[
   \phi^\dagger (1+Z)^{-1/2} \phi = \phi^\dagger (1 - \frac{1}{2}Z +
-  \frac{3}{8}Z^3 + ...) \phi\, .
+  \frac{3}{8}Z^2 - \frac{5}{16}Z^3 + ...) \phi\, .
   \]
   Also here the series can be truncated after the first few terms.
 \item the second possibility is to include this correction as a
diff --git a/monomial/ndratcor_monomial.c b/monomial/ndratcor_monomial.c
index 3f5b9e60b..3cf786187 100644
--- a/monomial/ndratcor_monomial.c
+++ b/monomial/ndratcor_monomial.c
@@ -114,7 +114,11 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
 	 
   for(int i = 1; i < 8; i++) {
-    delta = apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
+    apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
+    
+    delta = coefs[i-1]*(scalar_prod_r(mnl->pf, up1, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn1, VOLUME/2, 1));
+    if(g_debug_level > 2 && g_proc_id == 0)
+      printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
     assign_add_mul_r(mnl->pf, up1, coefs[i-1], VOLUME/2);
     assign_add_mul_r(mnl->pf2, dn1, coefs[i-1], VOLUME/2);
     if(delta < mnl->accprec) break;
@@ -138,7 +142,7 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
 double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
   solver_pm_t solver_pm;
   monomial * mnl = &monomial_list[id];
-  double atime, etime, delta;
+  double atime, etime, delta_e;
   spinor * up0, * dn0, * up1, * dn1, * tup, * tdn;
   double coefs[6] = {-1./2., 3./8., -5./16., 35./128., -63./256., 231./1024.};
   atime = gettime();
@@ -149,7 +153,7 @@ double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
     sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
     copy_32_sw_fields();
   }
-  mnl->energy1 = 0.;
+  mnl->energy1 = square_norm(mnl->pf, VOLUME/2, 1) + square_norm(mnl->pf2, VOLUME/2, 1);
 
   solver_pm.max_iter = mnl->maxiter;
   solver_pm.squared_solver_prec = mnl->accprec;
@@ -166,27 +170,37 @@ double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
   solver_pm.rel_prec = g_relative_precision_flag;
 
   // apply (Q R)^(-1) to pseudo-fermion fields
-  assign(mnl->w_fields[4], mnl->pf, VOLUME/2);
-  assign(mnl->w_fields[5], mnl->pf2, VOLUME/2);
   up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
   up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
 
-  delta = apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &solver_pm);
-  assign_add_mul_r(mnl->w_fields[4], up0, coefs[0], VOLUME/2);
-  assign_add_mul_r(mnl->w_fields[5], dn0, coefs[0], VOLUME/2);
+  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &solver_pm);
+  delta_e = coefs[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
+  mnl->energy1 += delta_e;
+  if(g_debug_level > 2 && g_proc_id == 0)
+    printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", 1, 1, delta_e);
 
   for(int i = 2; i < 8; i++) {
-    if(delta < mnl->accprec) break;
-    delta = apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
-    assign_add_mul_r(mnl->w_fields[4], up1, coefs[i-1], VOLUME/2);
-    assign_add_mul_r(mnl->w_fields[5], dn1, coefs[i-1], VOLUME/2);
+    if(delta_e*delta_e < mnl->accprec) break;
+
+    delta_e = coefs[i-1]*(square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1)); 
+    mnl->energy1 += delta_e;
+    if(g_debug_level > 2 && g_proc_id == 0)
+      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta_e);
+    i++;
+    if(delta_e*delta_e < mnl->accprec) break;
+
+    apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
+    delta_e = coefs[i-1]*(scalar_prod_r(up0, up1, VOLUME/2, 1) + scalar_prod_r(dn0, dn1, VOLUME/2, 1));
+    mnl->energy1 += delta_e;
+    if(g_debug_level > 2 && g_proc_id == 0)
+      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta_e);
+
     tup = up0; tdn = dn0;
     up0 = up1; dn0 = dn1;
     up1 = tup; dn1 = tdn;
   }
 
-  mnl->energy1 = scalar_prod_r(mnl->pf, mnl->w_fields[4], VOLUME/2, 1);
-  mnl->energy1 += scalar_prod_r(mnl->pf2, mnl->w_fields[5], VOLUME/2, 1);
+
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
@@ -221,7 +235,7 @@ double apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
   }
 
   // apply R a second time
-  solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
+  mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
 	       k_up, k_dn,
 	       solver_pm);
   for(int j = (mnl->rat.np-1); j > -1; j--) {
@@ -239,11 +253,15 @@ double apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
 		     g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np]);
   diff(k_up, k_up, l_up, VOLUME/2);
   diff(k_dn, k_dn, l_dn, VOLUME/2);
+
+  /* TO REMOVE: We don't need this quantity. 
   double resi = square_norm(k_up, VOLUME/2, 1) + square_norm(k_dn, VOLUME/2, 1);
   if(g_debug_level > 2 && g_proc_id == 0) {
     printf("# NDRATCOR: ||Z * phi|| = %e\n", resi);
   }
   return(resi);
+  */
+  return 0;
 }
 
 // computes ||(1 - C^dagger R C) phi||

From c4510cf83f11a8f0eb60925c827ffca4d7d9ded3 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Jan 2017 13:52:10 +0200
Subject: [PATCH 07/85] Fixed reweighting_factor.c in order to work for
 reweighting the correction term for the rational approximation only.

---
 reweighting_factor.c | 52 +++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/reweighting_factor.c b/reweighting_factor.c
index f0c5f4a97..3141a2bf2 100644
--- a/reweighting_factor.c
+++ b/reweighting_factor.c
@@ -54,24 +54,32 @@ void reweighting_factor(const int N, const int nstore) {
     mnl = &monomial_list[j];
     if(mnl->even_odd_flag) {
       init_sw_fields();
-      double c_sw = mnl->c_sw;
-      if(c_sw < 0.) c_sw = 0.;
 
-      sw_term( (const su3**) hf.gaugefield, mnl->kappa, c_sw); 
-      if(mnl->type != NDDETRATIO) {
-        trlog[j] = -sw_trace(0, mnl->mu);
-      }
-      else {
-        trlog[j] = -sw_trace_nd(0, mnl->mubar, mnl->epsbar);
-      }
+      if(mnl->type != NDCLOVERRATCOR && (mnl->kappa != mnl->kappa2
+                                       || (mnl->type == NDDETRATIO 
+                                           && (mnl->mubar != mnl->mubar2 || mnl->epsbar != mnl->epsbar2))
+                                       || (mnl->type != NDDETRATIO
+                                           && (mnl->mu != mnl->mu2)))) {
+        double c_sw = mnl->c_sw;
+        if(c_sw < 0.) c_sw = 0.;
         
-      sw_term( (const su3**) hf.gaugefield, mnl->kappa2, c_sw);
-      if(mnl->type != NDDETRATIO) {
-        trlog[j] -= -sw_trace(0, mnl->mu2);
-      }
-      else {
-        trlog[j] -= -sw_trace_nd(0, mnl->mubar2, mnl->epsbar2);
-      }
+        sw_term( (const su3**) hf.gaugefield, mnl->kappa, c_sw); 
+        if(mnl->type != NDDETRATIO) {
+          trlog[j] = -sw_trace(0, mnl->mu);
+        }
+        else {
+          trlog[j] = -sw_trace_nd(0, mnl->mubar, mnl->epsbar);
+        }
+        
+        sw_term( (const su3**) hf.gaugefield, mnl->kappa2, c_sw);
+        if(mnl->type != NDDETRATIO) {
+          trlog[j] -= -sw_trace(0, mnl->mu2);
+        }
+        else {
+          trlog[j] -= -sw_trace_nd(0, mnl->mubar2, mnl->epsbar2);
+        }
+      } else
+        trlog[j] = 0.;
     }
     else {
       trlog[j] = 0.;
@@ -96,19 +104,19 @@ void reweighting_factor(const int N, const int nstore) {
           random_spinor_field_lexic(mnl->pf, mnl->rngrepro, RN_GAUSS);
           mnl->energy0 = square_norm(mnl->pf, n, 1);
         }
-	if(g_proc_id == 0 && g_debug_level > 1) {
-	  printf("# monomial[%d] %s, energy0 = %e\n", j, mnl->name, mnl->energy0);
-	}
-	if(mnl->type == NDDETRATIO) {
+	if(mnl->type == NDDETRATIO || mnl->type == NDCLOVERRATCOR) {
 	  if(mnl->even_odd_flag) {
 	    random_spinor_field_eo(mnl->pf2, mnl->rngrepro, RN_GAUSS);
-            mnl->energy0 += square_norm(mnl->pf, n/2, 1);
+            mnl->energy0 += square_norm(mnl->pf2, n/2, 1);
 	  }
 	  else {
-            random_spinor_field_lexic(mnl->pf, mnl->rngrepro, RN_GAUSS);
+            random_spinor_field_lexic(mnl->pf2, mnl->rngrepro, RN_GAUSS);
             mnl->energy0 += square_norm(mnl->pf2, n, 1);
           }
 	}
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# monomial[%d] %s, energy0 = %e\n", j, mnl->name, mnl->energy0);
+	}
       }
     }
 

From 57d375a8ad5c6e445d05db75f156eb8aa8988e45 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Jan 2017 15:36:27 +0200
Subject: [PATCH 08/85] Added estimation of next order correction in
 monomial/ndratcor_monomial.c for heatbath.

---
 monomial/ndratcor_monomial.c | 48 +++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/monomial/ndratcor_monomial.c b/monomial/ndratcor_monomial.c
index 3cf786187..ddff93ac3 100644
--- a/monomial/ndratcor_monomial.c
+++ b/monomial/ndratcor_monomial.c
@@ -67,7 +67,7 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
   solver_pm_t solver_pm;
   double atime, etime, delta;
-  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn;
+  spinor * up0, * dn0, * up1, * dn1, * tup, * tdn, * Zup, * Zdn;
   double coefs[6] = {1./4., -3./32., 7./122., -77./2048., 231./8192., -1463./65536.};
   atime = gettime();
   nd_set_global_parameter(mnl);
@@ -108,23 +108,41 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   solver_pm.rel_prec = g_relative_precision_flag;
 
   // apply B to the random field to generate pseudo-fermion fields
-  assign(mnl->w_fields[0], mnl->pf, VOLUME/2);
-  assign(mnl->w_fields[1], mnl->pf2, VOLUME/2);
   up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
   up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
-	 
-  for(int i = 1; i < 8; i++) {
-    apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
+  Zup = mnl->w_fields[4]; Zdn = mnl->w_fields[5];
+
+  apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &solver_pm);
+  delta = coefs[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
+  if(g_debug_level > 2 && g_proc_id == 0)
+    printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", 1, 1, delta);
+
+  if(delta*delta > mnl->accprec) {
+    assign_add_mul_r(mnl->pf, up0, coefs[0], VOLUME/2);
+    assign_add_mul_r(mnl->pf2, dn0, coefs[0], VOLUME/2);
     
-    delta = coefs[i-1]*(scalar_prod_r(mnl->pf, up1, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn1, VOLUME/2, 1));
-    if(g_debug_level > 2 && g_proc_id == 0)
-      printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
-    assign_add_mul_r(mnl->pf, up1, coefs[i-1], VOLUME/2);
-    assign_add_mul_r(mnl->pf2, dn1, coefs[i-1], VOLUME/2);
-    if(delta < mnl->accprec) break;
-    tup = up0; tdn = dn0;
-    up0 = up1; dn0 = dn1;
-    up1 = tup; dn1 = tdn;
+    // saving first application
+    assign(Zup, up0, VOLUME/2);
+    assign(Zdn, dn0, VOLUME/2);
+    
+    
+    for(int i = 2; i < 8; i++) {
+
+      // computing next order correction
+      delta = coefs[i-1]*(scalar_prod_r(Zup, up0, VOLUME/2, 1) + scalar_prod_r(Zup, dn0, VOLUME/2, 1)); 
+      if(g_debug_level > 2 && g_proc_id == 0)
+        printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
+      if(delta*delta < mnl->accprec) break;
+
+      apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
+      
+      assign_add_mul_r(mnl->pf, up1, coefs[i-1], VOLUME/2);
+      assign_add_mul_r(mnl->pf2, dn1, coefs[i-1], VOLUME/2);
+
+      tup = up0; tdn = dn0;
+      up0 = up1; dn0 = dn1;
+      up1 = tup; dn1 = tdn;
+    }
   }
   etime = gettime();
   if(g_proc_id == 0) {

From 6be1fc9e67d0cbf801ca0fb69d9ea8eacdcd1d1b Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 27 Jan 2017 10:53:53 +0200
Subject: [PATCH 09/85] Improved computation of heatbath with MG. TESTED.

---
 DDalphaAMG_interface.c     | 74 ++++++++++++++++++++++++++++++--------
 monomial/ndrat_monomial.c  | 62 +++++++++++++++++++++-----------
 operator/tm_operators_nd.c | 57 +++++++++++++++++++++++++++++
 operator/tm_operators_nd.h | 10 ++++++
 4 files changed, 168 insertions(+), 35 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 18680465c..dd7035b88 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -443,7 +443,7 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 	    f == Q_minus_psi ) {  // Gamma5 - Full operator    with minus mu
     mul_gamma5((spinor *const) old, VOLUME);
     DDalphaAMG_solve( new, old, precision, &mg_status );
-    if( N == VOLUME )
+    if( N == VOLUME ) // in case of VOLUME/2 old is a just local vector
       mul_gamma5((spinor *const) old, VOLUME);
   }
   else if ( f == Qtm_pm_psi ||    //          Schur complement squared
@@ -519,6 +519,10 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
 	    f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
 	    f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar and csw = 0
 	    f == Qsw_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar
+            f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+            f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
+            f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
+            f == Qsw_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with minus shift
 	    f == Qtm_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared with csw = 0
 	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
 	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared
@@ -538,6 +542,12 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
   if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
 	    f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh)^2 - Schur complement squared with shift
     MG_update_mubar_epsbar( g_mubar, g_epsbar, sqrt(g_shift) );
+  else if ( f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+            f == Qsw_tau1_ndpsi_add_Ishift )  // Gamma5 Dh tau1 - Schur complement with plus shift
+    MG_update_mubar_epsbar( g_mubar, g_epsbar, sqrt(g_shift) );
+  else if ( f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
+            f == Qsw_tau1_ndpsi_sub_Ishift )  // Gamma5 Dh tau1 - Schur complement with minus shift
+    MG_update_mubar_epsbar( g_mubar, g_epsbar, -sqrt(g_shift) );
   else if ( f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
 	    f == Qsw_dagger_ndpsi )     //  Gamma5 Dh    - Schur complement with mu = -mubar
     MG_update_mubar_epsbar( -g_mubar, g_epsbar, 0 );
@@ -552,11 +562,24 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
     mul_gamma5((spinor *const) old1, VOLUME);
     mul_gamma5((spinor *const) old2, VOLUME);
     DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
-    if( N == VOLUME ) {
+    if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
       mul_gamma5((spinor *const) old1, VOLUME);
       mul_gamma5((spinor *const) old2, VOLUME);
     }
   }
+  else if ( f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+            f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
+            f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
+            f == Qsw_tau1_ndpsi_sub_Ishift ) {// Gamma5 Dh tau1 - Schur complement with minus shift
+    mul_gamma5((spinor *const) old1, VOLUME);
+    mul_gamma5((spinor *const) old2, VOLUME);
+    // tau1 exchange new1 <-> new2
+    DDalphaAMG_solve_doublet( new2, old1, new1, old2, precision, &mg_status );
+    if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
+      mul_gamma5((spinor *const) old1, VOLUME);
+      mul_gamma5((spinor *const) old2, VOLUME);
+    }
+  }	    
   else if ( f == Qtm_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared with csw = 0
 	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
 	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared
@@ -633,13 +656,12 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
     }
   }
 
-  MG_update_mubar_epsbar( g_mubar, g_epsbar, shifts[0] );
-  for( int i = 0; i < no_shifts; i++ ) {
-    mg_odd_shifts[i]  = shifts[i]*mg_scale;
-    mg_even_shifts[i] = 0;
-  }
   // Checking if the operator is in the list and compatible with N
-  if (	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+  if (	    f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+            f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
+            f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
+            f == Qsw_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with minus shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
 	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
     if( N != VOLUME/2 ) {
       if( g_proc_id == 0 )
@@ -650,20 +672,42 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
     printf("WARNING: required operator unknown for MG_solve. Using standard operator: %s.\n",
 	   N==VOLUME?"":"Qsw_pm_ndpsi_shift");
 
+  // Setting mubar, epsbar and shifts
+  if (	    f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+            f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+    MG_update_mubar_epsbar( g_mubar, g_epsbar, shifts[0] );
+    for( int i = 0; i < no_shifts; i++ ) {
+      mg_odd_shifts[i]  = shifts[i]*mg_scale;
+      mg_even_shifts[i] = 0;
+    }
+  }
+  else if ( f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
+            f == Qsw_tau1_ndpsi_sub_Ishift ) {// Gamma5 Dh tau1 - Schur complement with minus shift
+    MG_update_mubar_epsbar( g_mubar, g_epsbar, -shifts[0] );
+    for( int i = 0; i < no_shifts; i++ ) {
+      mg_odd_shifts[i]  = -shifts[i]*mg_scale;
+      mg_even_shifts[i] = 0;
+    }
+  }
+
   //Solving
-  /* TODO: Qtm_ndpsi_shift, Qsw_ndpsi_shift
-  if (      f == Qtm_ndpsi_shift ||     //  Gamma5 Dh    - Schur complement with csw = 0 and shift
-	    f == Qsw_ndpsi_shift ) {    //  Gamma5 Dh    - Schur complement with shift
+  if (      f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+            f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
+            f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
+            f == Qsw_tau1_ndpsi_sub_Ishift ) {// Gamma5 Dh tau1 - Schur complement with minus shift
     mul_gamma5((spinor *const) old1, VOLUME);
     mul_gamma5((spinor *const) old2, VOLUME);
-    DDalphaAMG_solve_ms_doublet( new1, old1, new2, old2, mg_even_shifts, mg_odd_shifts, no_shifts, 
+    // tau1 exchange new1 <-> new2
+    DDalphaAMG_solve_ms_doublet( new2, old1, new1, old2, mg_even_shifts, mg_odd_shifts, no_shifts, 
                                  precision, &mg_status );
-    if( N == VOLUME ) {
+    if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
       mul_gamma5((spinor *const) old1, VOLUME);
       mul_gamma5((spinor *const) old2, VOLUME);
     }
-  }
- else*/if ( f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
+  }	    
+  else if ( f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
 	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
     mg_scale *= mg_scale;
     // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh
diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 81cc33b27..daf47ef7c 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -231,30 +231,52 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
-  mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
-                   		      mnl->pf, mnl->pf2, &solver_pm);
 
-  assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
-  assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
-
-  // apply C to the random field to generate pseudo-fermion fields
-  for(int j = (mnl->rat.np-1); j > -1; j--) {
-    // Q_h * tau^1 - i nu_j
-    // this needs phmc_Cpol = 1 to work!
-    if(mnl->type == NDCLOVERRAT) {
-      Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
-			       g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-			       I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
+  if( mnl->solver == MGMMSND ){
+    // With MG we can solve directly the unsquared operator
+    solver_pm.M_ndpsi = &Qtm_tau1_ndpsi_add_Ishift;
+    if(mnl->type == NDCLOVERRAT)
+      solver_pm.M_ndpsi = &Qsw_tau1_ndpsi_add_Ishift;
+    
+    mnl->iter0 = MG_mms_solver_nd( g_chi_up_spinor_field, g_chi_dn_spinor_field, mnl->pf, mnl->pf2, 
+                                   solver_pm.shifts, solver_pm.no_shifts,solver_pm.squared_solver_prec, 
+                                   solver_pm.max_iter, solver_pm.rel_prec, solver_pm.sdim, g_gauge_field, 
+                                   solver_pm.M_ndpsi );
+
+    assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
+    assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
+
+    // apply C to the random field to generate pseudo-fermion fields
+    for(int j = (mnl->rat.np-1); j > -1; j--) {
+      assign_add_mul(mnl->pf, g_chi_up_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
+      assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
     }
-    else {
-      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
-			     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-			     I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
+
+  } else {
+    mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
+                              mnl->pf, mnl->pf2, &solver_pm);
+    
+    assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
+    assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
+    
+    // apply C to the random field to generate pseudo-fermion fields
+    for(int j = (mnl->rat.np-1); j > -1; j--) {
+      // Q_h * tau^1 - i nu_j
+      // this needs phmc_Cpol = 1 to work!
+      if(mnl->type == NDCLOVERRAT) {
+        Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
+                                 g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+                                 I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
+      }
+      else {
+        Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
+                               g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+                               I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
+      }
+      assign_add_mul(mnl->pf, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
+      assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
     }
-    assign_add_mul(mnl->pf, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
-    assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
   }
-
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
diff --git a/operator/tm_operators_nd.c b/operator/tm_operators_nd.c
index ad51aa9c9..f1af4acbe 100644
--- a/operator/tm_operators_nd.c
+++ b/operator/tm_operators_nd.c
@@ -110,6 +110,63 @@ void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
   return;
 }
 
+/******************************************
+ *
+ * This is the implementation of 
+ *
+ *  Q_tau1_ndpsi_add/sub_Ishift =  ( M +/- I z_k )
+ *
+ *  with M = Qhat(2x2) tau_1   and z_k from sqrt(g_shift) 
+ *
+ *
+ *  needed in the evaluation of the heatbath when 
+ *  the Rational approximation is used
+ *
+ *
+ * For details, see documentation and comments of the
+ * above mentioned routines
+ *
+ * k_charm and k_strange are the input fields
+ * l_* the output fields
+ *
+ * it acts only on the odd part or only
+ * on a half spinor
+ ******************************************/
+
+
+void Qtm_tau1_ndpsi_add_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange, spinor * const k_charm) {
+
+  Q_tau1_sub_const_ndpsi(l_strange,l_charm,k_strange,k_charm,-I*sqrt(g_shift),1.,phmc_invmaxev);
+
+  return;
+}
+
+void Qtm_tau1_ndpsi_sub_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange, spinor * const k_charm) {
+
+  Q_tau1_sub_const_ndpsi(l_strange,l_charm,k_strange,k_charm, I*sqrt(g_shift),1.,phmc_invmaxev);
+
+  return;
+}
+
+void Qsw_tau1_ndpsi_add_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange, spinor * const k_charm) {
+
+  Qsw_tau1_sub_const_ndpsi(l_strange,l_charm,k_strange,k_charm,-I*sqrt(g_shift),1.,phmc_invmaxev);
+
+  return;
+}
+
+void Qsw_tau1_ndpsi_sub_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange, spinor * const k_charm) {
+
+  Qsw_tau1_sub_const_ndpsi(l_strange,l_charm,k_strange,k_charm, I*sqrt(g_shift),1.,phmc_invmaxev);
+
+  return;
+}
+
+
 /******************************************
  *
  * This is the implementation of
diff --git a/operator/tm_operators_nd.h b/operator/tm_operators_nd.h
index 79aefb4f5..48a7f81db 100644
--- a/operator/tm_operators_nd.h
+++ b/operator/tm_operators_nd.h
@@ -31,6 +31,16 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
          spinor * const k_strange, spinor * const k_charm);
 
+void Qtm_tau1_ndpsi_add_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange,  spinor * const k_charm);
+void Qtm_tau1_ndpsi_sub_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange,  spinor * const k_charm);
+void Qsw_tau1_ndpsi_add_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange,  spinor * const k_charm);
+void Qsw_tau1_ndpsi_sub_Ishift(spinor * const l_strange, spinor * const l_charm,
+                               spinor * const k_strange,  spinor * const k_charm);
+
+
 void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
           spinor * const k_strange, spinor * const k_charm);
 void Qsw_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,

From c1b8bdb61d36af20b1ae58433a43107b60b6f688 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 27 Jan 2017 12:04:57 +0200
Subject: [PATCH 10/85] Make DDalphaAMG work for inverting the doublet
 operator within the invert executable.

---
 DDalphaAMG_interface.c          | 45 +++++++++++++++++++--
 DDalphaAMG_interface.h          |  7 ++++
 invert_doublet_eo.c             | 21 ++++++++++
 monomial/ndrat_monomial.c       |  9 ++++-
 operator.c                      | 31 +++++----------
 operator/tm_operators_nd.c      | 69 +++++++++++++++++++++++++++++++++
 operator/tm_operators_nd.h      | 14 +++++++
 read_input.l                    | 10 +++++
 solver/matrix_mult_typedef_nd.h |  1 +
 9 files changed, 181 insertions(+), 26 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index dd7035b88..3778c22e7 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -497,8 +497,7 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
   double *new2 = (double*) dn_new;
   spinor ** solver_field = NULL;
   
-  //  if( N != VOLUME && N != VOLUME/2 ) {
-  if( N != VOLUME/2 ) { // no full VOLUME functions implemented at the moment 
+  if( N != VOLUME && N != VOLUME/2 ) {
     if( g_proc_id == 0 )
       printf("ERROR: N = %d in MG_solve. Expettected N == VOLUME (%d) or VOLUME/2 (%d)\n", N, VOLUME, VOLUME/2);
     return 0;
@@ -530,7 +529,7 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
     if( N != VOLUME/2 && g_proc_id == 0 )
       printf("WARNING: expected N == VOLUME/2 for the required operator in MG_solve. Continuing with N == VOLUME\n");
   }
-  else if ( 0 ) {                       // No full operator for nd
+  else if ( f == D_ndpsi ) {            //  Dh
     if( N != VOLUME && g_proc_id == 0 )
       printf("WARNING: expected N == VOLUME for the required operator in MG_solve. Continuing with N == VOLUME/2\n");
   }
@@ -551,6 +550,8 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
   else if ( f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
 	    f == Qsw_dagger_ndpsi )     //  Gamma5 Dh    - Schur complement with mu = -mubar
     MG_update_mubar_epsbar( -g_mubar, g_epsbar, 0 );
+  else if ( f == D_ndpsi )              //  Dh
+    MG_update_mubar_epsbar( g_mubar, g_epsbar, 0 );
   else
     MG_update_mubar_epsbar( g_mubar, g_epsbar, 0 );
   
@@ -589,6 +590,8 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
     // tmLQCD:          gamma5 Dh tau1 gamma5 Dh tau1
     DDalphaAMG_solve_doublet_squared_odd( new2, old2, new1, old1, precision, &mg_status );
   }
+  else if ( f == D_ndpsi )              //  Dh
+    DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
   else
     DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
   
@@ -1046,6 +1049,42 @@ int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
   return mg_status.iter_count;
 }
 
+int MG_solver_nd_eo(spinor * const Even_new_up, spinor * const Odd_new_up, 
+                    spinor * const Even_new_dn, spinor * const Odd_new_dn,
+                    spinor * const Even_up, spinor * const Odd_up,
+                    spinor * const Even_dn, spinor * const Odd_dn,
+                    const double precision, const int max_iter, const int rel_prec,
+                    const int N, su3 **gf, matrix_mult_full_nd f_full)
+{
+  
+  int iter_count;
+  spinor ** solver_field = NULL;
+  matrix_mult_nd f;
+  
+  init_solver_field(&solver_field, VOLUMEPLUSRAND, 4);
+  convert_eo_to_lexic(solver_field[0], Even_up, Odd_up);
+  convert_eo_to_lexic(solver_field[1], Even_dn, Odd_dn);
+  
+  if (f_full == M_full_ndpsi)
+    f=&D_ndpsi;
+  else if (f_full == Msw_full_ndpsi)
+    f=&D_ndpsi;
+  else {
+    f=&D_ndpsi;
+    if( g_proc_id == 0 )
+      printf("WARNING: required operator unknown for MG_solver_eo. Using standard operator.\n");
+  }
+
+  iter_count = MG_solver_nd( solver_field[2], solver_field[3], solver_field[0], solver_field[1], precision, max_iter,
+                             rel_prec, VOLUME, gf, f );
+  
+  convert_lexic_to_eo(Even_new_up, Odd_new_up, solver_field[2]);
+  convert_lexic_to_eo(Even_new_dn, Odd_new_dn, solver_field[3]);
+  finalize_solver(solver_field, 4);
+  
+  return iter_count;
+}
+
 int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
                      spinor * const up_old, spinor * const dn_old,
                      const double * shifts, const int no_shifts,
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index b9478088c..4e831f974 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -64,6 +64,13 @@ int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
 		 const double precision, const int max_iter, const int rel_prec,
 		 const int N, su3 **gf, matrix_mult_nd f);
 
+int MG_solver_nd_eo(spinor * const Even_new_up, spinor * const Odd_new_up, 
+                    spinor * const Even_new_dn, spinor * const Odd_new_dn,
+                    spinor * const Even_up, spinor * const Odd_up,
+                    spinor * const Even_dn, spinor * const Odd_dn,
+                    const double precision, const int max_iter, const int rel_prec,
+                    const int N, su3 **gf, matrix_mult_full_nd f_full);
+
 int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
                      spinor * const up_old, spinor * const dn_old,
                      const double * shifts, const int no_shifts,
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 6cf7a8efd..b8fac4ce9 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -50,6 +50,9 @@
 #ifdef QUDA
 #  include "quda_interface.h"
 #endif
+#ifdef DDalphaAMG
+#  include "DDalphaAMG_interface.h"
+#endif
 
 
 #ifdef HAVE_GPU
@@ -84,6 +87,15 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
                                    sloppy, compression );
   }
 #endif
+
+#ifdef DDalphaAMG
+  if( solver_flag==MG ) {
+    return MG_solver_nd_eo( Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
+                            Even_s, Odd_s, Even_c, Odd_c,
+                            precision, max_iter, rel_prec,
+                            VOLUME/2, g_gauge_field, M_full_ndpsi );
+  }
+#endif
   
 #ifdef HAVE_GPU
 #  ifdef TEMPORALGAUGE
@@ -201,6 +213,15 @@ int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
                                    sloppy, compression );
   }
 #endif
+
+#ifdef DDalphaAMG
+  if( solver_flag==MG ) {
+    return MG_solver_nd_eo( Even_new_s, Odd_new_s, Even_new_c, Odd_new_c,
+                            Even_s, Odd_s, Even_c, Odd_c,
+                            precision, max_iter, rel_prec,
+                            VOLUME/2, g_gauge_field, Msw_full_ndpsi );
+  }
+#endif
   
   /* here comes the inversion using even/odd preconditioning */
   if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index daf47ef7c..9b95a46ab 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -47,6 +47,9 @@
 #include "rational/rational.h"
 #include "phmc.h"
 #include "ndrat_monomial.h"
+#ifdef DDalphaAMG
+#  include "DDalphaAMG_interface.h"
+#endif
 
 void nd_set_global_parameter(monomial * const mnl) {
 
@@ -232,6 +235,7 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
 
+#ifdef DDalphaAMG
   if( mnl->solver == MGMMSND ){
     // With MG we can solve directly the unsquared operator
     solver_pm.M_ndpsi = &Qtm_tau1_ndpsi_add_Ishift;
@@ -251,8 +255,9 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
       assign_add_mul(mnl->pf, g_chi_up_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
       assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
     }
-
-  } else {
+  } else 
+#endif
+    {
     mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
                               mnl->pf, mnl->pf2, &solver_pm);
     
diff --git a/operator.c b/operator.c
index d9a4e14fe..6e90445b3 100644
--- a/operator.c
+++ b/operator.c
@@ -422,6 +422,10 @@ void op_invert(const int op_id, const int index_start, const int write_prop) {
                                               optr->solver, optr->rel_prec,
                                               optr->solver_params, optr->external_inverter, 
                                               optr->sloppy_precision, optr->compression_type);
+        // checking solution
+        M_full_ndpsi( g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2],
+                      g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4],
+                      optr->prop0, optr->prop1, optr->prop2, optr->prop3 );
       }
       else {
         optr->iterations = invert_cloverdoublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3,
@@ -430,27 +434,12 @@ void op_invert(const int op_id, const int index_start, const int write_prop) {
                                                     optr->solver, optr->rel_prec,
                                                     optr->solver_params, optr->external_inverter, 
                                                     optr->sloppy_precision, optr->compression_type);
+        // checking solution
+        Msw_full_ndpsi( g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2],
+                        g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4],
+                        optr->prop0, optr->prop1, optr->prop2, optr->prop3 );
       }
-      g_mu = optr->mubar;
-      if(optr->type != DBCLOVER) {
-        M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1);
-      }
-      else {
-        Msw_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1);
-      }
-      assign_add_mul_r(g_spinor_field[DUM_DERI+1], optr->prop2, -optr->epsbar, VOLUME/2);
-      assign_add_mul_r(g_spinor_field[DUM_DERI+2], optr->prop3, -optr->epsbar, VOLUME/2);
-    
-      g_mu = -g_mu;
-      if(optr->type != DBCLOVER) {
-        M_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
-      }
-      else {
-        Msw_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
-      }
-      assign_add_mul_r(g_spinor_field[DUM_DERI+3], optr->prop0, -optr->epsbar, VOLUME/2);
-      assign_add_mul_r(g_spinor_field[DUM_DERI+4], optr->prop1, -optr->epsbar, VOLUME/2);
-
+ 
       diff(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], optr->sr0, VOLUME/2); 
       diff(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], optr->sr1, VOLUME/2); 
       diff(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+3], optr->sr2, VOLUME/2); 
@@ -461,7 +450,7 @@ void op_invert(const int op_id, const int index_start, const int write_prop) {
       nrm1 += square_norm(g_spinor_field[DUM_DERI+3], VOLUME/2, 1); 
       nrm1 += square_norm(g_spinor_field[DUM_DERI+4], VOLUME/2, 1); 
       optr->reached_prec = nrm1;
-      g_mu = g_mu1;
+
       /* For standard normalisation */
       /* we have to mult. by 2*kappa */
       mul_r(g_spinor_field[DUM_DERI], (2*optr->kappa), optr->prop0, VOLUME/2);
diff --git a/operator/tm_operators_nd.c b/operator/tm_operators_nd.c
index f1af4acbe..73ba9132e 100644
--- a/operator/tm_operators_nd.c
+++ b/operator/tm_operators_nd.c
@@ -37,6 +37,7 @@
 #include "phmc.h"
 #include "gamma.h"
 #include "linalg_eo.h"
+#include "operator/D_psi.h"
 #include "operator/tm_operators.h"
 #include "operator/clovertm_operators.h"
 #include "operator/tm_operators_nd.h"
@@ -52,6 +53,74 @@ void M_oo_sub_g5_ndpsi(spinor * const l_s, spinor * const l_c,
 
 /* external functions */
 
+
+/******************************************
+ *
+ * This is the implementation of
+ *
+ *  M_full_ndpsi = D_w I_f + i gamma5 mubar tau3 - epsbar tau1
+ *  the full operator done for testing purpose
+ ******************************************/
+void M_full_ndpsi(spinor * const Even_new_s, spinor * const Odd_new_s, 
+                  spinor * const Even_new_c, spinor * const Odd_new_c, 
+                  spinor * const Even_s, spinor * const Odd_s,
+                  spinor * const Even_c, spinor * const Odd_c) {
+  
+  double mu = g_mu;
+  g_mu = g_mubar;
+  M_full(Even_new_s, Odd_new_s, Even_s, Odd_s);
+
+  assign_add_mul_r(Even_new_s, Even_c, -g_epsbar, VOLUME/2);
+  assign_add_mul_r(Odd_new_s, Odd_c, -g_epsbar, VOLUME/2);
+  
+  g_mu = -g_mu;
+  M_full(Even_new_c, Odd_new_c, Even_c, Odd_c);
+  
+  assign_add_mul_r(Even_new_c, Even_s, -g_epsbar, VOLUME/2);
+  assign_add_mul_r(Odd_new_c, Odd_s, -g_epsbar, VOLUME/2);
+
+  g_mu = mu;
+}
+
+void Msw_full_ndpsi(spinor * const Even_new_s, spinor * const Odd_new_s, 
+                    spinor * const Even_new_c, spinor * const Odd_new_c, 
+                    spinor * const Even_s, spinor * const Odd_s,
+                    spinor * const Even_c, spinor * const Odd_c) {
+
+  double mu = g_mu;
+  g_mu = g_mubar;
+  Msw_full(Even_new_s, Odd_new_s, Even_s, Odd_s);
+
+  assign_add_mul_r(Even_new_s, Even_c, -g_epsbar, VOLUME/2);
+  assign_add_mul_r(Odd_new_s, Odd_c, -g_epsbar, VOLUME/2);
+  
+  g_mu = -g_mu;
+  Msw_full(Even_new_c, Odd_new_c, Even_c, Odd_c);
+  
+  assign_add_mul_r(Even_new_c, Even_s, -g_epsbar, VOLUME/2);
+  assign_add_mul_r(Odd_new_c, Odd_s, -g_epsbar, VOLUME/2);
+
+  g_mu = mu;
+}
+
+// full VOLUME operator; it used D_psi which works with tm and tm+clover
+void D_ndpsi(spinor * const l_strange, spinor * const l_charm,
+             spinor * const k_strange, spinor * const k_charm) {
+
+  double mu = g_mu;
+  g_mu = g_mubar;
+  D_psi(l_strange,k_strange);
+
+  assign_add_mul_r(l_strange, k_charm, -g_epsbar, VOLUME);
+  
+  g_mu = -g_mu;
+  D_psi(l_charm,k_charm);
+  
+  assign_add_mul_r(l_charm, k_strange, -g_epsbar, VOLUME);
+
+  g_mu = mu;
+}
+
 /******************************************
  *
  * This is the implementation of
diff --git a/operator/tm_operators_nd.h b/operator/tm_operators_nd.h
index 48a7f81db..e7a9dc755 100644
--- a/operator/tm_operators_nd.h
+++ b/operator/tm_operators_nd.h
@@ -26,6 +26,20 @@ void mul_one_pm_itau2(spinor * const p, spinor * const q,
           spinor * const r, spinor * const s,
           const double sign, const int N);
 
+void M_full_ndpsi(spinor * const Even_new_s, spinor * const Odd_new_s, 
+                  spinor * const Even_new_c, spinor * const Odd_new_c, 
+                  spinor * const Even_s, spinor * const Odd_s,
+                  spinor * const Even_c, spinor * const Odd_c);
+
+void Msw_full_ndpsi(spinor * const Even_new_s, spinor * const Odd_new_s, 
+                    spinor * const Even_new_c, spinor * const Odd_new_c, 
+                    spinor * const Even_s, spinor * const Odd_s,
+                    spinor * const Even_c, spinor * const Odd_c);
+
+//This works with tm and tm+clover 
+void D_ndpsi(spinor * const l_strange, spinor * const l_charm,
+             spinor * const k_strange,  spinor * const k_charm);
+
 void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
          spinor * const k_strange,  spinor * const k_charm);
 void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
diff --git a/read_input.l b/read_input.l
index a50180fff..7616c3770 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1095,6 +1095,16 @@ static inline void rmQuotes(char *str){
     if(myverbose) printf("  Solver set to RGMixedCG line %d operator %d\n", line_of_file, current_operator);
     BEGIN(name_caller);
   }
+  DDalphaAMG {
+#ifdef DDalphaAMG
+    optr->solver = MG;
+    if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
+    BEGIN(name_caller);
+#else
+    printf("ERROR line %d operator %d: DDalphaAMG library not included\n", line_of_file, current_operator);
+    exit(1);
+#endif
+  }
 }
 
 <TMSOLVER>{
diff --git a/solver/matrix_mult_typedef_nd.h b/solver/matrix_mult_typedef_nd.h
index b9d8b814b..ce298c946 100644
--- a/solver/matrix_mult_typedef_nd.h
+++ b/solver/matrix_mult_typedef_nd.h
@@ -29,6 +29,7 @@
 #define _MATRIX_MULT_TYPEDEF_ND_H
 
 typedef void (*matrix_mult_nd)(spinor * const, spinor * const,spinor * const, spinor * const);
+typedef void (*matrix_mult_full_nd)(spinor * const, spinor * const,spinor * const, spinor * const,spinor * const, spinor * const,spinor * const, spinor * const);
 typedef void (*matrix_mult_nd32)(spinor32 * const, spinor32 * const, spinor32 * const, spinor32 * const);
 
 #endif

From 7edd999f084b772f4871109265bacb65ae6a8022 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 7 Feb 2017 11:57:25 +0200
Subject: [PATCH 11/85] Fixed checking for ndratcor_monomial heatbath. Added
 documentation

---
 doc/rational.tex             | 47 ++++++++++++++++++++-
 monomial/ndratcor_monomial.c | 81 ++++++++++++++++++++----------------
 2 files changed, 89 insertions(+), 39 deletions(-)

diff --git a/doc/rational.tex b/doc/rational.tex
index 678041ee4..fd1f51a2c 100644
--- a/doc/rational.tex
+++ b/doc/rational.tex
@@ -117,13 +117,13 @@ \subsubsection{Correction Monomial}
 The rational approximation has a finite precision. In the HMC one can
 account for this effect by estimating
 \[
-1 - |\hat Q_h| R\,,
+1 - |\hat Q_h| \mathcal{R}\,,
 \]
 which can be done in different ways:
 \begin{itemize}
 \item we include an additional monomial for
   \[
-  \det (|\hat Q_h| R)
+  \det (|\hat Q_h| \mathcal{R})
   \]
   in the Hamiltonian. If the rational apprximation is precise enough,
   it is sufficient to only include this in the heatbath and acceptance
@@ -159,6 +159,49 @@ \subsubsection{Correction Monomial}
   \frac{3}{8}Z^2 - \frac{5}{16}Z^3 + ...) \phi\, .
   \]
   Also here the series can be truncated after the first few terms.
+  Since the correction monomial is not used in the force computation of MD,
+  its final purpose for the HMC is to compute the energy difference
+  \[
+  dH_{corr} = R^\dagger \left(1-(1+Z_{old})^{1/4}(1+Z_{new})^{-1/2}(1+Z_{old})^{1/4}\right)R\,.
+  \]
+  Considering $\mathcal{O}(Z_{old}) = \mathcal{O}(Z_{new}) = \mathcal{O}(Z)$ and
+  using the previous series expansions, we obtain
+  \begin{align*}
+  dH_{corr} & =  R^\dagger \left( \frac{1}{2} Z_{old} - \frac{1}{2} Z_{new} \right)R\\
+  & + R^\dagger \left( - \frac{1}{8} Z_{old}^2 - \frac{1}{8} \left\{Z_{old} , Z_{new} \right\}  + \frac{3}{8} Z_{new}^2 \right)R\\
+  & + R^\dagger \left( \frac{1}{16} Z_{old}^3 + \frac{3}{64} \left\{ Z_{old}^2 , Z_{new} \right\} - \frac{1}{32} Z_{old} Z_{new} Z_{old} + \frac{3}{32} \left\{ Z_{old} , Z_{new}^2 \right\} - \frac{5}{16} Z_{new}^3 \right)R\\
+  & + \mathcal{O}(Z^4).
+  \end{align*}
+  The coefficients in front of the terms $R^\dagger Z_{old}^n R$ are given by the series of
+  \[
+  (1+Z_{old})^{1/2} -1 =  \frac{1}{2} Z_{old} - \frac{1}{8} Z_{old}^2 + \frac{1}{16} Z_{old}^3 + ...
+  \]
+  For this reason, when computing $\phi = B(Z_{old})\cdot R$, we use as a stopping criterion
+  \[
+  c_n R^\dagger Z_{old}^n R < \text{tolerance}
+  \]
+  where $c_n$ are the coefficients from the series of $(1+Z_{old})^{1/2}$.
+  Since $Z$ is hermitian, we can compute the next-order correction of the series in advance by evaluating
+  \[
+  c_n (Z_{old} R)^\dagger\cdot (Z_{old}^{n-1} R) < \text{tolerance}\,;
+  \]
+  in this way we save an application of $Z$ in the evaluation of $\phi = B(Z_{old})\cdot R$.
+  
+  Exploiting the hermiticity of $Z$, we can also save applications of it in the computation of
+  \[
+  dH_{corr} = R^\dagger R - \phi^\dagger\left((1+Z_{new})^{-1/2}\right)\phi\,,
+  \]
+  which is done in the acceptance step. Indeed defining $\chi_i = Z_{new}^i\phi$, $dH_{corr}$ reads as
+  \[
+  dH_{corr} = R^\dagger R - \phi^\dagger\phi + \frac{1}{2}\phi^\dagger\chi_1 - \frac{3}{8}\chi_1^\dagger\chi_1 + \frac{5}{16} \chi_1^\dagger\chi_2 - ...\,,
+  \]
+  that requires $n$ applications of $Z_{new}$ for computing $dH_{corr}$ up to $\mathcal{O}(Z_{new}^{2n})$.
+  Here we use as a stopping criterion
+  \[
+  c_n \phi^\dagger Z_{new}^{n} \phi < \text{tolerance}\,;
+  \]
+  where $c_n$ are the coefficients from the series of $(1+Z_{new})^{-1/2}$.
+
 \item the second possibility is to include this correction as a
   reweighting factor.
 \item the third is to use a more precise rational approximation for
diff --git a/monomial/ndratcor_monomial.c b/monomial/ndratcor_monomial.c
index e8aa9919b..db2e2b0ef 100644
--- a/monomial/ndratcor_monomial.c
+++ b/monomial/ndratcor_monomial.c
@@ -56,10 +56,10 @@ void check_C_ndpsi(spinor * const k_up, spinor * const k_dn,
 		   solver_pm_t * solver_pm);
 
 // applies (Q^2 R^2 -1) phi
-double apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
-		     spinor * const l_up, spinor * const l_dn,
-		     const int id, hamiltonian_field_t * const hf,
-		     solver_pm_t * solver_pm);
+void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
+                   spinor * const l_up, spinor * const l_dn,
+                   const int id, hamiltonian_field_t * const hf,
+                   solver_pm_t * solver_pm);
 
 
 
@@ -68,7 +68,8 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   solver_pm_t solver_pm;
   double atime, etime, delta;
   spinor * up0, * dn0, * up1, * dn1, * tup, * tdn, * Zup, * Zdn;
-  double coefs[6] = {1./4., -3./32., 7./128., -77./2048., 231./8192., -1463./65536.};
+  double coefs[6] = {1./4., -3./32., 7./128., -77./2048., 231./8192., -1463./65536.}; // series of (1+x)^(1/4)
+  double coefs_check[6] = {1./2., -1./8., 1./16., -5./128., 7./256., -21./1024.}; // series of (1+x)^(1/2)
   atime = gettime();
   nd_set_global_parameter(mnl);
   g_mu3 = 0.;
@@ -113,9 +114,18 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   Zup = mnl->w_fields[4]; Zdn = mnl->w_fields[5];
 
   apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &solver_pm);
-  delta = coefs[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
+  // computing correction to energy1
+  delta = coefs_check[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
   if(g_debug_level > 2 && g_proc_id == 0)
     printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", 1, 1, delta);
+  // debug for showing that the old check was giving a smaller delta
+  if(g_debug_level > 3) {
+    double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
+    if(g_proc_id == 0) {
+      printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", 1, delta_old);
+      printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", 1, 1, delta*delta);
+    }
+  }
 
   if(delta*delta > mnl->accprec) {
     assign_add_mul_r(mnl->pf, up0, coefs[0], VOLUME/2);
@@ -127,11 +137,18 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
     
     
     for(int i = 2; i < 8; i++) {
-
-      // computing next order correction
-      delta = coefs[i-1]*(scalar_prod_r(Zup, up0, VOLUME/2, 1) + scalar_prod_r(Zup, dn0, VOLUME/2, 1)); 
+      // computing next order correction to energy1
+      delta = coefs_check[i-1]*(scalar_prod_r(Zup, up0, VOLUME/2, 1) + scalar_prod_r(Zup, dn0, VOLUME/2, 1)); 
       if(g_debug_level > 2 && g_proc_id == 0)
         printf("# NDRATCOR heatbath: c_%d*(R * Z^%d * R) = %e\n", i, i, delta);
+      // debug for showing that the old check was giving a smaller delta
+      if(g_debug_level > 3) {
+        double delta_old = square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1);
+        if(g_proc_id == 0) {
+          printf("# NDRATCOR old check: || Z^%d * R ||^2 = %e\n", 1, delta_old);
+          printf("# NDRATCOR new check: (c_%d*(R * Z^%d * R))^2 = %e\n", 1, 1, delta*delta);
+        }
+      }
       if(delta*delta < mnl->accprec) break;
 
       apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
@@ -160,7 +177,7 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
 double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
   solver_pm_t solver_pm;
   monomial * mnl = &monomial_list[id];
-  double atime, etime, delta_e;
+  double atime, etime, delta;
   spinor * up0, * dn0, * up1, * dn1, * tup, * tdn;
   double coefs[6] = {-1./2., 3./8., -5./16., 35./128., -63./256., 231./1024.};
   atime = gettime();
@@ -192,26 +209,26 @@ double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
   up1 = mnl->w_fields[2]; dn1 = mnl->w_fields[3];
 
   apply_Z_ndpsi(up0, dn0, mnl->pf, mnl->pf2, id, hf, &solver_pm);
-  delta_e = coefs[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
-  mnl->energy1 += delta_e;
+  delta = coefs[0]*(scalar_prod_r(mnl->pf, up0, VOLUME/2, 1) + scalar_prod_r(mnl->pf2, dn0, VOLUME/2, 1));
+  mnl->energy1 += delta;
   if(g_debug_level > 2 && g_proc_id == 0)
-    printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", 1, 1, delta_e);
+    printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", 1, 1, delta);
 
   for(int i = 2; i < 8; i++) {
-    if(delta_e*delta_e < mnl->accprec) break;
+    if(delta*delta < mnl->accprec) break;
 
-    delta_e = coefs[i-1]*(square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1)); 
-    mnl->energy1 += delta_e;
+    delta = coefs[i-1]*(square_norm(up0, VOLUME/2, 1) + square_norm(dn0, VOLUME/2, 1)); 
+    mnl->energy1 += delta;
     if(g_debug_level > 2 && g_proc_id == 0)
-      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta_e);
-    i++;
-    if(delta_e*delta_e < mnl->accprec) break;
+      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta);
+    i++; //incrementing i
+    if(delta*delta < mnl->accprec) break;
 
     apply_Z_ndpsi(up1, dn1, up0, dn0, id, hf, &solver_pm);
-    delta_e = coefs[i-1]*(scalar_prod_r(up0, up1, VOLUME/2, 1) + scalar_prod_r(dn0, dn1, VOLUME/2, 1));
-    mnl->energy1 += delta_e;
+    delta = coefs[i-1]*(scalar_prod_r(up0, up1, VOLUME/2, 1) + scalar_prod_r(dn0, dn1, VOLUME/2, 1));
+    mnl->energy1 += delta;
     if(g_debug_level > 2 && g_proc_id == 0)
-      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta_e);
+      printf("# NDRATCOR acc step: c_%d*(phi * Z^%d * phi) = %e\n", i, i, delta);
 
     tup = up0; tdn = dn0;
     up0 = up1; dn0 = dn1;
@@ -232,11 +249,10 @@ double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
 }
 
 // applies ((Q_h\tau_1 * R)^2 - 1)
-
-double apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
-		     spinor * const l_up, spinor * const l_dn,
-		     const int id, hamiltonian_field_t * const hf,
-		     solver_pm_t * solver_pm) {
+void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
+                   spinor * const l_up, spinor * const l_dn,
+                   const int id, hamiltonian_field_t * const hf,
+                   solver_pm_t * solver_pm) {
   monomial * mnl = &monomial_list[id];
 
   mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
@@ -271,19 +287,10 @@ double apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
 		     g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np]);
   diff(k_up, k_up, l_up, VOLUME/2);
   diff(k_dn, k_dn, l_dn, VOLUME/2);
-
-  /* TO REMOVE: We don't need this quantity. 
-  double resi = square_norm(k_up, VOLUME/2, 1) + square_norm(k_dn, VOLUME/2, 1);
-  if(g_debug_level > 2 && g_proc_id == 0) {
-    printf("# NDRATCOR: ||Z * phi|| = %e\n", resi);
-  }
-  return(resi);
-  */
-  return 0;
+  
 }
 
 // computes ||(1 - C^dagger R C) phi||
-
 void check_C_ndpsi(spinor * const k_up, spinor * const k_dn,
 		   spinor * const l_up, spinor * const l_dn,
 		   const int id, hamiltonian_field_t * const hf,

From c7d3874b7c4a783dd744e1d7260fdbbb62527d61 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 7 Feb 2017 17:19:23 +0200
Subject: [PATCH 12/85] Added mms_squared_solver_prec to solver_pm_t structure.
 Defined its use in cg_mms_tm and cg_mms_tm_nd.

---
 solver/cg_mms_tm.c      |  26 ++++++-
 solver/cg_mms_tm_nd.c   |  29 +++++--
 solver/monomial_solve.c | 167 ++++++++++++++++++++++------------------
 solver/solver.h         |   2 +
 4 files changed, 140 insertions(+), 84 deletions(-)

diff --git a/solver/cg_mms_tm.c b/solver/cg_mms_tm.c
index 9e88ef743..36ef80bae 100644
--- a/solver/cg_mms_tm.c
+++ b/solver/cg_mms_tm.c
@@ -72,6 +72,16 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
   double atime, etime;
   const int nr_sf = 3;
 
+  // if solver_pm->mms_squared_solver_prec is NULL,
+  // filling it with solver_pm->squared_solver_prec
+  double *mms_squared_solver_prec = NULL;
+  if (solver_pm->mms_squared_solver_prec == NULL) {
+    mms_squared_solver_prec = (double*) malloc(solver_pm->no_shifts*sizeof(double));
+    for (int i=0; i<solver_pm->no_shifts; i++)
+      mms_squared_solver_prec[i] = solver_pm->squared_solver_prec;
+    solver_pm->mms_squared_solver_prec = mms_squared_solver_prec;
+  }
+
   atime = gettime();
   if(solver_pm->sdim == VOLUME) {
     init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
@@ -144,12 +154,14 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
       // this is useful for computing time and needed, because otherwise
       // zita might get smaller than DOUBLE_EPS and, hence, zero
       if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
-	double sn = square_norm(ps_mms_solver[im-1], N, 1);
-	if(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_pm->squared_solver_prec) {
+	double sn = square_norm(ps_mms_solver[no_shifts-2], N, 1);
+        // while because more than one shift could be converged
+	while(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_pm->mms_squared_solver_prec[no_shifts-1] && no_shifts>1) {
 	  no_shifts--;
 	  if(g_debug_level > 2 && g_proc_id == 0) {
 	    printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
       	  }
+          sn = square_norm(ps_mms_solver[no_shifts-2], N, 1);
 	}
       }
     }
@@ -167,8 +179,8 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
       printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
     }
 
-    if( ((err <= solver_pm->squared_solver_prec) && (solver_pm->rel_prec == 0)) ||
-        ((err <= solver_pm->squared_solver_prec*squarenorm) && (solver_pm->rel_prec > 0)) ||
+    if( ((err <= solver_pm->mms_squared_solver_prec[0]) && (solver_pm->rel_prec == 0) && no_shifts==1) ||
+        ((err <= solver_pm->mms_squared_solver_prec[0]*squarenorm) && (solver_pm->rel_prec > 0) && no_shifts==1) ||
         (iteration == solver_pm->max_iter -1) ) {
       /* FIXME temporary output of precision until a better solution can be found */
       *cgmms_reached_prec = err;
@@ -195,6 +207,12 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
   if(g_debug_level > 0 && g_proc_id == 0) {
     printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_pm->no_shifts, iteration, solver_pm->squared_solver_prec, etime - atime); 
   }
+
+  // freeing mms_squared_solver_prec if it has been allocated
+  if(mms_squared_solver_prec != NULL) {
+    free(mms_squared_solver_prec);
+    solver_pm->mms_squared_solver_prec = NULL;
+  }
   
   finalize_solver(solver_field, nr_sf);
   return(iteration);
diff --git a/solver/cg_mms_tm_nd.c b/solver/cg_mms_tm_nd.c
index 9da378692..2cf0d7d7c 100644
--- a/solver/cg_mms_tm_nd.c
+++ b/solver/cg_mms_tm_nd.c
@@ -72,6 +72,16 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
   double atime, etime;
   const int nr_sf = 4;
 
+  // if solver_pm->mms_squared_solver_prec is NULL,
+  // filling it with solver_pm->squared_solver_prec
+  double *mms_squared_solver_prec = NULL;
+  if (solver_pm->mms_squared_solver_prec == NULL) {
+    mms_squared_solver_prec = (double*) malloc(solver_pm->no_shifts*sizeof(double));
+    for (int i=0; i<solver_pm->no_shifts; i++)
+      mms_squared_solver_prec[i] = solver_pm->squared_solver_prec;
+    solver_pm->mms_squared_solver_prec = mms_squared_solver_prec;
+  }
+
   atime = gettime();
   if(solver_pm->sdim == VOLUME) {
     init_solver_field(&solver_field, VOLUMEPLUSRAND, 2*nr_sf);
@@ -156,13 +166,16 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
       // this is useful for computing time and needed, because otherwise
       // zita might get smaller than DOUBLE_EPS and, hence, zero
       if(iteration > 0 && (iteration % 20 == 0) && (im == shifts-1)) {
-	double sn = square_norm(ps_mms_solver[2*im], N, 1);
-	sn += square_norm(ps_mms_solver[2*im+1], N, 1);
-	if(alphas[shifts-1]*alphas[shifts-1]*sn <= solver_pm->squared_solver_prec) {
+          double sn = square_norm(ps_mms_solver[2*(shifts-1)], N, 1);
+          sn += square_norm(ps_mms_solver[2*(shifts-1)+1], N, 1);
+        // while because more than one shift could be converged
+          while(alphas[shifts-1]*alphas[shifts-1]*sn <= solver_pm->mms_squared_solver_prec[shifts-1] && shifts>1) {
 	  shifts--;
 	  if(g_debug_level > 2 && g_proc_id == 0) {
 	    printf("# CGMMSND: at iteration %d removed one shift, %d remaining\n", iteration, shifts);
 	  }
+          sn = square_norm(ps_mms_solver[2*(shifts-1)], N, 1);
+          sn += square_norm(ps_mms_solver[2*(shifts-1)+1], N, 1);
 	}
       }
     }
@@ -182,8 +195,8 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
       printf("# CGMMSND iteration: %d residue: %g\n", iteration, err); fflush( stdout );
     }
 
-    if( ((err <= solver_pm->squared_solver_prec) && (solver_pm->rel_prec == 0)) ||
-	((err <= solver_pm->squared_solver_prec*squarenorm) && (solver_pm->rel_prec > 0)) ||
+    if( ((err <= solver_pm->mms_squared_solver_prec[0]) && (solver_pm->rel_prec == 0) && shifts==1) ||
+	((err <= solver_pm->mms_squared_solver_prec[0]*squarenorm) && (solver_pm->rel_prec > 0) && shifts==1) ||
         (iteration == solver_pm->max_iter -1) ) {
       break;
     }
@@ -210,6 +223,12 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
   if(g_debug_level > 0 && g_proc_id == 0) {
     printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_pm->no_shifts, iteration, solver_pm->squared_solver_prec, etime - atime); 
   }
+
+  // freeing mms_squared_solver_prec if it has been allocated
+  if(mms_squared_solver_prec != NULL) {
+    free(mms_squared_solver_prec);
+    solver_pm->mms_squared_solver_prec = NULL;
+  }
   
   finalize_solver(solver_field, 2*nr_sf);
   return(iteration);
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 971dddf45..cf1d6b80a 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -137,86 +137,103 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
                  spinor * const Qup, spinor * const Qdn, 
                  solver_pm_t * solver_pm){ 
   int iteration_count = 0; 
-    if(solver_pm->type==MIXEDCGMMSND){
-      if(usegpu_flag){
-  #ifdef HAVE_GPU      
-    #ifdef TEMPORALGAUGE
+
+  // if solver_pm->mms_squared_solver_prec is NULL,
+  // filling it with solver_pm->squared_solver_prec
+  double *mms_squared_solver_prec = NULL;
+  if (solver_pm->mms_squared_solver_prec == NULL) {
+    mms_squared_solver_prec = (double*) malloc(solver_pm->no_shifts*sizeof(double));
+    for (int i=0; i<solver_pm->no_shifts; i++)
+      mms_squared_solver_prec[i] = solver_pm->squared_solver_prec;
+    solver_pm->mms_squared_solver_prec = mms_squared_solver_prec;
+  }
+
+  if(solver_pm->type==MIXEDCGMMSND){
+    if(usegpu_flag){
+    #ifdef HAVE_GPU      
+      #ifdef TEMPORALGAUGE
       to_temporalgauge_mms(g_gauge_field , Qup, Qdn, Pup, Pdn, solver_pm->no_shifts);
-    #endif        
-    iteration_count = dev_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);  
-    #ifdef TEMPORALGAUGE
+      #endif        
+      iteration_count = dev_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);  
+      #ifdef TEMPORALGAUGE
       from_temporalgauge_mms(Qup, Qdn, Pup, Pdn, solver_pm->no_shifts);
-    #endif 
-  #endif
+      #endif 
+    #endif
+    } else {
+      iteration_count = mixed_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
+    }
+  } else if (solver_pm->type == CGMMSND){
+    iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
+  } else if (solver_pm->type == MGMMSND){
+    matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+    if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
+      f = Qsw_pm_ndpsi_shift;
+    if( mg_no_shifts > 0 && mg_no_shifts < solver_pm->no_shifts ) {
+      iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, mg_no_shifts,
+                                          solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
+                                          solver_pm->sdim, g_gauge_field, f );
+      solver_pm->no_shifts -= mg_no_shifts;
+      solver_pm->shifts += mg_no_shifts;
+      iteration_count += cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_pm );
+      // Restoring solver_pm
+      solver_pm->no_shifts += mg_no_shifts;
+      solver_pm->shifts -= mg_no_shifts;
+    } else
+      iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, solver_pm->no_shifts,
+                                          solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
+                                          solver_pm->sdim, g_gauge_field, f );
+  } else if (solver_pm->type == RGMIXEDCG){
+    matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;
+    matrix_mult_nd32 f32  = Qtm_pm_ndpsi_shift_32;
+    if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ){ 
+      f    = Qsw_pm_ndpsi_shift;
+      f32  = Qsw_pm_ndpsi_shift_32;
+    }
+    iteration_count = 0;
+    // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
+    // and set the one relevant parameter
+    solver_params_t temp_params;
+    temp_params.mcg_delta = _default_mixcg_innereps;
+    double iter_local = 0;
+    for(int i = 0; i < solver_pm->no_shifts; ++i){
+      g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
+      iter_local = rg_mixed_cg_her_nd( Pup[i], Pdn[i], Qup, Qdn, temp_params, solver_pm->max_iter,
+                                       solver_pm->mms_squared_solver_prec[i], solver_pm->rel_prec, solver_pm->sdim, f, f32);
+      g_shift = _default_g_shift;
+      if(iter_local == -1){
+        return(-1);
       } else {
-        iteration_count = mixed_cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
-      }
-    } else if (solver_pm->type == CGMMSND){
-      iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
-    } else if (solver_pm->type == MGMMSND){
-      matrix_mult_nd f = Qtm_pm_ndpsi_shift;
-      if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
-        f = Qsw_pm_ndpsi_shift;
-      if( mg_no_shifts > 0 && mg_no_shifts < solver_pm->no_shifts ) {
-        iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, mg_no_shifts,
-                                            solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
-                                            solver_pm->sdim, g_gauge_field, f );
-        solver_pm->no_shifts -= mg_no_shifts;
-        solver_pm->shifts += mg_no_shifts;
-        iteration_count += cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_pm );
-        // Restoring solver_pm
-        solver_pm->no_shifts += mg_no_shifts;
-        solver_pm->shifts -= mg_no_shifts;
-      } else
-        iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, solver_pm->no_shifts,
-                                            solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
-                                            solver_pm->sdim, g_gauge_field, f );
-    } else if (solver_pm->type == RGMIXEDCG){
-      matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;
-      matrix_mult_nd32 f32  = Qtm_pm_ndpsi_shift_32;
-      if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ){ 
-        f    = Qsw_pm_ndpsi_shift;
-        f32  = Qsw_pm_ndpsi_shift_32;
+        iteration_count += iter_local;
       }
-      iteration_count = 0;
-      // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
-      // and set the one relevant parameter
-      solver_params_t temp_params;
-      temp_params.mcg_delta = _default_mixcg_innereps;
-      double iter_local = 0;
-      for(int i = 0; i < solver_pm->no_shifts; ++i){
-        g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
-        iter_local = rg_mixed_cg_her_nd( Pup[i], Pdn[i], Qup, Qdn, temp_params, solver_pm->max_iter,
-					solver_pm->squared_solver_prec, solver_pm->rel_prec, solver_pm->sdim, f, f32);
-        g_shift = _default_g_shift;
-        if(iter_local == -1){
-          return(-1);
-        } else {
-          iteration_count += iter_local;
-        }
-      }
-    } else if (solver_pm->type == MG){
-      matrix_mult_nd f = Qtm_pm_ndpsi_shift;
-      if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
-        f = Qsw_pm_ndpsi_shift;
-      iteration_count = 0;
-      // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
-      // and set the one relevant parameter
-      double iter_local = 0;
-      for(int i = 0; i < solver_pm->no_shifts; ++i){
-        g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
-        iter_local = MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_pm->squared_solver_prec, solver_pm->max_iter,
-				   solver_pm->rel_prec, solver_pm->sdim, g_gauge_field, f );
-        g_shift = _default_g_shift;
-        if(iter_local == -1){
-          return(-1);
-        } else {
-          iteration_count += iter_local;
-        }
+    }
+  } else if (solver_pm->type == MG){
+    matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+    if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
+      f = Qsw_pm_ndpsi_shift;
+    iteration_count = 0;
+    // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
+    // and set the one relevant parameter
+    double iter_local = 0;
+    for(int i = 0; i < solver_pm->no_shifts; ++i){
+      g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
+      iter_local = MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_pm->mms_squared_solver_prec[i], solver_pm->max_iter,
+                                 solver_pm->rel_prec, solver_pm->sdim, g_gauge_field, f );
+      g_shift = _default_g_shift;
+      if(iter_local == -1){
+        return(-1);
+      } else {
+        iteration_count += iter_local;
       }
-    } else {
-      if(g_proc_id==0) printf("Error: solver not allowed for ND mms solve. Aborting...\n");
-      exit(2);      
     }
+  } else {
+    if(g_proc_id==0) printf("Error: solver not allowed for ND mms solve. Aborting...\n");
+    exit(2);      
+  }
+
+  // freeing mms_squared_solver_prec if it has been allocated
+  if(mms_squared_solver_prec != NULL) {
+    free(mms_squared_solver_prec);
+    solver_pm->mms_squared_solver_prec = NULL;
+  }
   return(iteration_count);
 }
diff --git a/solver/solver.h b/solver/solver.h
index dcc6ddc3b..7e198f476 100644
--- a/solver/solver.h
+++ b/solver/solver.h
@@ -49,6 +49,8 @@ typedef struct {
   matrix_mult_nd32 M_ndpsi32;  
   // pointer to array of shifts
   double * shifts;
+  // squared desired residue for each shift in mms. If NULL use squared_solver_prec for all
+  double * mms_squared_solver_prec;
 } solver_pm_t;
 
 #include"solver/gmres.h"

From 3a1afd8b2753a1e042373394954486aaac8bccd6 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 16 Feb 2017 17:11:42 +0200
Subject: [PATCH 13/85] Using different tolerances for the shifts in the
 rational approximation; we scale them with the coefficient which sum the
 inverse.

---
 DDalphaAMG_interface.c       | 28 ++++++++++++++++++----------
 DDalphaAMG_interface.h       |  2 +-
 monomial/ndrat_monomial.c    | 17 ++++++++++++++++-
 monomial/ndratcor_monomial.c | 18 ++++++++++++++++--
 solver/monomial_solve.c      |  6 ++++--
 5 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 077bc7aef..489a93f7b 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -240,11 +240,11 @@ static inline int MG_check_nd( spinor * const up_new, spinor * const dn_new, spi
 static inline int MG_mms_check_nd( spinor **const up_new, spinor **const dn_new, 
                                    spinor * const up_old, spinor * const dn_old,
                                    const double * shifts, const int no_shifts, 
-                                   const int N, const double precision, matrix_mult_nd f) 
+                                   const int N, double * precision, matrix_mult_nd f) 
 {
   double differ[2], residual;
   spinor ** check_vect = NULL;
-  double acc_factor = 20;
+  double acc_factor = 2;
   
   init_solver_field(&check_vect, VOLUMEPLUSRAND,2);
 
@@ -260,13 +260,13 @@ static inline int MG_mms_check_nd( spinor **const up_new, spinor **const dn_new,
   
     residual = differ[0]/differ[1];
     
-    if( residual > precision && residual < acc_factor*precision ) {
+    if( residual > precision[i] && residual < acc_factor*precision[i] ) {
       if(g_proc_id == 0)
-        printf("WARNING: solution accepted even if the residual wasn't complitely acceptable (%e > %e) \n", residual, precision);
-    } else if( residual > acc_factor*precision ) {
+        printf("WARNING: solution accepted even if the residual wasn't complitely acceptable (%e > %e) \n", residual, precision[i]);
+    } else if( residual > acc_factor*precision[i] ) {
       if(g_proc_id == 0) {
         printf("ERROR: something bad happened... MG converged giving the wrong solution!! Trying to restart... \n");
-        printf("ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e \n", differ[0],differ[1],differ[0]/differ[1],precision);
+        printf("ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e \n", differ[0],differ[1],differ[0]/differ[1],precision[i]);
       }
       finalize_solver(check_vect, 2);
       return 0;
@@ -618,7 +618,7 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
 static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new, 
                             spinor * const up_old, spinor * const dn_old,
                             const double * shifts, const int no_shifts,
-                            const double precision, const int N, matrix_mult_nd f)
+                            double * precision, const int N, matrix_mult_nd f)
 {
   
   // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> rescale by 1/4+m
@@ -1088,13 +1088,21 @@ int MG_solver_nd_eo(spinor * const Even_new_up, spinor * const Odd_new_up,
 int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
                      spinor * const up_old, spinor * const dn_old,
                      const double * shifts, const int no_shifts,
-                     const double precision, const int max_iter, const int rel_prec,
+                     const double * precision, const int max_iter, const int rel_prec,
                      const int N, su3 **gf, matrix_mult_nd f)
 {
   
   int success=0;
-  double mg_prec = rel_prec?sqrt(precision):sqrt(precision/(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1)));
-  
+  double mg_prec[no_shifts];
+  if(rel_prec) {
+    for(int i=0; i<no_shifts; i++)
+      mg_prec[i] = sqrt(precision[i]);
+  } else {
+    double nrhs = square_norm(up_old, N, 1)+square_norm(dn_old, N, 1);
+    for(int i=0; i<no_shifts; i++)
+      mg_prec[i] = sqrt(precision[i]/nrhs);
+  }  
+
   MG_pre_solve(gf);
 
   success = MG_mms_solve_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, mg_prec, N, f );
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index 4e831f974..a3b6bcb0e 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -74,7 +74,7 @@ int MG_solver_nd_eo(spinor * const Even_new_up, spinor * const Odd_new_up,
 int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
                      spinor * const up_old, spinor * const dn_old,
                      const double * shifts, const int no_shifts,
-                     const double precision, const int max_iter, const int rel_prec,
+                     const double * precision, const int max_iter, const int rel_prec,
                      const int N, su3 **gf, matrix_mult_nd f);
 
 #endif /* DDalphaAMG_INTERFACE_H_ */
diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 9b95a46ab..06aa3ac4a 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -234,6 +234,9 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
+  solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
+  for(int i=0; i<solver_pm.no_shifts; i++)
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rnu[i]/mnl->rat.rnu[i];
 
 #ifdef DDalphaAMG
   if( mnl->solver == MGMMSND ){
@@ -243,7 +246,7 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
       solver_pm.M_ndpsi = &Qsw_tau1_ndpsi_add_Ishift;
     
     mnl->iter0 = MG_mms_solver_nd( g_chi_up_spinor_field, g_chi_dn_spinor_field, mnl->pf, mnl->pf2, 
-                                   solver_pm.shifts, solver_pm.no_shifts,solver_pm.squared_solver_prec, 
+                                   solver_pm.shifts, solver_pm.no_shifts,solver_pm.mms_squared_solver_prec, 
                                    solver_pm.max_iter, solver_pm.rel_prec, solver_pm.sdim, g_gauge_field, 
                                    solver_pm.M_ndpsi );
 
@@ -282,6 +285,10 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
       assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
     }
   }
+
+  free(solver_pm.mms_squared_solver_prec);
+  solver_pm.mms_squared_solver_prec = NULL;
+
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
@@ -322,6 +329,10 @@ double ndrat_acc(const int id, hamiltonian_field_t * const hf) {
   }
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
+  solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
+  for(int i=0; i<solver_pm.no_shifts; i++)
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rmu[i]/mnl->rat.rmu[i];
+
   mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
                              mnl->pf, mnl->pf2,&solver_pm);
 
@@ -337,6 +348,10 @@ double ndrat_acc(const int id, hamiltonian_field_t * const hf) {
 
   mnl->energy1 = scalar_prod_r(mnl->pf, mnl->w_fields[0], VOLUME/2, 1);
   mnl->energy1 += scalar_prod_r(mnl->pf2, mnl->w_fields[1], VOLUME/2, 1);
+
+  free(solver_pm.mms_squared_solver_prec);
+  solver_pm.mms_squared_solver_prec = NULL;
+
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
diff --git a/monomial/ndratcor_monomial.c b/monomial/ndratcor_monomial.c
index db2e2b0ef..4a0fa6dfd 100644
--- a/monomial/ndratcor_monomial.c
+++ b/monomial/ndratcor_monomial.c
@@ -107,6 +107,10 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
+  // since each shift will be multiplied by mnl->rat.rmu, we scale the tolerance with it.
+  solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
+  for(int i=0; i<solver_pm.no_shifts; i++)
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rmu[i]/mnl->rat.rmu[i];
 
   // apply B to the random field to generate pseudo-fermion fields
   up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
@@ -161,6 +165,10 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
       up1 = tup; dn1 = tdn;
     }
   }
+
+  free(solver_pm.mms_squared_solver_prec);
+  solver_pm.mms_squared_solver_prec = NULL;
+
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
@@ -203,6 +211,10 @@ double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
   }
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
+  // since each shift will be multiplied by mnl->rat.rmu, we scale the tolerance with it.
+  solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
+  for(int i=0; i<solver_pm.no_shifts; i++)
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rmu[i]/mnl->rat.rmu[i];
 
   // apply (Q R)^(-1) to pseudo-fermion fields
   up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
@@ -235,6 +247,8 @@ double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
     up1 = tup; dn1 = tdn;
   }
 
+  free(solver_pm.mms_squared_solver_prec);
+  solver_pm.mms_squared_solver_prec = NULL;
 
   etime = gettime();
   if(g_proc_id == 0) {
@@ -255,10 +269,9 @@ void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
                    solver_pm_t * solver_pm) {
   monomial * mnl = &monomial_list[id];
 
+  // apply R to the pseudo-fermion fields
   mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
 			                       l_up, l_dn, solver_pm);  
-  
-  // apply R to the pseudo-fermion fields
   assign(k_up, l_up, VOLUME/2);
   assign(k_dn, l_dn, VOLUME/2);
   for(int j = (mnl->rat.np-1); j > -1; j--) {
@@ -278,6 +291,7 @@ void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
     assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
 		     mnl->rat.rmu[j], VOLUME/2);
   }
+
   mul_r(g_chi_up_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
 	k_up, VOLUME/2);
   mul_r(g_chi_dn_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index cf1d6b80a..96e65958b 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -170,17 +170,19 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       f = Qsw_pm_ndpsi_shift;
     if( mg_no_shifts > 0 && mg_no_shifts < solver_pm->no_shifts ) {
       iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, mg_no_shifts,
-                                          solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
+                                          solver_pm->mms_squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
                                           solver_pm->sdim, g_gauge_field, f );
       solver_pm->no_shifts -= mg_no_shifts;
       solver_pm->shifts += mg_no_shifts;
+      solver_pm->mms_squared_solver_prec += mg_no_shifts;
       iteration_count += cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_pm );
       // Restoring solver_pm
       solver_pm->no_shifts += mg_no_shifts;
       solver_pm->shifts -= mg_no_shifts;
+      solver_pm->mms_squared_solver_prec -= mg_no_shifts;
     } else
       iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, solver_pm->no_shifts,
-                                          solver_pm->squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
+                                          solver_pm->mms_squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
                                           solver_pm->sdim, g_gauge_field, f );
   } else if (solver_pm->type == RGMIXEDCG){
     matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;

From dfc978939b218bc6c36e0b1a040131a05bfae0c6 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 16 May 2017 09:34:29 +0300
Subject: [PATCH 14/85] Add flag SPERIMENTAL around new changes in rat monomial

---
 monomial/ndrat_monomial.c    | 32 ++++++++++++++++++++++++++++----
 monomial/ndratcor_monomial.c | 22 ++++++++++++++++------
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 06aa3ac4a..7c59ead16 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -111,9 +111,21 @@ void ndrat_derivative(const int id, hamiltonian_field_t * const hf) {
     solver_pm.M_ndpsi32 = &Qsw_pm_ndpsi_32;
   }
   solver_pm.sdim = VOLUME/2;
+  solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
+  for(int i=0; i<solver_pm.no_shifts; i++) {
+#ifdef SPERIMENTAL
+    // since each shift will be multiplied by solver_pm.shifts, we scale the tolerance with it.
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/solver_pm.shifts[i]/solver_pm.shifts[i];
+#else
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec;
+#endif
+  }
   // this generates all X_j,o (odd sites only) -> g_chi_up|dn_spinor_field
   mnl->iter1 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
                    		      mnl->pf, mnl->pf2,&solver_pm);
+
+  free(solver_pm.mms_squared_solver_prec);
+  solver_pm.mms_squared_solver_prec = NULL;
   
   for(int j = (mnl->rat.np-1); j > -1; j--) {
     if(mnl->type == NDCLOVERRAT) {
@@ -235,8 +247,14 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
   solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
-  for(int i=0; i<solver_pm.no_shifts; i++)
-    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rnu[i]/mnl->rat.rnu[i];
+  for(int i=0; i<solver_pm.no_shifts; i++) {
+#ifdef SPERIMENTAL
+    // since each shift will be multiplied by solver_pm.shifts, we scale the tolerance with it.
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/solver_pm.shifts[i]/solver_pm.shifts[i];
+#else
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec;
+#endif
+  }
 
 #ifdef DDalphaAMG
   if( mnl->solver == MGMMSND ){
@@ -330,8 +348,14 @@ double ndrat_acc(const int id, hamiltonian_field_t * const hf) {
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
   solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
-  for(int i=0; i<solver_pm.no_shifts; i++)
-    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rmu[i]/mnl->rat.rmu[i];
+  for(int i=0; i<solver_pm.no_shifts; i++) {
+#ifdef SPERIMENTAL
+    // since each shift will be multiplied by solver_pm.shifts, we scale the tolerance with it.
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/solver_pm.shifts[i]/solver_pm.shifts[i];
+#else
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec;
+#endif
+  }
 
   mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
                              mnl->pf, mnl->pf2,&solver_pm);
diff --git a/monomial/ndratcor_monomial.c b/monomial/ndratcor_monomial.c
index 4a0fa6dfd..cfd4f7851 100644
--- a/monomial/ndratcor_monomial.c
+++ b/monomial/ndratcor_monomial.c
@@ -107,10 +107,15 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
-  // since each shift will be multiplied by mnl->rat.rmu, we scale the tolerance with it.
   solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
-  for(int i=0; i<solver_pm.no_shifts; i++)
-    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rmu[i]/mnl->rat.rmu[i];
+  for(int i=0; i<solver_pm.no_shifts; i++) {
+#ifdef SPERIMENTAL
+    // since each shift will be multiplied by solver_pm.shifts, we scale the tolerance with it.
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/solver_pm.shifts[i]/solver_pm.shifts[i];
+#else
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec;
+#endif
+  }
 
   // apply B to the random field to generate pseudo-fermion fields
   up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];
@@ -211,10 +216,15 @@ double ndratcor_acc(const int id, hamiltonian_field_t * const hf) {
   }
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
-  // since each shift will be multiplied by mnl->rat.rmu, we scale the tolerance with it.
   solver_pm.mms_squared_solver_prec = (double*) malloc(solver_pm.no_shifts*sizeof(double));
-  for(int i=0; i<solver_pm.no_shifts; i++)
-    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/mnl->rat.rmu[i]/mnl->rat.rmu[i];
+  for(int i=0; i<solver_pm.no_shifts; i++) {
+#ifdef SPERIMENTAL
+    // since each shift will be multiplied by solver_pm.shifts, we scale the tolerance with it.
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec/solver_pm.shifts[i]/solver_pm.shifts[i];
+#else
+    solver_pm.mms_squared_solver_prec[i] = solver_pm.squared_solver_prec;
+#endif
+  }
 
   // apply (Q R)^(-1) to pseudo-fermion fields
   up0 = mnl->w_fields[0]; dn0 = mnl->w_fields[1];

From 8434a00a4d7e19843c72bff41035eb9d2714e675 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 29 Aug 2017 14:06:42 +0300
Subject: [PATCH 15/85] Remove spurious whitespace changes

---
 hmc_tm.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/hmc_tm.c b/hmc_tm.c
index ace960cea..914e36d61 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -524,11 +524,11 @@ int main(int argc,char *argv[]) {
     // When the configuration is rejected, we have to update it in the MG and redo the setup.
     int mg_update = accept ? 0:1;
 #endif
-     for(imeas = 0; imeas < no_measurements; imeas++){
-       meas = &measurement_list[imeas];
-       if(trajectory_counter%meas->freq == 0){
-         if (g_proc_id == 0) {
-           fprintf(stdout, "#\n# Beginning online measurement.\n");
+    for(imeas = 0; imeas < no_measurements; imeas++){
+      meas = &measurement_list[imeas];
+      if(trajectory_counter%meas->freq == 0){
+        if (g_proc_id == 0) {
+          fprintf(stdout, "#\n# Beginning online measurement.\n");
         }
 #ifdef DDalphaAMG
         if( mg_update ) {
@@ -536,9 +536,9 @@ int main(int argc,char *argv[]) {
           MG_reset();
         }
 #endif
-         meas->measurefunc(trajectory_counter, imeas, even_odd_flag);
-       }
-     }
+        meas->measurefunc(trajectory_counter, imeas, even_odd_flag);
+      }
+    }
 
     if(g_proc_id == 0) {
       verbose = 1;

From 5f01c73f0801d32d1e942783f4b601b7ae64f266 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Mon, 4 Sep 2017 13:01:12 +0300
Subject: [PATCH 16/85] Bug fix: cg_mms wasn't using relative residual for the
 shifts.

---
 solver/cg_mms_tm_nd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/solver/cg_mms_tm_nd.c b/solver/cg_mms_tm_nd.c
index 9da378692..327d2feef 100644
--- a/solver/cg_mms_tm_nd.c
+++ b/solver/cg_mms_tm_nd.c
@@ -155,10 +155,12 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
       // falls below a threshold
       // this is useful for computing time and needed, because otherwise
       // zita might get smaller than DOUBLE_EPS and, hence, zero
-      if(iteration > 0 && (iteration % 20 == 0) && (im == shifts-1)) {
+      if(iteration > 0 && (iteration % 10 == 0) && (im == shifts-1)) {
 	double sn = square_norm(ps_mms_solver[2*im], N, 1);
 	sn += square_norm(ps_mms_solver[2*im+1], N, 1);
-	if(alphas[shifts-1]*alphas[shifts-1]*sn <= solver_pm->squared_solver_prec) {
+        err = alphas[shifts-1]*alphas[shifts-1]*sn;
+	if(((err <= solver_pm->squared_solver_prec) && (solver_pm->rel_prec == 0)) ||
+           ((err <= solver_pm->squared_solver_prec*squarenorm) && (solver_pm->rel_prec > 0))) {
 	  shifts--;
 	  if(g_debug_level > 2 && g_proc_id == 0) {
 	    printf("# CGMMSND: at iteration %d removed one shift, %d remaining\n", iteration, shifts);

From fd0ca8cde297603ea568baba06400d5b5391e98a Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Mon, 4 Sep 2017 14:51:36 +0300
Subject: [PATCH 17/85] Bug fix in the scaling of the operator in the rational
 approximation

---
 monomial/ndrat_monomial.c | 48 ++++++++++++++++++++++-----------------
 phmc.c                    |  4 ++--
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 81cc33b27..6d5654c1f 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -57,7 +57,7 @@ void nd_set_global_parameter(monomial * const mnl) {
   boundary(g_kappa);
   phmc_cheb_evmin = mnl->EVMin;
   phmc_invmaxev = mnl->EVMaxInv;
-  phmc_cheb_evmax = 1.;
+  phmc_cheb_evmax = mnl->EVMax;
   phmc_Cpol = 1.;
   // used for preconditioning in cloverdetrat
   g_mu3 = 0.;
@@ -325,30 +325,36 @@ double ndrat_acc(const int id, hamiltonian_field_t * const hf) {
 
 int init_ndrat_monomial(const int id) {
   monomial * mnl = &monomial_list[id];  
-
-  mnl->EVMin = mnl->StildeMin / mnl->StildeMax;
-  mnl->EVMax = 1.;
-  mnl->EVMaxInv = 1./(sqrt(mnl->StildeMax));
+  int scale = 0;
 
   if(mnl->type == RAT || mnl->type == CLOVERRAT ||
-     mnl->type == RATCOR || mnl->type == CLOVERRATCOR) {
-    init_rational(&mnl->rat, 1);
-
-    if(init_chi_spinor_field(VOLUMEPLUSRAND/2, (mnl->rat.np+2)/2) != 0) {
-      fprintf(stderr, "Not enough memory for Chi fields! Aborting...\n");
-      exit(0);
-    }
-  }
-  else {
-    init_rational(&mnl->rat, 0);
+     mnl->type == RATCOR || mnl->type == CLOVERRATCOR) 
+    scale = 1;
+
+  if(scale) {
+    // When scale = 1
+    //   the rational approximation is done for the standard operator,
+    //   which has eigenvalues between EVMin and EVMax. The parameters
+    //   of the rational approximation are scaled instead, so no
+    //   additional scaling of the operator (EVMaxInv) is required.
+    mnl->EVMin = mnl->StildeMin;
+    mnl->EVMax = mnl->StildeMax;
+    mnl->EVMaxInv = 1.;
+  } else {
+    // When scale = 0
+    //   the rational approximation is done for the normalized operator,
+    //   which has eigenvalues between StildeMin/StildeMax and 1. Thus the
+    //   operator needs to be scaled by EVMaxInv = 1/sqrt(StildeMax).
     mnl->EVMin = mnl->StildeMin / mnl->StildeMax;
     mnl->EVMax = 1.;
-    mnl->EVMaxInv = 1./(sqrt(mnl->StildeMax));
-    
-    if(init_chi_spinor_field(VOLUMEPLUSRAND/2, (mnl->rat.np+1)) != 0) {
-      fprintf(stderr, "Not enough memory for Chi fields! Aborting...\n");
-      exit(0);
-    }
+    mnl->EVMaxInv = 1./sqrt(mnl->StildeMax);
+  }
+
+  init_rational(&mnl->rat, scale);
+
+  if(init_chi_spinor_field(VOLUMEPLUSRAND/2, (mnl->rat.np+2)/2) != 0) {
+    fprintf(stderr, "Not enough memory for Chi fields! Aborting...\n");
+    exit(0);
   }
 
   return(0);
diff --git a/phmc.c b/phmc.c
index d3a46d691..880c9f11c 100644
--- a/phmc.c
+++ b/phmc.c
@@ -235,7 +235,7 @@ void phmc_compute_ev(const int trajectory_counter,
 	   mnl->name, trajectory_counter, temp2);
   }
   if(g_proc_id == 0) {
-    if(temp2 > 1.) {
+    if(temp2 > mnl->EVMax) {
       fprintf(stderr, "\nWarning: largest eigenvalue for monomial %s larger than upper bound!\n\n", mnl->name);
     }
     if(temp < mnl->EVMin) {
@@ -243,7 +243,7 @@ void phmc_compute_ev(const int trajectory_counter,
     }
     countfile = fopen(phmcfilename, "a");
     fprintf(countfile, "%.8d %1.5e %1.5e %1.5e %1.5e\n", 
-	    trajectory_counter, temp, temp2, mnl->EVMin, 1.);
+	    trajectory_counter, temp, temp2, mnl->EVMin, mnl->EVMax);
     fclose(countfile);
   }
   etime = gettime();

From af9e3a5922f74d616ccf3270b7e017e460a3460b Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 5 Sep 2017 15:42:54 +0300
Subject: [PATCH 18/85] Enable initial guess in rg_mixed_cg_her_nd

---
 solver/rg_mixed_cg_her_nd.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/solver/rg_mixed_cg_her_nd.c b/solver/rg_mixed_cg_her_nd.c
index de5643f74..b39c5d406 100644
--- a/solver/rg_mixed_cg_her_nd.c
+++ b/solver/rg_mixed_cg_her_nd.c
@@ -28,8 +28,8 @@
  *
  * in:
  *   Q: source
- * inout:
- *   P: result (initial guess currently not supported)
+ * inout:
+ *   P: initial guess on input (a zero field means no guess), result on output
  *
  * POSSIBLE IMPROVEMENTS
  * There are still quite a few things that can be tried to make it better,
@@ -193,7 +193,7 @@ int rg_mixed_cg_her_nd(spinor * const P_up, spinor * const P_dn, spinor * const
   int iter_in_sp = 0, iter_in_dp = 0, iter_out = 0;
   float rho_sp, delta = solver_params.mcg_delta;
   double beta_dp, rho_dp;
-  double sourcesquarenorm, target_eps_sq;
+  double sourcesquarenorm, guesssquarenorm, target_eps_sq;
 
   spinor *xhigh_up, *xhigh_dn, *rhigh_up, *rhigh_dn, *qhigh_up, *qhigh_dn, *phigh_up, *phigh_dn;
   spinor32 *x_up, *x_dn, *p_up, *p_dn, *q_up, *q_dn, *r_up, *r_dn;
@@ -248,17 +248,27 @@ int rg_mixed_cg_her_nd(spinor * const P_up, spinor * const P_dn, spinor * const
   if(g_debug_level > 0 && g_proc_id==0) 
     printf("#RG_Mixed CG_ND: N_outer: %d \n", N_outer);
   
-  // should compute real residual here, for now we always use a zero guess
   zero_spinor_field_32(x_up,N); zero_spinor_field_32(x_dn,N);
-  zero_spinor_field(P_up,N); zero_spinor_field(P_dn,N);
-  assign(phigh_up,Q_up,N); assign(phigh_dn,Q_dn,N);
-  assign(rhigh_up,Q_up,N); assign(rhigh_dn,Q_dn,N);
-  
-  rho_dp = ( square_norm(rhigh_up,N,1) + square_norm(rhigh_dn,N,1) );
+
+  guesssquarenorm = square_norm(P_up, N, 1);
+  guesssquarenorm += square_norm(P_dn, N, 1);
+
+  if(guesssquarenorm == 0) {
+    assign(phigh_up,Q_up,N); assign(phigh_dn,Q_dn,N);
+    assign(rhigh_up,Q_up,N); assign(rhigh_dn,Q_dn,N);
+    rho_dp = sourcesquarenorm;
+  } else {
+    // computing the residual of the initial guess: r = Q - f(P)
+    f(rhigh_up,rhigh_dn,P_up,P_dn);
+    diff(rhigh_up,Q_up,rhigh_up,N); diff(rhigh_dn,Q_dn,rhigh_dn,N);
+    assign(phigh_up,rhigh_up,N); assign(phigh_dn,rhigh_dn,N);
+    rho_dp = ( square_norm(rhigh_up,N,1) + square_norm(rhigh_dn,N,1) );
+  }
+
   assign_to_32(r_up,rhigh_up,N); assign_to_32(r_dn,rhigh_dn,N);
   rho_sp = rho_dp;
-  assign_32(p_up,r_up,N); assign_32(p_dn,r_dn,N);
-  
+  assign_32(p_up,r_up,N); assign_32(p_dn,r_dn,N); 
+
   iter_in_sp += inner_loop(x_up, x_dn, p_up, p_dn, q_up, q_dn, r_up, r_dn, &rho_sp, delta, 
                            f32, (float)target_eps_sq, 
                            N, iter_out+iter_in_sp+iter_in_dp, max_iter, 0.0, 0.0, MCG_NO_PIPELINED, MCG_NO_PR);

From 78dcd4654442801b83dae62711cde00ccb798c30 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 6 Sep 2017 10:48:56 +0300
Subject: [PATCH 19/85] Rename parameter from mg_no_shift to mg_mms_mass.

---
 DDalphaAMG_interface.c  |  3 ++-
 DDalphaAMG_interface.h  |  2 +-
 read_input.h            |  2 +-
 read_input.l            |  8 ++++----
 solver/monomial_solve.c | 44 ++++++++++++++++++++++++-----------------
 5 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 489a93f7b..53b564bab 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -34,6 +34,7 @@ int mg_lvl;
 int mg_blk[4];
 int mg_mixed_prec;
 int mg_setup_mu_set;
+double mg_mms_mass = 0;
 double mg_setup_mu;
 double mg_cmu_factor;
 double mg_dtau_update;
@@ -118,7 +119,7 @@ int mg_lvl=3;
 int mg_blk[4] = {0, 0, 0, 0};
 int mg_mixed_prec=0;
 int mg_setup_mu_set = 0; //flag that enable the use of mg_setup_mu in the setup phase
-int mg_no_shifts = -1;
+double mg_mms_mass = 0.1; // mass shift value for switching from MMS-CG to MG. MMS-CG is used for larger masses than the value.
 double mg_setup_mu = 0.; 
 double mg_cmu_factor = 1.0;
 double mg_dtau_update = 0.0;
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index a3b6bcb0e..a19560994 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -37,7 +37,7 @@ extern int mg_lvl;
 extern int mg_blk[4];
 extern int mg_mixed_prec;
 extern int mg_setup_mu_set;
-extern int mg_no_shifts; // Number of shifts to solve with MG in solve_mms_nd
+extern double mg_mms_mass;
 extern double mg_setup_mu;
 extern double mg_cmu_factor;
 extern double mg_dtau_update;
diff --git a/read_input.h b/read_input.h
index 54c3e7980..2cea6a321 100644
--- a/read_input.h
+++ b/read_input.h
@@ -131,7 +131,7 @@ extern "C"
   extern int mg_blk[4];
   extern int mg_mixed_prec;
   extern int mg_setup_mu_set;
-  extern int mg_no_shifts;
+  extern double mg_mms_mass;
   extern double mg_setup_mu;
   extern double mg_cmu_factor;
   extern double mg_dtau_update;
diff --git a/read_input.l b/read_input.l
index c07842cfd..ed35efd69 100644
--- a/read_input.l
+++ b/read_input.l
@@ -721,10 +721,10 @@ static inline void rmQuotes(char *str){
     mg_omp_num_threads=a;
     if(myverbose) printf("  MG_omp_num_threads set to %d line %d operator %d\n", mg_omp_num_threads, line_of_file, current_operator);
   }
-  {SPC}*MGNumberOfShifts{EQL}{DIGIT}+ {
-    sscanf(yytext, " %[a-zA-Z] = %d", name, &a);
-    mg_no_shifts=a;
-    if(myverbose) printf("  MGNumberOfShifts set to %d line %d operator %d\n", mg_no_shifts, line_of_file, current_operator);
+  {SPC}*MGMMSMass{EQL}{DIGIT}+ {
+    sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
+    mg_mms_mass=c;
+    if(myverbose) printf("  MGMMSMass set to %f line %d operator %d\n", mg_mms_mass, line_of_file, current_operator);
   }
   EndDDalphaAMG{SPC}* {
   if(myverbose) printf("DDalphaAMG parsed in line %d\n\n", line_of_file);
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 96e65958b..1b16f20b6 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -164,26 +164,34 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     }
   } else if (solver_pm->type == CGMMSND){
     iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
-  } else if (solver_pm->type == MGMMSND){
-    matrix_mult_nd f = Qtm_pm_ndpsi_shift;
-    if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
-      f = Qsw_pm_ndpsi_shift;
-    if( mg_no_shifts > 0 && mg_no_shifts < solver_pm->no_shifts ) {
+  } else if (solver_pm->type == MGMMSND) {
+    // if the mg_mms_mass is larger than the smallest shift we use MG
+    if (mg_mms_mass >= solver_pm->shifts[0]) { 
+
+      // if the mg_mms_mass is smaller than the larger shifts, we use CGMMS for those
+      int no_shifts = solver_pm->no_shifts;
+      int mg_no_shifts = solver_pm->no_shifts;
+      while (mg_mms_mass < solver_pm->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
+      if (mg_no_shifts < no_shifts) {
+        solver_pm->no_shifts = no_shifts - mg_no_shifts;
+        solver_pm->shifts += mg_no_shifts;
+        solver_pm->mms_squared_solver_prec += mg_no_shifts;
+        iteration_count = cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_pm );
+        // Restoring solver_pm
+        solver_pm->no_shifts = no_shifts;
+        solver_pm->shifts -= mg_no_shifts;
+        solver_pm->mms_squared_solver_prec -= mg_no_shifts;
+      }
+            
+      matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+      if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
+        f = Qsw_pm_ndpsi_shift;
       iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, mg_no_shifts,
                                           solver_pm->mms_squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
                                           solver_pm->sdim, g_gauge_field, f );
-      solver_pm->no_shifts -= mg_no_shifts;
-      solver_pm->shifts += mg_no_shifts;
-      solver_pm->mms_squared_solver_prec += mg_no_shifts;
-      iteration_count += cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_pm );
-      // Restoring solver_pm
-      solver_pm->no_shifts += mg_no_shifts;
-      solver_pm->shifts -= mg_no_shifts;
-      solver_pm->mms_squared_solver_prec -= mg_no_shifts;
-    } else
-      iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, solver_pm->no_shifts,
-                                          solver_pm->mms_squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
-                                          solver_pm->sdim, g_gauge_field, f );
+    } else {
+      iteration_count = cg_mms_tm_nd( Pup, Pdn, Qup, Qdn, solver_pm );
+    }
   } else if (solver_pm->type == RGMIXEDCG){
     matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;
     matrix_mult_nd32 f32  = Qtm_pm_ndpsi_shift_32;
@@ -208,7 +216,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         iteration_count += iter_local;
       }
     }
-  } else if (solver_pm->type == MG){
+  } else if (solver_pm->type == MG) {
     matrix_mult_nd f = Qtm_pm_ndpsi_shift;
     if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
       f = Qsw_pm_ndpsi_shift;

From c8d3e3d80ad8846691305fc44669659d2d4547e7 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 6 Sep 2017 11:14:08 +0300
Subject: [PATCH 20/85] Using initial guess for rgmixedCG in mms solver

---
 solver/monomial_solve.c | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 1b16f20b6..57a1f755e 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -51,6 +51,7 @@
 #include "operator/clovertm_operators.h"
 #include "operator/clovertm_operators_32.h"
 #include "monomial_solve.h"
+#include "linalg_eo.h"
 #ifdef DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
@@ -205,7 +206,31 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     solver_params_t temp_params;
     temp_params.mcg_delta = _default_mixcg_innereps;
     double iter_local = 0;
-    for(int i = 0; i < solver_pm->no_shifts; ++i){
+    for(int i = solver_pm->no_shifts-1; i>=0; i--){
+      // preparing initial guess                                                                                                                                                                       
+      if(i==solver_pm->no_shifts-1) {
+        zero_spinor_field(Pup[i], solver_pm->sdim);
+        zero_spinor_field(Pdn[i], solver_pm->sdim);
+      } else {
+        double coeff;
+        for( int j = solver_pm->no_shifts-1; j > i; j-- ) {
+          coeff = 1;
+          for( int k = solver_pm->no_shifts-1; k > i; k-- ) {
+            if(j!=k)
+              coeff *= (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[i]*solver_pm->shifts[i])/
+                (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[j]*solver_pm->shifts[j]);
+          }
+          if(j==solver_pm->no_shifts-1) {
+            mul(Pup[i], coeff, Pup[j], solver_pm->sdim);
+            mul(Pdn[i], coeff, Pdn[j], solver_pm->sdim);
+          } else {
+            assign_add_mul(Pup[i], Pup[j], coeff, solver_pm->sdim);
+            assign_add_mul(Pdn[i], Pdn[j], coeff, solver_pm->sdim);
+          }
+        }
+      }
+      
+      // inverting
       g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
       iter_local = rg_mixed_cg_her_nd( Pup[i], Pdn[i], Qup, Qdn, temp_params, solver_pm->max_iter,
                                        solver_pm->mms_squared_solver_prec[i], solver_pm->rel_prec, solver_pm->sdim, f, f32);

From 5188b6d695bfa80397efbdb3f55c122b166907dd Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 7 Sep 2017 14:21:15 +0300
Subject: [PATCH 21/85] Add convert_even_to_lexic functions

---
 linalg/Makefile.in             |   2 +-
 linalg/convert_even_to_lexic.c | 108 +++++++++++++++++++++++++++++++++
 linalg/convert_even_to_lexic.h |  26 ++++++++
 linalg_eo.h                    |   2 +-
 4 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 linalg/convert_even_to_lexic.c
 create mode 100644 linalg/convert_even_to_lexic.h

diff --git a/linalg/Makefile.in b/linalg/Makefile.in
index 24e9f1e8a..3b15f8f1d 100644
--- a/linalg/Makefile.in
+++ b/linalg/Makefile.in
@@ -46,7 +46,7 @@ liblinalg_TARGETS = assign_add_mul_r_add_mul \
 	assign_mul_add_r_and_square \
 	addto_32 scalar_prod_r_32 assign_mul_add_r_32 assign_add_mul_r_32 \
 	square_norm_32 assign_to_32 diff_32 \
-	convert_odd_to_lexic set_even_to_zero mul_gamma5
+	convert_odd_to_lexic convert_even_to_lexic set_even_to_zero mul_gamma5
 
 liblinalg_STARGETS = diff assign_add_mul_r assign_mul_add_r square_norm
 
diff --git a/linalg/convert_even_to_lexic.c b/linalg/convert_even_to_lexic.c
new file mode 100644
index 000000000..6c56748d5
--- /dev/null
+++ b/linalg/convert_even_to_lexic.c
@@ -0,0 +1,108 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#ifdef OMP
+# include <omp.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "convert_even_to_lexic.h"
+/* Scatter the even-site half field r into the even sites of the full-volume field P; other sites of P are not written. */
+void convert_even_to_lexic(spinor * const P, spinor * const r) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+
+  int x, y, z, t, i, ix;
+  spinor * p = NULL; /* alias of r, assigned right before each copy */
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(x = 0; x < LX; x++) {
+    for(y = 0; y < LY; y++) {
+      for(z = 0; z < LZ; z++) {
+	for(t = 0; t < T; t++) {
+	  ix = g_ipt[t][x][y][z]; /* position of the site in P */
+	  i = g_lexic2eosub[ ix ]; /* position of the same site in r */
+	  if((t+x+y+z+g_proc_coords[3]*LZ+g_proc_coords[2]*LY 
+	      + g_proc_coords[0]*T+g_proc_coords[1]*LX)%2 == 0) { /* even coordinate sum, g_proc_coords offsets included */
+	       p = r;
+	       memcpy((P+ix), (p+i), sizeof(spinor));
+	  }
+	}
+      }
+    }
+  }
+
+#ifdef OMP
+  } /*OpenMP closing brace */
+#endif
+
+  return;
+}
+
+/* Gather the even sites of the full-volume field P into the half field r.
+ *      P: spinor with full volume (source; only even sites are read)
+ *      r: new spinor even (destination, indexed through g_lexic2eosub)
+ */
+void convert_lexic_to_even(spinor * const r, spinor * const P) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+
+  int x, y, z, t, i, ix;
+  spinor * p = NULL; /* alias of r, assigned right before each copy */
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(x = 0; x < LX; x++) {
+    for(y = 0; y < LY; y++) {
+      for(z = 0; z < LZ; z++) {
+	for(t = 0; t < T; t++) {
+	  ix = g_ipt[t][x][y][z]; /* position of the site in P */
+	  i = g_lexic2eosub[ ix ]; /* position of the same site in r */
+	  if((t+x+y+z+g_proc_coords[3]*LZ+g_proc_coords[2]*LY 
+	      + g_proc_coords[0]*T+g_proc_coords[1]*LX)%2 == 0) { /* even coordinate sum, g_proc_coords offsets included */
+	    p = r;
+	    memcpy((p+i), (P+ix), sizeof(spinor));
+	  }
+	}
+      }
+    }
+  }
+
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+
+  return;
+}
diff --git a/linalg/convert_even_to_lexic.h b/linalg/convert_even_to_lexic.h
new file mode 100644
index 000000000..04eb066c0
--- /dev/null
+++ b/linalg/convert_even_to_lexic.h
@@ -0,0 +1,26 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifndef _CONVERT_EVEN_TO_LEXIC_H
+#define _CONVERT_EVEN_TO_LEXIC_H
+
+void convert_even_to_lexic(spinor * const P, spinor * const r); /* copies half field r into the even sites of full field P */
+void convert_lexic_to_even(spinor * const r, spinor * const P); /* copies the even sites of full field P into half field r */
+
+#endif
diff --git a/linalg_eo.h b/linalg_eo.h
index 020f0483a..51f7f1ac3 100644
--- a/linalg_eo.h
+++ b/linalg_eo.h
@@ -66,7 +66,7 @@
 #include "linalg/mattimesvec.h"
 
 #include "linalg/convert_eo_to_lexic.h"
-
+#include "linalg/convert_even_to_lexic.h"
 #include "linalg/convert_odd_to_lexic.h"
 #include "linalg/set_even_to_zero.h"
 #include "linalg/mul_gamma5.h"

From 2c77d9ee4622bdf823fc8a9cdc582713c9ff8b9b Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 7 Sep 2017 14:24:14 +0300
Subject: [PATCH 22/85] Add Laplacian initial guess for MG and RG-MIXED-CG when
 they are used as MMS solvers.

---
 DDalphaAMG_interface.c    | 306 +++++++++++++++++++++++++++++++-------
 monomial/ndrat_monomial.c |   2 +-
 read_input.l              |   3 +-
 solver/monomial_solve.c   |  62 ++++----
 solver/solver_types.h     |   3 +-
 5 files changed, 295 insertions(+), 81 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 53b564bab..8abd88703 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -98,6 +98,7 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
 #include "operator/tm_operators.h"
 #include "operator/tm_operators_nd.h"
 #include "operator/clovertm_operators.h"
+#include "operator/Hopping_Matrix.h"
 
 //Enable to test the solution. It cost an application more of the operator. 
 //TODO: test all the operators interfaced and then undefine this flag.
@@ -485,35 +486,171 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
   return mg_status.success;
 }
 
-static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * const up_old, spinor * const dn_old,
+static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old, spinor * const dn_old,
 			const double precision, const int N, matrix_mult_nd f)
 {
   
   // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> rescale by 1/4+m
   // moreover in the nd case, the tmLQCD is multiplied by phmc_invmaxev
   double mg_scale=0.5/g_kappa/phmc_invmaxev;
-  double *old1 = (double*) up_old; 
-  double *old2 = (double*) dn_old; 
-  double *new1 = (double*) up_new;
-  double *new2 = (double*) dn_new;
-  spinor ** solver_field = NULL;
-  
+  double sqnorm;
+  int init_guess = 0;
+  spinor *old1 = up_old; 
+  spinor *old2 = dn_old; 
+  spinor *new1 = up_new, *new1tmp;
+  spinor *new2 = dn_new, *new2tmp;
+  spinor ** solver_field = NULL, ** oe_solver_field = NULL;
+  int no_solver_field = 0;
+
   if( N != VOLUME && N != VOLUME/2 ) {
     if( g_proc_id == 0 )
       printf("ERROR: N = %d in MG_solve. Expettected N == VOLUME (%d) or VOLUME/2 (%d)\n", N, VOLUME, VOLUME/2);
     return 0;
   }
 
+  if (N==VOLUME/2) no_solver_field += 4;
+
+  // Checking if initial guess is given
+  sqnorm = square_norm(up_new, N, 1);
+  sqnorm += square_norm(dn_new, N, 1);
+  if ( sqnorm>0 ) init_guess = 1;
+
+  // In case of initial guess and squared operator, we do the inversion in two step and we need two more vectors
+  if ( init_guess && (
+            f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift ))  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+    no_solver_field += 2;
+
+  // Allocating and assigning fields
+  if(no_solver_field>0)
+    init_solver_field(&solver_field, VOLUMEPLUSRAND,no_solver_field);
+
   if (N==VOLUME/2) {
-    init_solver_field(&solver_field, VOLUMEPLUSRAND,4);
-    old1 = (double*) solver_field[0];
-    old2 = (double*) solver_field[1];
-    new1 = (double*) solver_field[2];
-    new2 = (double*) solver_field[3];
-    convert_odd_to_lexic( (spinor*) old1, up_old);
-    convert_odd_to_lexic( (spinor*) old2, dn_old);
+    old1 = solver_field[--no_solver_field];
+    old2 = solver_field[--no_solver_field];
+    new1 = solver_field[--no_solver_field];
+    new2 = solver_field[--no_solver_field];
+    convert_odd_to_lexic(old1, up_old);
+    convert_odd_to_lexic(old2, dn_old);
+    set_even_to_zero(old1);
+    set_even_to_zero(old2);
+  }
+
+  if ( init_guess && (
+            f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift )) {// (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+    new1tmp = solver_field[--no_solver_field];
+    new2tmp = solver_field[--no_solver_field];
   }
+
+  // Reconstracting initial guess in case of oe
+  if ( init_guess && N==VOLUME/2 ) {
+    init_solver_field(&oe_solver_field, VOLUMEPLUSRAND, 4);
+    spinor* tmp11 = oe_solver_field[0];
+    spinor* tmp21 = oe_solver_field[1];
+    spinor* tmp12 = oe_solver_field[2];
+    spinor* tmp22 = oe_solver_field[3];
+
+#ifdef MGTEST
+    double differ[2];
+    f( tmp11, tmp12, up_new, dn_new);
+    diff( tmp11, tmp11, up_old, N);
+    diff( tmp12, tmp12, dn_old, N);
+    differ[0] = sqrt(square_norm(tmp11, N, 1)+square_norm(tmp12, N, 1));
+    differ[1] = sqrt(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1));
+  
+    if(g_proc_id == 0)
+      printf("MG TEST: using initial guess. Relative residual = %e  \n", differ[0]/differ[1]);
+#endif
+
+    /* Reconstruct the even sites                */
+    if (    f == Qtm_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+            f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
+            f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
+            f == Qsw_tau1_ndpsi_sub_Ishift ) {// Gamma5 Dh tau1 - Schur complement with minus shift
+      // tau1 exchange tmp11 <-> tmp12
+      Hopping_Matrix(EO, tmp12, up_new);
+      Hopping_Matrix(EO, tmp11, dn_new);
+
+      Msw_ee_inv_ndpsi(tmp21, tmp22, tmp11, tmp12);
+
+      /* Assigning with plus sign for the even
+       * since in Hopping_Matrix the minus is missing
+       */
+      // tau1 exchange tmp22 <-> tmp21
+      convert_eo_to_lexic(new1, tmp22, up_new);
+      convert_eo_to_lexic(new2, tmp21, dn_new);
+    } else {
+      Hopping_Matrix(EO, tmp11, up_new);
+      Hopping_Matrix(EO, tmp12, dn_new);
+
+      Msw_ee_inv_ndpsi(tmp21, tmp22, tmp11, tmp12);
+
+      /* Assigning with plus sign for the even
+       * since in Hopping_Matrix the minus is missing
+       */
+      convert_eo_to_lexic(new1, tmp21, up_new);
+      convert_eo_to_lexic(new2, tmp22, dn_new);
+    }
+  
+    // if squared obtaining initial guess for Gamma5 Dh
+    if (    f == Qtm_pm_ndpsi       ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+      Qtm_dagger_ndpsi(tmp11, tmp12, up_new, dn_new); // tau1 Gamma5 Dh tau1
+    }
+    else if(f == Qsw_pm_ndpsi       ) { // (Gamma5 Dh tau1)^2 - Schur complement squared
+      Qsw_dagger_ndpsi(tmp11, tmp12, up_new, dn_new); // tau1 Gamma5 Dh tau1
+    }
+    else if(f == Qtm_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+      Qtm_tau1_ndpsi_sub_Ishift(tmp12, tmp11, up_new, dn_new); // tau1 exchange tmp11 <-> tmp12  
+    }
+    else if(f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+      Qsw_tau1_ndpsi_sub_Ishift(tmp12, tmp11, up_new, dn_new); // tau1 exchange tmp11 <-> tmp12
+    }
+
+    if (    f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift ){  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+
+      // tau1 exchange new1tmp <-> new2tmp
+      convert_odd_to_lexic( new2tmp, tmp11);
+      convert_odd_to_lexic( new1tmp, tmp12);
+      Hopping_Matrix(EO, tmp21, tmp11);
+      Hopping_Matrix(EO, tmp22, tmp12);
+      Msw_ee_inv_ndpsi(tmp11, tmp12, tmp21, tmp22);
+      convert_even_to_lexic(new2tmp, tmp11);
+      convert_even_to_lexic(new1tmp, tmp12);
+    } 
+    finalize_solver(oe_solver_field, 4);
+  } 
+#ifdef MGTEST
+  else {
+    init_solver_field(&oe_solver_field, VOLUMEPLUSRAND, 2);
+    spinor* tmp1 = oe_solver_field[0];
+    spinor* tmp2 = oe_solver_field[1];
+
+    double differ[2];
+    f( tmp1, tmp2, up_new, dn_new);
+    diff( tmp1, tmp1, up_old, N);
+    diff( tmp2, tmp2, dn_old, N);
+    differ[0] = sqrt(square_norm(tmp1, N, 1)+square_norm(tmp2, N, 1));
+    differ[1] = sqrt(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1));
   
+    if(g_proc_id == 0)
+      printf("MG TEST: using initial guess. Relative residual = %e  \n", differ[0]/differ[1]);
+    finalize_solver(oe_solver_field, 2);
+  }
+#endif
+
+
   // Checking if the operator is in the list and compatible with N
   if (      f == Qtm_ndpsi ||           //  Gamma5 Dh    - Schur complement with csw = 0
 	    f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
@@ -523,10 +660,10 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qsw_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with minus shift
-	    f == Qtm_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+	    f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     if( N != VOLUME/2 && g_proc_id == 0 )
       printf("WARNING: expected N == VOLUME/2 for the required operator in MG_solve. Continuing with N == VOLUME\n");
   }
@@ -539,8 +676,8 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
 	   N==VOLUME?"":"Qsw_ndpsi");
 
   // Setting mu and eps
-  if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh)^2 - Schur complement squared with shift
+  if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     MG_update_mubar_epsbar( g_mubar, g_epsbar, sqrt(g_shift) );
   else if ( f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qsw_tau1_ndpsi_add_Ishift )  // Gamma5 Dh tau1 - Schur complement with plus shift
@@ -551,7 +688,9 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
   else if ( f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
 	    f == Qsw_dagger_ndpsi )     //  Gamma5 Dh    - Schur complement with mu = -mubar
     MG_update_mubar_epsbar( -g_mubar, g_epsbar, 0 );
-  else if ( f == D_ndpsi )              //  Dh
+  else if ( f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == D_ndpsi )              //  Dh
     MG_update_mubar_epsbar( g_mubar, g_epsbar, 0 );
   else
     MG_update_mubar_epsbar( g_mubar, g_epsbar, 0 );
@@ -561,46 +700,111 @@ static int MG_solve_nd( spinor * const up_new, spinor * const dn_new, spinor * c
 	    f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
 	    f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
 	    f == Qsw_dagger_ndpsi ) {   //  Gamma5 Dh    - Schur complement with mu = -mubar
-    mul_gamma5((spinor *const) old1, VOLUME);
-    mul_gamma5((spinor *const) old2, VOLUME);
-    DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
+    mul_gamma5(old1, VOLUME);
+    mul_gamma5(old2, VOLUME);
+    if (init_guess) {
+      // Removing normalization from initial guess
+      mul_r(new1, 1/mg_scale, new1, VOLUME);
+      mul_r(new2, 1/mg_scale, new2, VOLUME);
+      DDalphaAMG_solve_doublet_with_guess( (double*) new1, (double*) old1, (double*) new2, (double*) old2,
+                                           precision, &mg_status );
+    } else {
+      DDalphaAMG_solve_doublet( (double*) new1, (double*) old1, (double*) new2, (double*) old2, 
+                                precision, &mg_status );
+    }
     if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
-      mul_gamma5((spinor *const) old1, VOLUME);
-      mul_gamma5((spinor *const) old2, VOLUME);
+      mul_gamma5(old1, VOLUME);
+      mul_gamma5(old2, VOLUME);
     }
   }
   else if ( f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qsw_tau1_ndpsi_sub_Ishift ) {// Gamma5 Dh tau1 - Schur complement with minus shift
-    mul_gamma5((spinor *const) old1, VOLUME);
-    mul_gamma5((spinor *const) old2, VOLUME);
+    mul_gamma5(old1, VOLUME);
+    mul_gamma5(old2, VOLUME);
     // tau1 exchange new1 <-> new2
-    DDalphaAMG_solve_doublet( new2, old1, new1, old2, precision, &mg_status );
+    if (init_guess) {
+      // Removing normalization from initial guess
+      mul_r(new1, 1/mg_scale, new1, VOLUME);
+      mul_r(new2, 1/mg_scale, new2, VOLUME);
+      DDalphaAMG_solve_doublet_with_guess( (double*) new2, (double*) old1, (double*) new1, (double*) old2, 
+                                           precision, &mg_status );
+    } else {
+      DDalphaAMG_solve_doublet( (double*) new2, (double*) old1, (double*) new1, (double*) old2, 
+                                precision, &mg_status );
+    }
     if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
-      mul_gamma5((spinor *const) old1, VOLUME);
-      mul_gamma5((spinor *const) old2, VOLUME);
+      mul_gamma5(old1, VOLUME);
+      mul_gamma5(old2, VOLUME);
     }
   }	    
-  else if ( f == Qtm_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
-    mg_scale *= mg_scale;
+  else if ( f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh
     // tmLQCD:          gamma5 Dh tau1 gamma5 Dh tau1
-    DDalphaAMG_solve_doublet_squared_odd( new2, old2, new1, old1, precision, &mg_status );
+    if (init_guess) {
+      mul_gamma5(old1, VOLUME);
+      mul_gamma5(old2, VOLUME);
+      // Removing normalization from initial guess
+      mul_r(new1tmp, 1/mg_scale, new1tmp, VOLUME);
+      mul_r(new2tmp, 1/mg_scale, new2tmp, VOLUME);
+      DDalphaAMG_solve_doublet_with_guess( (double*) new2tmp, (double*) old1, (double*) new1tmp, (double*) old2,
+                                           precision, &mg_status );
+      if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
+        mul_gamma5(old1, VOLUME);
+        mul_gamma5(old2, VOLUME);
+      }
+      mul_gamma5(new1tmp, VOLUME);
+      mul_gamma5(new2tmp, VOLUME);
+      set_even_to_zero(new1tmp);
+      set_even_to_zero(new2tmp);
+      // Removing normalization from initial guess
+      mg_scale *= mg_scale;
+      mul_r(new1, 1/mg_scale, new1, VOLUME);
+      mul_r(new2, 1/mg_scale, new2, VOLUME);
+      if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+                f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+        MG_update_mubar_epsbar( g_mubar, g_epsbar, -sqrt(g_shift) );
+      DDalphaAMG_solve_doublet_with_guess( (double*) new2, (double*) new1tmp, (double*) new1, (double*) new2tmp,
+                                           precision, &mg_status );      
+    } else {
+      mg_scale *= mg_scale;
+      DDalphaAMG_solve_doublet_squared_odd( (double*) new2, (double*) old2, (double*) new1, (double*) old1,
+                                            precision, &mg_status );
+    }
+  }
+  else if ( f == D_ndpsi ) {            //  Dh
+    if (init_guess) {
+      // Removing normalization from initial guess
+      mul_r(new1, 1/mg_scale, new1, VOLUME);
+      mul_r(new2, 1/mg_scale, new2, VOLUME);
+      DDalphaAMG_solve_doublet_with_guess( (double*) new1, (double*) old1, (double*) new2, (double*) old2,
+                                           precision, &mg_status );
+    } else {
+      DDalphaAMG_solve_doublet( (double*) new1, (double*) old1, (double*) new2, (double*) old2,
+                                precision, &mg_status );
+    }
+  } else {
+    if (init_guess) {
+      // Removing normalization from initial guess
+      mul_r(new1, 1/mg_scale, new1, VOLUME);
+      mul_r(new2, 1/mg_scale, new2, VOLUME);
+      DDalphaAMG_solve_doublet_with_guess( (double*) new1, (double*) old1, (double*) new2, (double*) old2,
+                                           precision, &mg_status );
+    } else {
+      DDalphaAMG_solve_doublet( (double*) new1, (double*) old1, (double*) new2, (double*) old2,
+                                precision, &mg_status );
+    }
   }
-  else if ( f == D_ndpsi )              //  Dh
-    DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
-  else
-    DDalphaAMG_solve_doublet( new1, old1, new2, old2, precision, &mg_status );
-  
   if (N==VOLUME/2) {
-    convert_lexic_to_odd(up_new, (spinor*) new1);
-    convert_lexic_to_odd(dn_new, (spinor*) new2);
-    finalize_solver(solver_field, 4);
+    convert_lexic_to_odd(up_new, new1);
+    convert_lexic_to_odd(dn_new, new2);
   }
+  if (no_solver_field>0)
+    finalize_solver(solver_field, no_solver_field);
   mul_r(up_new ,mg_scale, up_new, N);
   mul_r(dn_new ,mg_scale, dn_new, N);
   
@@ -665,8 +869,8 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qsw_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with minus shift
-            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     if( N != VOLUME/2 ) {
       if( g_proc_id == 0 )
         printf("ERROR: expected N == VOLUME/2 for the required operator in MG_mms_solve_nd.\n");
@@ -679,8 +883,8 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
   // Setting mubar, epsbar and shifts
   if (	    f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
-            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     MG_update_mubar_epsbar( g_mubar, g_epsbar, shifts[0] );
     for( int i = 0; i < no_shifts; i++ ) {
       mg_odd_shifts[i]  = shifts[i]*mg_scale;
@@ -711,8 +915,8 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
       mul_gamma5((spinor *const) old2, VOLUME);
     }
   }	    
-  else if ( f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh)^2 - Schur complement squared with shift
+  else if ( f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     mg_scale *= mg_scale;
     // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh
     // tmLQCD:          gamma5 Dh tau1 gamma5 Dh tau1
diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 7c59ead16..0d92606a3 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -257,7 +257,7 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
 #ifdef DDalphaAMG
-  if( mnl->solver == MGMMSND ){
+  if( mnl->solver == MG ){
     // With MG we can solve directly the unsquared operator
     solver_pm.M_ndpsi = &Qtm_tau1_ndpsi_add_Ishift;
     if(mnl->type == NDCLOVERRAT)
diff --git a/read_input.l b/read_input.l
index ed35efd69..93aa1a29b 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1742,8 +1742,7 @@ static inline void rmQuotes(char *str){
   }
   DDalphaAMG {
     if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
-    // mnl->solver = MG;
-    mnl->solver = MGMMSND;
+    mnl->solver = MG;
     BEGIN(solver_caller);
   }
 }
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 57a1f755e..905ce8ecf 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -38,6 +38,7 @@
 # include<config.h>
 #endif
 #include "global.h"
+#include "start.h"
 #include "read_input.h"
 #include "default_input_values.h"
 #include "solver/solver.h"
@@ -165,7 +166,9 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     }
   } else if (solver_pm->type == CGMMSND){
     iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
-  } else if (solver_pm->type == MGMMSND) {
+  }
+#ifdef DDalphaAMG
+  else if (solver_pm->type == MG) {
     // if the mg_mms_mass is larger than the smallest shift we use MG
     if (mg_mms_mass >= solver_pm->shifts[0]) { 
 
@@ -183,17 +186,45 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         solver_pm->shifts -= mg_no_shifts;
         solver_pm->mms_squared_solver_prec -= mg_no_shifts;
       }
-            
+
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
       if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
         f = Qsw_pm_ndpsi_shift;
-      iteration_count = MG_mms_solver_nd( Pup, Pdn, Qup, Qdn, solver_pm->shifts, mg_no_shifts,
-                                          solver_pm->mms_squared_solver_prec, solver_pm->max_iter, solver_pm->rel_prec,
-                                          solver_pm->sdim, g_gauge_field, f );
+      for(int i = solver_pm->no_shifts-1; i>=0; i--){
+        // preparing initial guess                                                                                                                                                                       
+        if(i==solver_pm->no_shifts-1) {
+          zero_spinor_field(Pup[i], solver_pm->sdim);
+          zero_spinor_field(Pdn[i], solver_pm->sdim);
+        } else {
+          double coeff;
+          for( int j = solver_pm->no_shifts-1; j > i; j-- ) {
+            coeff = 1;
+            for( int k = solver_pm->no_shifts-1; k > i; k-- ) {
+              if(j!=k)
+                coeff *= (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[i]*solver_pm->shifts[i])/
+                  (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[j]*solver_pm->shifts[j]);
+            }
+            if(j==solver_pm->no_shifts-1) {
+              mul(Pup[i], coeff, Pup[j], solver_pm->sdim);
+              mul(Pdn[i], coeff, Pdn[j], solver_pm->sdim);
+            } else {
+              assign_add_mul(Pup[i], Pup[j], coeff, solver_pm->sdim);
+              assign_add_mul(Pdn[i], Pdn[j], coeff, solver_pm->sdim);
+            }
+          }
+        }
+        
+        g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
+        iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_pm->mms_squared_solver_prec[i], solver_pm->max_iter,
+                                         solver_pm->rel_prec, solver_pm->sdim, g_gauge_field, f );
+        g_shift = _default_g_shift;
+      }
     } else {
       iteration_count = cg_mms_tm_nd( Pup, Pdn, Qup, Qdn, solver_pm );
     }
-  } else if (solver_pm->type == RGMIXEDCG){
+  }
+#endif
+  else if (solver_pm->type == RGMIXEDCG){
     matrix_mult_nd   f    = Qtm_pm_ndpsi_shift;
     matrix_mult_nd32 f32  = Qtm_pm_ndpsi_shift_32;
     if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ){ 
@@ -241,25 +272,6 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         iteration_count += iter_local;
       }
     }
-  } else if (solver_pm->type == MG) {
-    matrix_mult_nd f = Qtm_pm_ndpsi_shift;
-    if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
-      f = Qsw_pm_ndpsi_shift;
-    iteration_count = 0;
-    // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
-    // and set the one relevant parameter
-    double iter_local = 0;
-    for(int i = 0; i < solver_pm->no_shifts; ++i){
-      g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
-      iter_local = MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_pm->mms_squared_solver_prec[i], solver_pm->max_iter,
-                                 solver_pm->rel_prec, solver_pm->sdim, g_gauge_field, f );
-      g_shift = _default_g_shift;
-      if(iter_local == -1){
-        return(-1);
-      } else {
-        iteration_count += iter_local;
-      }
-    }
   } else {
     if(g_proc_id==0) printf("Error: solver not allowed for ND mms solve. Aborting...\n");
     exit(2);      
diff --git a/solver/solver_types.h b/solver/solver_types.h
index 45addb0b6..d16491580 100644
--- a/solver/solver_types.h
+++ b/solver/solver_types.h
@@ -24,8 +24,7 @@ typedef enum SOLVER_TYPE {
  MCR,
  CR,
  BICG,
- MG,
- MGMMSND
+ MG
 } SOLVER_TYPE;
 
 #endif

From 44e25274107fbd6ce233f87c8ac344a291043fc2 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 14 Sep 2017 15:25:30 +0300
Subject: [PATCH 23/85] Bug fixes and restoring MGNumberOfShifts

---
 DDalphaAMG_interface.c  | 18 +++++++--
 DDalphaAMG_interface.h  |  1 +
 read_input.h            |  1 +
 read_input.l            | 10 ++++-
 solver/monomial_solve.c | 87 +++++++++++++++++++++++++++++++++--------
 5 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 8abd88703..04ba13eaf 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -34,6 +34,7 @@ int mg_lvl;
 int mg_blk[4];
 int mg_mixed_prec;
 int mg_setup_mu_set;
+int mg_no_shifts = 0;
 double mg_mms_mass = 0;
 double mg_setup_mu;
 double mg_cmu_factor;
@@ -120,6 +121,7 @@ int mg_lvl=3;
 int mg_blk[4] = {0, 0, 0, 0};
 int mg_mixed_prec=0;
 int mg_setup_mu_set = 0; //flag that enable the use of mg_setup_mu in the setup phase
+int mg_no_shifts = 0; // number of shifts to invert with MG. MMS-CG is used for the others at larger mass.
 double mg_mms_mass = 0.1; // mass shift value for switching from MMS-CG to MG. MMS-CG is used for larger masses than the value.
 double mg_setup_mu = 0.; 
 double mg_cmu_factor = 1.0;
@@ -209,7 +211,7 @@ static inline int MG_check_nd( spinor * const up_new, spinor * const dn_new, spi
 {
   double differ[2], residual;
   spinor ** check_vect = NULL;
-  double acc_factor = 2;
+  double acc_factor = 4;
   
   init_solver_field(&check_vect, VOLUMEPLUSRAND,2);
   f( check_vect[0], check_vect[1], up_new, dn_new);
@@ -752,7 +754,7 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
       mul_r(new1tmp, 1/mg_scale, new1tmp, VOLUME);
       mul_r(new2tmp, 1/mg_scale, new2tmp, VOLUME);
       DDalphaAMG_solve_doublet_with_guess( (double*) new2tmp, (double*) old1, (double*) new1tmp, (double*) old2,
-                                           precision, &mg_status );
+                                           precision/2, &mg_status );
       if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
         mul_gamma5(old1, VOLUME);
         mul_gamma5(old2, VOLUME);
@@ -769,7 +771,7 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
                 f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
         MG_update_mubar_epsbar( g_mubar, g_epsbar, -sqrt(g_shift) );
       DDalphaAMG_solve_doublet_with_guess( (double*) new2, (double*) new1tmp, (double*) new1, (double*) new2tmp,
-                                           precision, &mg_status );      
+                                           precision/2, &mg_status );      
     } else {
       mg_scale *= mg_scale;
       DDalphaAMG_solve_doublet_squared_odd( (double*) new2, (double*) old2, (double*) new1, (double*) old1,
@@ -1226,8 +1228,16 @@ int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
   success = MG_solve_nd( up_new, dn_new, up_old, dn_old, mg_prec, N, f );
   
 #ifdef MGTEST
-  if(success) 
+  if(success) {
     success = MG_check_nd( up_new, dn_new, up_old, dn_old, N, mg_prec, f );
+
+    if(!success) {
+      success = MG_solve_nd( up_new, dn_new, up_old, dn_old, mg_prec, N, f);
+    
+      if(success) 
+        success = MG_check_nd( up_new, dn_new, up_old, dn_old, N, mg_prec, f );
+    }
+  }
 #endif
   
   if(!success) {
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index a19560994..f7e3b094a 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -37,6 +37,7 @@ extern int mg_lvl;
 extern int mg_blk[4];
 extern int mg_mixed_prec;
 extern int mg_setup_mu_set;
+extern int mg_no_shifts;
 extern double mg_mms_mass;
 extern double mg_setup_mu;
 extern double mg_cmu_factor;
diff --git a/read_input.h b/read_input.h
index 2cea6a321..53a04323b 100644
--- a/read_input.h
+++ b/read_input.h
@@ -131,6 +131,7 @@ extern "C"
   extern int mg_blk[4];
   extern int mg_mixed_prec;
   extern int mg_setup_mu_set;
+  extern int mg_no_shifts;
   extern double mg_mms_mass;
   extern double mg_setup_mu;
   extern double mg_cmu_factor;
diff --git a/read_input.l b/read_input.l
index 93aa1a29b..6adf79b40 100644
--- a/read_input.l
+++ b/read_input.l
@@ -721,10 +721,16 @@ static inline void rmQuotes(char *str){
     mg_omp_num_threads=a;
     if(myverbose) printf("  MG_omp_num_threads set to %d line %d operator %d\n", mg_omp_num_threads, line_of_file, current_operator);
   }
-  {SPC}*MGMMSMass{EQL}{DIGIT}+ {
+  {SPC}*MGNumberOfShifts{EQL}{DIGIT}+ {
+    sscanf(yytext, " %[a-zA-Z] = %d", name, &a);
+    mg_no_shifts=a;
+    mg_mms_mass=0; /* an explicit shift count replaces the mass-based MG/MMS-CG switch */
+    if(myverbose) printf("  MG_Num_of_shifts set to %d line %d operator %d\n", mg_no_shifts, line_of_file, current_operator);
+  }
+  {SPC}*MGMMSMass{EQL}{FLT}+ {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
     mg_mms_mass=c;
-    if(myverbose) printf("  MGMMSMass set to %f line %d operator %d\n", mg_mms_mass, line_of_file, current_operator);
+    if(myverbose) printf("  MG_MMS_Mass set to %f line %d operator %d\n", mg_mms_mass, line_of_file, current_operator);
   }
   EndDDalphaAMG{SPC}* {
   if(myverbose) printf("DDalphaAMG parsed in line %d\n\n", line_of_file);
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 905ce8ecf..272d6950f 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -166,16 +166,68 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     }
   } else if (solver_pm->type == CGMMSND){
     iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
+#define TEST
+#ifdef TEST
+    double differ[2], residual;
+    spinor ** check_vect = NULL;
+    matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+    if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
+      f = Qsw_pm_ndpsi_shift;
+    
+    init_solver_field(&check_vect, VOLUMEPLUSRAND/2,4);
+    differ[1] = sqrt(square_norm(Qup, solver_pm->sdim, 1)+square_norm(Qdn, solver_pm->sdim, 1));
+    
+    for(int i = solver_pm->no_shifts-1; i>=0; i--){
+      // preparing initial guess
+      if(i==solver_pm->no_shifts-1) {
+        zero_spinor_field(check_vect[0], solver_pm->sdim);
+        zero_spinor_field(check_vect[1], solver_pm->sdim);
+      } else {
+        double coeff;
+        for( int j = solver_pm->no_shifts-1; j > i; j-- ) {
+          coeff = 1;
+          for( int k = solver_pm->no_shifts-1; k > i; k-- ) {
+            if(j!=k)
+              coeff *= (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[i]*solver_pm->shifts[i])/
+                (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[j]*solver_pm->shifts[j]);
+          }
+          if(j==solver_pm->no_shifts-1) {
+            mul_r(check_vect[0], coeff, Pup[j], solver_pm->sdim);
+            mul_r(check_vect[1], coeff, Pdn[j], solver_pm->sdim);
+          } else {
+            assign_add_mul_r(check_vect[0], Pup[j], coeff, solver_pm->sdim);
+            assign_add_mul_r(check_vect[1], Pdn[j], coeff, solver_pm->sdim);
+          }
+        }
+      }
+      
+      g_shift = solver_pm->shifts[i]*solver_pm->shifts[i]; 
+    
+      f( check_vect[2], check_vect[3], check_vect[0], check_vect[1]);
+      diff( check_vect[2], check_vect[2], Qup, solver_pm->sdim);
+      diff( check_vect[3], check_vect[3], Qdn, solver_pm->sdim);
+      differ[0] = sqrt(square_norm(check_vect[2], solver_pm->sdim, 1)+square_norm(check_vect[3], solver_pm->sdim, 1));
+      
+      residual = differ[0]/differ[1];
+      
+      if(g_proc_id == 0)
+        printf("CHECH: shift: %d relative residual: %e\n",i,residual); 
+      g_shift = _default_g_shift;
+    }
+    finalize_solver(check_vect, 4);
+#endif
   }
 #ifdef DDalphaAMG
   else if (solver_pm->type == MG) {
     // if the mg_mms_mass is larger than the smallest shift we use MG
-    if (mg_mms_mass >= solver_pm->shifts[0]) { 
+    if (mg_no_shifts > 0 || mg_mms_mass >= solver_pm->shifts[0]) { 
 
       // if the mg_mms_mass is smaller than the larger shifts, we use CGMMS for those
+      if(mg_mms_mass >= solver_pm->shifts[0]) {
+        mg_no_shifts = solver_pm->no_shifts;
+        while (mg_mms_mass < solver_pm->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
+      }
       int no_shifts = solver_pm->no_shifts;
-      int mg_no_shifts = solver_pm->no_shifts;
-      while (mg_mms_mass < solver_pm->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
       if (mg_no_shifts < no_shifts) {
         solver_pm->no_shifts = no_shifts - mg_no_shifts;
         solver_pm->shifts += mg_no_shifts;
@@ -190,26 +242,27 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
       if( solver_pm->M_ndpsi == Qsw_pm_ndpsi ) 
         f = Qsw_pm_ndpsi_shift;
-      for(int i = solver_pm->no_shifts-1; i>=0; i--){
-        // preparing initial guess                                                                                                                                                                       
-        if(i==solver_pm->no_shifts-1) {
+
+      for(int i = mg_no_shifts-1; i>=0; i--){
+        // preparing initial guess
+        if(i==no_shifts-1) {
           zero_spinor_field(Pup[i], solver_pm->sdim);
           zero_spinor_field(Pdn[i], solver_pm->sdim);
         } else {
           double coeff;
-          for( int j = solver_pm->no_shifts-1; j > i; j-- ) {
+          for( int j = no_shifts-1; j > i; j-- ) {
             coeff = 1;
-            for( int k = solver_pm->no_shifts-1; k > i; k-- ) {
+            for( int k = no_shifts-1; k > i; k-- ) {
               if(j!=k)
                 coeff *= (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[i]*solver_pm->shifts[i])/
                   (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[j]*solver_pm->shifts[j]);
             }
-            if(j==solver_pm->no_shifts-1) {
-              mul(Pup[i], coeff, Pup[j], solver_pm->sdim);
-              mul(Pdn[i], coeff, Pdn[j], solver_pm->sdim);
+            if(j==no_shifts-1) {
+              mul_r(Pup[i], coeff, Pup[j], solver_pm->sdim);
+              mul_r(Pdn[i], coeff, Pdn[j], solver_pm->sdim);
             } else {
-              assign_add_mul(Pup[i], Pup[j], coeff, solver_pm->sdim);
-              assign_add_mul(Pdn[i], Pdn[j], coeff, solver_pm->sdim);
+              assign_add_mul_r(Pup[i], Pup[j], coeff, solver_pm->sdim);
+              assign_add_mul_r(Pdn[i], Pdn[j], coeff, solver_pm->sdim);
             }
           }
         }
@@ -252,11 +305,11 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
                 (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[j]*solver_pm->shifts[j]);
           }
           if(j==solver_pm->no_shifts-1) {
-            mul(Pup[i], coeff, Pup[j], solver_pm->sdim);
-            mul(Pdn[i], coeff, Pdn[j], solver_pm->sdim);
+            mul_r(Pup[i], coeff, Pup[j], solver_pm->sdim);
+            mul_r(Pdn[i], coeff, Pdn[j], solver_pm->sdim);
           } else {
-            assign_add_mul(Pup[i], Pup[j], coeff, solver_pm->sdim);
-            assign_add_mul(Pdn[i], Pdn[j], coeff, solver_pm->sdim);
+            assign_add_mul_r(Pup[i], Pup[j], coeff, solver_pm->sdim);
+            assign_add_mul_r(Pdn[i], Pdn[j], coeff, solver_pm->sdim);
           }
         }
       }

From 462c9fa68ca678aca79008abec236487a2c200da Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 21 Sep 2017 12:15:11 +0300
Subject: [PATCH 24/85] Add initial guess to single inversions done in ndrat
 heatbath

---
 monomial/ndrat_monomial.c | 82 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 10 deletions(-)

diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 0d92606a3..74e90687d 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -47,6 +47,7 @@
 #include "rational/rational.h"
 #include "phmc.h"
 #include "ndrat_monomial.h"
+#include "default_input_values.h"
 #ifdef DDalphaAMG
 #  include "DDalphaAMG_interface.h"
 #endif
@@ -257,17 +258,78 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
 #ifdef DDalphaAMG
-  if( mnl->solver == MG ){
-    // With MG we can solve directly the unsquared operator
-    solver_pm.M_ndpsi = &Qtm_tau1_ndpsi_add_Ishift;
-    if(mnl->type == NDCLOVERRAT)
-      solver_pm.M_ndpsi = &Qsw_tau1_ndpsi_add_Ishift;
-    
-    mnl->iter0 = MG_mms_solver_nd( g_chi_up_spinor_field, g_chi_dn_spinor_field, mnl->pf, mnl->pf2, 
-                                   solver_pm.shifts, solver_pm.no_shifts,solver_pm.mms_squared_solver_prec, 
-                                   solver_pm.max_iter, solver_pm.rel_prec, solver_pm.sdim, g_gauge_field, 
-                                   solver_pm.M_ndpsi );
+  // With MG we can solve directly the unsquared operator
+  if( mnl->solver == MG && (mg_no_shifts > 0 || mg_mms_mass >= solver_pm.shifts[0]) ){
+
+    // if the mg_mms_mass is smaller than the largest shifts, we use CGMMS for those
+    // in case mg_no_shifts is used, then mg_mms_mass = 0
+    if(mg_mms_mass >= solver_pm.shifts[0]) {
+      mg_no_shifts = solver_pm.no_shifts;
+      while (mg_mms_mass < solver_pm.shifts[mg_no_shifts-1]) { mg_no_shifts--; }
+    }
+    int no_shifts = solver_pm.no_shifts;
+    if (mg_no_shifts < no_shifts) {
+      solver_pm.no_shifts = no_shifts - mg_no_shifts;
+      solver_pm.shifts += mg_no_shifts;
+      solver_pm.mms_squared_solver_prec += mg_no_shifts;
+      // cg_mms_tm_nd stores the squared-operator solutions unshifted, i.e. at index j.
+      // Below, the operator is applied to them and the unsquared solution is written to index j+mg_no_shifts.
+      mnl->iter0 = cg_mms_tm_nd( g_chi_up_spinor_field, g_chi_dn_spinor_field, mnl->pf, mnl->pf2, &solver_pm );
+      for(int j = solver_pm.no_shifts-1; j >= 0; j--) {
+        // Q_h * tau^1 - i nu_j
+        // this needs phmc_Cpol = 1 to work!
+        if(mnl->type == NDCLOVERRAT) {
+          Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[j+mg_no_shifts], g_chi_dn_spinor_field[j+mg_no_shifts],
+                                   g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+                                   I*solver_pm.shifts[j], 1., mnl->EVMaxInv);
+        }
+        else {
+          Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[j+mg_no_shifts], g_chi_dn_spinor_field[j+mg_no_shifts],
+                                 g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+                                 I*solver_pm.shifts[j], 1., mnl->EVMaxInv);
+        }
+      }
+      // Restoring solver_pm
+      solver_pm.no_shifts = no_shifts;
+      solver_pm.shifts -= mg_no_shifts;
+      solver_pm.mms_squared_solver_prec -= mg_no_shifts;
+    }
 
+    matrix_mult_nd f = Qtm_tau1_ndpsi_add_Ishift;
+    if( solver_pm.M_ndpsi == Qsw_pm_ndpsi )
+      f = Qsw_tau1_ndpsi_add_Ishift;
+
+    // preparing initial guess
+    for(int i = mg_no_shifts-1; i>=0; i--){
+      if(i==no_shifts-1) {
+        zero_spinor_field(g_chi_up_spinor_field[i], solver_pm.sdim);
+        zero_spinor_field(g_chi_dn_spinor_field[i], solver_pm.sdim);
+      } else {
+        double coeff;
+        for( int j = no_shifts-1; j > i; j-- ) {
+          coeff = 1;
+          for( int k = no_shifts-1; k > i; k-- ) {
+            if(j!=k)
+              coeff *= (solver_pm.shifts[k]-solver_pm.shifts[i])/(solver_pm.shifts[k]-solver_pm.shifts[j]);
+          }
+          if(j==no_shifts-1) {
+            mul_r(g_chi_up_spinor_field[i], coeff, g_chi_up_spinor_field[j], solver_pm.sdim);
+            mul_r(g_chi_dn_spinor_field[i], coeff, g_chi_dn_spinor_field[j], solver_pm.sdim);
+          } else {
+            assign_add_mul_r(g_chi_up_spinor_field[i], g_chi_up_spinor_field[j], coeff, solver_pm.sdim);
+            assign_add_mul_r(g_chi_dn_spinor_field[i], g_chi_dn_spinor_field[j], coeff, solver_pm.sdim);
+          }
+        }
+      }
+      
+      // g_shift = shift^2 and then in Qsw_tau1_ndpsi_add_Ishift the square root is taken
+      g_shift = solver_pm.shifts[i]*solver_pm.shifts[i]; 
+      mnl->iter0 += MG_solver_nd( g_chi_up_spinor_field[i], g_chi_dn_spinor_field[i], mnl->pf, mnl->pf2,
+                                  solver_pm.mms_squared_solver_prec[i],
+                                  solver_pm.max_iter, solver_pm.rel_prec, solver_pm.sdim, g_gauge_field, f );
+      g_shift = _default_g_shift;
+    }
+    
     assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
     assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
 

From b89776166db1e1d02ac4a5323bb59685031cff31 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 21 Sep 2017 12:20:59 +0300
Subject: [PATCH 25/85] Comment not needed test

---
 solver/monomial_solve.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 272d6950f..fb0c9c3ae 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -166,8 +166,10 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     }
   } else if (solver_pm->type == CGMMSND){
     iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_pm);
-#define TEST
+    /*
+#undef TEST
 #ifdef TEST
+    //Testing the initial guess
     double differ[2], residual;
     spinor ** check_vect = NULL;
     matrix_mult_nd f = Qtm_pm_ndpsi_shift;
@@ -216,6 +218,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     }
     finalize_solver(check_vect, 4);
 #endif
+    */
   }
 #ifdef DDalphaAMG
   else if (solver_pm->type == MG) {
@@ -223,6 +226,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     if (mg_no_shifts > 0 || mg_mms_mass >= solver_pm->shifts[0]) { 
 
       // if the mg_mms_mass is smaller than the larger shifts, we use CGMMS for those
+      // in case mg_no_shifts is used, then mg_mms_mass = 0
       if(mg_mms_mass >= solver_pm->shifts[0]) {
         mg_no_shifts = solver_pm->no_shifts;
         while (mg_mms_mass < solver_pm->shifts[mg_no_shifts-1]) { mg_no_shifts--; }

From 0d57268b1ae14faf27419a111fbf1fb569b741fe Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Mon, 25 Sep 2017 15:06:58 +0300
Subject: [PATCH 26/85] Bug fix in allocation introduced by previous commit.

---
 monomial/ndrat_monomial.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 217d04419..16c67dc2d 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -480,9 +480,17 @@ int init_ndrat_monomial(const int id) {
 
   init_rational(&mnl->rat, scale);
 
-  if(init_chi_spinor_field(VOLUMEPLUSRAND/2, (mnl->rat.np+2)/2) != 0) {
-    fprintf(stderr, "Not enough memory for Chi fields! Aborting...\n");
-    exit(0);
+  if(mnl->type == RAT || mnl->type == CLOVERRAT ||
+     mnl->type == RATCOR || mnl->type == CLOVERRATCOR) {
+    if(init_chi_spinor_field(VOLUMEPLUSRAND/2, (mnl->rat.np+2)/2) != 0) {
+      fprintf(stderr, "Not enough memory for Chi fields! Aborting...\n");
+      exit(0);
+    }
+  } else {
+    if(init_chi_spinor_field(VOLUMEPLUSRAND/2, (mnl->rat.np+1)) != 0) {
+      fprintf(stderr, "Not enough memory for Chi fields! Aborting...\n");
+      exit(0);
+    }
   }
 
   return(0);

From 15c6edbbe644b2d387c7e63046ebfe9e326161ac Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 10 Nov 2017 16:28:13 +0200
Subject: [PATCH 27/85] Add full clover-improved operator

---
 DDalphaAMG_interface.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 04ba13eaf..3912dc5f5 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -406,7 +406,11 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
   else if ( f == D_psi ||         //          Full operator    with plus mu
 	    f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
 	    f == Q_minus_psi ||   // Gamma5 - Full operator    with minus mu
-	    f == Q_pm_psi ) {     //          Full operator    squared
+	    f == Q_pm_psi ||      //          Full operator    squared
+            f == Qsw_full_plus_psi || // Gamma5 - Full operator    with plus mu
+            f == Qsw_full_minus_psi|| //Gamma5 - Full operator    with minus mu
+            f == Qsw_full_pm_psi   || //          Full operator    squared
+            f == Msw_full_minus_psi) {//         Full operator    with minus mu
     if( N != VOLUME && g_proc_id == 0 )
       printf("WARNING: expected N == VOLUME for the required operator in MG_solve. Continuing with N == VOLUME/2\n");
   }
@@ -422,6 +426,8 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 	    f == Msw_minus_psi || //          Schur complement with minus mu
 	    f == Qtm_minus_psi || // Gamma5 - Schur complement with minus mu 
 	    f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu
+            f == Qsw_full_minus_psi|| //Gamma5 - Full operator    with minus mu
+            f == Msw_full_minus_psi|| //         Full operator    with minus mu
 	    f == Q_minus_psi )    // Gamma5 - Full operator    with minus mu
     MG_update_mu(-g_mu, -g_mu3);
   else if ( f == Mtm_plus_psi ||  //          Schur complement with plus mu 
@@ -432,6 +438,8 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 	    f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
 	    f == Qtm_pm_psi ||    //          Schur complement squared
 	    f == Qsw_pm_psi ||    //          Schur complement squared
+            f == Qsw_full_plus_psi || // Gamma5 - Full operator    with plus mu
+            f == Qsw_full_pm_psi   || //          Full operator    squared
 	    f == Q_pm_psi )       //          Full operator    squared
     MG_update_mu(g_mu, g_mu3); 
   else
@@ -444,7 +452,10 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 	    f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu 
 	    f == Qsw_psi ||       // Gamma5 - Schur complement with mu=0 on odd sites
 	    f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
-	    f == Q_minus_psi ) {  // Gamma5 - Full operator    with minus mu
+	    f == Q_minus_psi ||   // Gamma5 - Full operator    with minus mu
+            f == Qsw_full_plus_psi || // Gamma5 - Full operator    with plus mu
+            f == Qsw_full_minus_psi|| //Gamma5 - Full operator    with minus mu
+            f == Qsw_full_pm_psi ) {  //          Full operator    squared
     mul_gamma5((spinor *const) old, VOLUME);
     DDalphaAMG_solve( new, old, precision, &mg_status );
     if( N == VOLUME ) // in case of VOLUME/2 old is a just local vector
@@ -464,8 +475,10 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 	    f == Mtm_minus_psi || //          Schur complement with minus mu 
 	    f == Msw_minus_psi || //          Schur complement with minus mu
 	    f == Msw_psi ||       //          Schur complement with mu=0 on odd sites
-	    f == D_psi )          //          Full operator    with plus mu
+	    f == D_psi ||         //          Full operator    with plus mu
+            f == Msw_full_minus_psi) {//         Full operator    with minus mu
     DDalphaAMG_solve( new, old, precision, &mg_status );
+  }
   else
     DDalphaAMG_solve( new, old, precision, &mg_status );
   
@@ -1200,6 +1213,8 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
     f=&Q_plus_psi;
   else if (f_full == Msw_full)
     f=&D_psi;
+  else if (f_full == Qsw_full)
+    f=&Qsw_full_plus_psi;
   else {
     f=&D_psi;
     if( g_proc_id == 0 )

From c6d8d469fd20690b90683cf71815d8107569a1f0 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 10 Nov 2017 17:01:24 +0200
Subject: [PATCH 28/85] Add function square_and_max for spinor

---
 linalg/Makefile.in      |  2 +-
 linalg/square_and_max.c | 93 +++++++++++++++++++++++++++++++++++++++++
 linalg/square_and_max.h | 35 ++++++++++++++++
 linalg_eo.h             |  1 +
 4 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 linalg/square_and_max.c
 create mode 100644 linalg/square_and_max.h

diff --git a/linalg/Makefile.in b/linalg/Makefile.in
index 3b15f8f1d..c866a2d4d 100644
--- a/linalg/Makefile.in
+++ b/linalg/Makefile.in
@@ -34,7 +34,7 @@ liblinalg_TARGETS = assign_add_mul_r_add_mul \
 	assign_mul_bra_add_mul_ket_add_r \
 	scalar_prod_r scalar_prod_i \
 	square_and_prod_r assign_mul_bra_add_mul_r mul_r mul_r_32 \
-	diff_and_square_norm assign \
+	diff_and_square_norm square_and_max assign \
 	scalar_prod mul_diff_r mul_diff_mul assign_add_mul assign_mul_add add \
 	assign_diff_mul mul_add_mul mul assign_add_mul_add_mul \
 	assign_mul_bra_add_mul_ket_add assign_mul_add_mul_add_mul_add_mul_r \
diff --git a/linalg/square_and_max.c b/linalg/square_and_max.c
new file mode 100644
index 000000000..990c1c83a
--- /dev/null
+++ b/linalg/square_and_max.c
@@ -0,0 +1,93 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * File square_and_max.c
+ *
+ *   void square_and_max(double * const sum, double * const max, const spinor * const P, const int N)
+ *     Returns in *sum the square norm of *P and in *max the maximal local (per-site) square norm
+ *
+ *******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#ifdef TM_USE_MPI
+# include <mpi.h>
+#endif
+#ifdef TM_USE_OMP
+# include <omp.h>
+# include "global.h"
+#endif
+#include <complex.h>
+#include "su3.h"
+#include "su3adj.h"
+#include "su3spinor.h"
+#include "square_and_max.h"
+
+/* Computes the square norm of the N spinors starting at P and the largest
+ * per-spinor squared norm. The sum uses Kahan compensation; with TM_USE_MPI both
+ * results are reduced over MPI_COMM_WORLD. Results are written to *sum and *max. */
+void square_and_max(double * const sum, double * const max, const spinor * const P, const int N)
+{
+  int ix;
+  double ALIGN ks,kc,ds,tr,ts,tt;
+  const spinor *s;
+  
+  ks=0.0;
+  kc=0.0;
+  /* per-spinor norms are >= 0, so 0 is a safe start; *max was read uninitialised before */
+  *max=0.0;
+
+#if (defined BGL && defined XLC)
+  __alignx(16, P);
+#endif
+  
+  for (ix = 0; ix < N; ix++)
+  {
+    s=P + ix;
+
+    ds=s->s0.c0 * conj(s->s0.c0) + s->s0.c1 * conj(s->s0.c1) + s->s0.c2 * conj(s->s0.c2) +  
+      s->s1.c0 * conj(s->s1.c0) + s->s1.c1 * conj(s->s1.c1) + s->s1.c2 * conj(s->s1.c2) +  
+      s->s2.c0 * conj(s->s2.c0) + s->s2.c1 * conj(s->s2.c1) + s->s2.c2 * conj(s->s2.c2) +
+      s->s3.c0 * conj(s->s3.c0) + s->s3.c1 * conj(s->s3.c1) + s->s3.c2 * conj(s->s3.c2);
+    
+    /* Kahan step: kc carries the rounding error of the running sum ks */
+    tr=ds + kc;
+    ts=tr + ks;
+    tt=ts-ks;
+    ks=ts;
+    kc=tr-tt;
+
+    if(ds > *max) *max = ds;
+  }
+  kc=ks + kc;
+  *sum=kc;
+
+#if defined TM_USE_MPI
+  /* global sum of the norm, then global maximum of the per-spinor norm */
+  MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  *max = kc;
+#endif
+
+  return;
+}
diff --git a/linalg/square_and_max.h b/linalg/square_and_max.h
new file mode 100644
index 000000000..67c3dda3c
--- /dev/null
+++ b/linalg/square_and_max.h
@@ -0,0 +1,35 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifndef _SQUARE_AND_MAX_H
+#define _SQUARE_AND_MAX_H
+
+#include "su3.h"
+
+/* void square_and_max(double * const sum, double * const max, const spinor * const P, const int N)
+ *     Returns in *sum the square norm of *P and in *max the local maximal norm */
+
+void square_and_max(double * const sum, double * const max, const spinor * const P, const int N);
+
+
+
+#endif
+
+
+
diff --git a/linalg_eo.h b/linalg_eo.h
index 51f7f1ac3..1eb8ff70b 100644
--- a/linalg_eo.h
+++ b/linalg_eo.h
@@ -30,6 +30,7 @@
 #include "linalg/scalar_prod_r_32.h"
 #include "linalg/scalar_prod_i.h"
 #include "linalg/square_and_prod_r.h"
+#include "linalg/square_and_max.h"
 #include "linalg/assign_add_mul_r.h"
 #include "linalg/assign_add_mul_r_32.h"
 #include "linalg/assign_mul_bra_add_mul_r.h"

From 1b2977f53b5e8f18ec7511eebfc3ca765ff0626b Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Mon, 13 Nov 2017 11:13:42 +0200
Subject: [PATCH 29/85] Add min and relative norm to square_and_minmax function
 (renamed from square_and_max)

---
 linalg/Makefile.in         |   2 +-
 linalg/square_and_minmax.c | 157 +++++++++++++++++++++++++++++++++++++
 linalg/square_and_minmax.h |  39 +++++++++
 linalg_eo.h                |   2 +-
 4 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 linalg/square_and_minmax.c
 create mode 100644 linalg/square_and_minmax.h

diff --git a/linalg/Makefile.in b/linalg/Makefile.in
index c866a2d4d..1ae57de96 100644
--- a/linalg/Makefile.in
+++ b/linalg/Makefile.in
@@ -34,7 +34,7 @@ liblinalg_TARGETS = assign_add_mul_r_add_mul \
 	assign_mul_bra_add_mul_ket_add_r \
 	scalar_prod_r scalar_prod_i \
 	square_and_prod_r assign_mul_bra_add_mul_r mul_r mul_r_32 \
-	diff_and_square_norm square_and_max assign \
+	diff_and_square_norm square_and_minmax assign \
 	scalar_prod mul_diff_r mul_diff_mul assign_add_mul assign_mul_add add \
 	assign_diff_mul mul_add_mul mul assign_add_mul_add_mul \
 	assign_mul_bra_add_mul_ket_add assign_mul_add_mul_add_mul_add_mul_r \
diff --git a/linalg/square_and_minmax.c b/linalg/square_and_minmax.c
new file mode 100644
index 000000000..749690019
--- /dev/null
+++ b/linalg/square_and_minmax.c
@@ -0,0 +1,157 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * File square_and_minmax.c
+ *
+ *   void square_and_minmax(sum, min, max, P, N) and square_and_minmax_rel(sum, min, max, P, Q, N)
+ *     Return the summed local square norm of P (divided by that of Q for _rel) and its local min/max
+ *
+ *******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#ifdef TM_USE_MPI
+# include <mpi.h>
+#endif
+#ifdef TM_USE_OMP
+# include <omp.h>
+# include "global.h"
+#endif
+#include <complex.h>
+#include "su3.h"
+#include "su3adj.h"
+#include "su3spinor.h"
+#include "square_and_minmax.h"
+
+void square_and_minmax(double * const sum, double * const min, double * const max, const spinor * const P, const int N)
+{
+  int ix;
+  double ALIGN ks,kc,ds,tr,ts,tt;
+  spinor *s;
+  /* Accumulates the per-site |P(x)|^2 of the N sites into *sum and tracks its extremes in *min / *max. */
+  ks=0.0;
+  kc=0.0;
+  *max = 0.0;
+  *min = -1;   /* negative sentinel: no site processed yet (see the "*min < 0" test below) */
+
+#if (defined BGL && defined XLC)
+  /* alignment hint for XLC on the input array; P is the only spinor array in this function */
+  __alignx(16, P);
+#endif
+  
+  for (ix = 0; ix < N; ix++)
+  {
+    s=(spinor *) P + ix;
+
+    ds=s->s0.c0 * conj(s->s0.c0) + s->s0.c1 * conj(s->s0.c1) + s->s0.c2 * conj(s->s0.c2) +  
+      s->s1.c0 * conj(s->s1.c0) + s->s1.c1 * conj(s->s1.c1) + s->s1.c2 * conj(s->s1.c2) +  
+      s->s2.c0 * conj(s->s2.c0) + s->s2.c1 * conj(s->s2.c1) + s->s2.c2 * conj(s->s2.c2) +
+      s->s3.c0 * conj(s->s3.c0) + s->s3.c1 * conj(s->s3.c1) + s->s3.c2 * conj(s->s3.c2);
+    /* compensated summation: ks is the running sum, kc carries the rounding error lost when adding to it */
+    tr=ds + kc;
+    ts=tr + ks;
+    tt=ts-ks;
+    ks=ts;
+    kc=tr-tt;
+
+    if(ds > *max) *max = ds;
+    if(ds < *min || *min < 0) *min = ds;
+  }
+  kc=ks + kc;
+  *sum=kc;
+
+#if defined TM_USE_MPI
+
+  MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+#endif
+
+#if defined TM_USE_MPI
+
+  MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  *max = kc;
+
+#endif
+
+  return;
+}
+
+void square_and_minmax_rel(double * const sum, double * const min, double * const max, const spinor * const P, const spinor * const Q, const int N)
+{
+  int ix;
+  double ALIGN ks,kc,ds,dr,tr,ts,tt;
+  spinor *s, *r;
+  /* Accumulates the per-site ratio |P(x)|^2 / |Q(x)|^2 of the N sites into *sum and tracks its extremes in *min / *max. */
+  ks=0.0;
+  kc=0.0;
+  *max = 0.0;
+  *min = -1;   /* negative sentinel: no site processed yet (see the "*min < 0" test below) */
+
+#if (defined BGL && defined XLC)
+  __alignx(16, P);   /* alignment hints for XLC on the two input arrays */
+  __alignx(16, Q);
+#endif
+  
+  for (ix = 0; ix < N; ix++)
+  {
+    s=(spinor *) P + ix;
+    r=(spinor *) Q + ix;
+
+    ds=s->s0.c0 * conj(s->s0.c0) + s->s0.c1 * conj(s->s0.c1) + s->s0.c2 * conj(s->s0.c2) +  
+      s->s1.c0 * conj(s->s1.c0) + s->s1.c1 * conj(s->s1.c1) + s->s1.c2 * conj(s->s1.c2) +  
+      s->s2.c0 * conj(s->s2.c0) + s->s2.c1 * conj(s->s2.c1) + s->s2.c2 * conj(s->s2.c2) +
+      s->s3.c0 * conj(s->s3.c0) + s->s3.c1 * conj(s->s3.c1) + s->s3.c2 * conj(s->s3.c2);
+
+    dr=r->s0.c0 * conj(r->s0.c0) + r->s0.c1 * conj(r->s0.c1) + r->s0.c2 * conj(r->s0.c2) +  
+      r->s1.c0 * conj(r->s1.c0) + r->s1.c1 * conj(r->s1.c1) + r->s1.c2 * conj(r->s1.c2) +  
+      r->s2.c0 * conj(r->s2.c0) + r->s2.c1 * conj(r->s2.c1) + r->s2.c2 * conj(r->s2.c2) +
+      r->s3.c0 * conj(r->s3.c0) + r->s3.c1 * conj(r->s3.c1) + r->s3.c2 * conj(r->s3.c2);
+    /* NOTE: the division is unguarded; a site where Q vanishes gives dr == 0 */
+    ds = ds/dr;
+    /* compensated summation: ks is the running sum, kc carries the rounding error lost when adding to it */
+    tr=ds + kc;
+    ts=tr + ks;
+    tt=ts-ks;
+    ks=ts;
+    kc=tr-tt;
+
+    if(ds > *max) *max = ds;
+    if(ds < *min || *min < 0) *min = ds;
+  }
+  kc=ks + kc;
+  *sum=kc;
+
+#if defined TM_USE_MPI
+
+  MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+#endif
+
+#if defined TM_USE_MPI
+
+  MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  *max = kc;
+
+#endif
+
+  return;
+}
diff --git a/linalg/square_and_minmax.h b/linalg/square_and_minmax.h
new file mode 100644
index 000000000..8238fdd8a
--- /dev/null
+++ b/linalg/square_and_minmax.h
@@ -0,0 +1,39 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifndef _SQUARE_AND_MAX_H
+#define _SQUARE_AND_MAX_H
+
+#include "su3.h"
+
+/* void square_and_minmax(sum, min, max, P, N)
+ *     Returns the square norm of P and the local minimal/maximal norm */
+
+/* void square_and_minmax_rel(sum, min, max, P, Q, N)
+ *     Returns the sum of the local ratios |P|^2 / |Q|^2 and their local minimum/maximum */
+
+void square_and_minmax(double * const sum, double * const min, double * const max, const spinor * const P, const int N);
+void square_and_minmax_rel(double * const sum, double * const min, double * const max, const spinor * const P,  const spinor * const Q, const int N);
+
+
+
+#endif
+
+
+
diff --git a/linalg_eo.h b/linalg_eo.h
index 1eb8ff70b..2bba98c4f 100644
--- a/linalg_eo.h
+++ b/linalg_eo.h
@@ -30,7 +30,7 @@
 #include "linalg/scalar_prod_r_32.h"
 #include "linalg/scalar_prod_i.h"
 #include "linalg/square_and_prod_r.h"
-#include "linalg/square_and_max.h"
+#include "linalg/square_and_minmax.h"
 #include "linalg/assign_add_mul_r.h"
 #include "linalg/assign_add_mul_r_32.h"
 #include "linalg/assign_mul_bra_add_mul_r.h"

From 6157c1bfe70a0bf8ab7cda297b0347edb3208f38 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 22 Nov 2017 10:33:32 +0200
Subject: [PATCH 30/85] Bug fix in freeing memory

---
 DDalphaAMG_interface.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 3912dc5f5..6ae04160f 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -542,11 +542,12 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
   if(no_solver_field>0)
     init_solver_field(&solver_field, VOLUMEPLUSRAND,no_solver_field);
 
+  int assign_solver_field = 0;
   if (N==VOLUME/2) {
-    old1 = solver_field[--no_solver_field];
-    old2 = solver_field[--no_solver_field];
-    new1 = solver_field[--no_solver_field];
-    new2 = solver_field[--no_solver_field];
+    old1 = solver_field[assign_solver_field++];
+    old2 = solver_field[assign_solver_field++];
+    new1 = solver_field[assign_solver_field++];
+    new2 = solver_field[assign_solver_field++];
     convert_odd_to_lexic(old1, up_old);
     convert_odd_to_lexic(old2, dn_old);
     set_even_to_zero(old1);
@@ -558,8 +559,8 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
 	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
 	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
 	    f == Qsw_pm_ndpsi_shift )) {// (Gamma5 Dh tau1)^2 - Schur complement squared with shift
-    new1tmp = solver_field[--no_solver_field];
-    new2tmp = solver_field[--no_solver_field];
+    new1tmp = solver_field[assign_solver_field++];
+    new2tmp = solver_field[assign_solver_field++];
   }
 
   // Reconstracting initial guess in case of oe

From a8e88e44a96738df5ec7818e22923312083af7c4 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 22 Nov 2017 11:24:31 +0200
Subject: [PATCH 31/85] Changed square_and_max to square_and_minmax

---
 linalg/square_and_max.c    |  93 -------------
 linalg/square_and_max.h    |  35 -----
 linalg/square_and_minmax.c | 275 ++++++++++++++++++++++++++++++++++++-
 linalg/square_and_minmax.h |   2 +
 4 files changed, 273 insertions(+), 132 deletions(-)
 delete mode 100644 linalg/square_and_max.c
 delete mode 100644 linalg/square_and_max.h

diff --git a/linalg/square_and_max.c b/linalg/square_and_max.c
deleted file mode 100644
index 990c1c83a..000000000
--- a/linalg/square_and_max.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- *
- * File square_and_max.c
- *
- *   void square_and_max(spinor * const P )
- *     Returns the square norm and max local deviation of *P
- *
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#ifdef TM_USE_MPI
-# include <mpi.h>
-#endif
-#ifdef TM_USE_OMP
-# include <omp.h>
-# include "global.h"
-#endif
-#include <complex.h>
-#include "su3.h"
-#include "su3adj.h"
-#include "su3spinor.h"
-#include "square_and_max.h"
-
-void square_and_max(double * const sum, double * const max, const spinor * const P, const int N)
-{
-  int ix;
-  double ALIGN ks,kc,ds,tr,ts,tt;
-  spinor *s;
-  
-  ks=0.0;
-  kc=0.0;
-
-#if (defined BGL && defined XLC)
-  __alignx(16, S);
-  __alignx(16, R);
-#endif
-  
-  for (ix = 0; ix < N; ix++)
-  {
-    s=(spinor *) P + ix;
-
-    ds=s->s0.c0 * conj(s->s0.c0) + s->s0.c1 * conj(s->s0.c1) + s->s0.c2 * conj(s->s0.c2) +  
-      s->s1.c0 * conj(s->s1.c0) + s->s1.c1 * conj(s->s1.c1) + s->s1.c2 * conj(s->s1.c2) +  
-      s->s2.c0 * conj(s->s2.c0) + s->s2.c1 * conj(s->s2.c1) + s->s2.c2 * conj(s->s2.c2) +
-      s->s3.c0 * conj(s->s3.c0) + s->s3.c1 * conj(s->s3.c1) + s->s3.c2 * conj(s->s3.c2);
-    
-    tr=ds + kc;
-    ts=tr + ks;
-    tt=ts-ks;
-    ks=ts;
-    kc=tr-tt;
-
-    if(ds > *max) *max = ds;
-  }
-  kc=ks + kc;
-  *sum=kc;
-
-#if defined TM_USE_MPI
-
-  MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-
-#endif
-
-#if defined TM_USE_MPI
-
-  MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
-  *max = kc;
-
-#endif
-
-  return;
-}
diff --git a/linalg/square_and_max.h b/linalg/square_and_max.h
deleted file mode 100644
index 67c3dda3c..000000000
--- a/linalg/square_and_max.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _SQUARE_AND_MAX_H
-#define _SQUARE_AND_MAX_H
-
-#include "su3.h"
-
-/* double square_norm(spinor * const P )
- *     Returns the square norm of *P and the local maximal norm */
-
-void square_and_max(double * const sum, double * const max, const spinor * const P, const int N);
-
-
-
-#endif
-
-
-
diff --git a/linalg/square_and_minmax.c b/linalg/square_and_minmax.c
index 749690019..0b1b5a41d 100644
--- a/linalg/square_and_minmax.c
+++ b/linalg/square_and_minmax.c
@@ -1,5 +1,5 @@
 /***********************************************************************
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ * copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
  *
  * This file is part of tmLQCD.
  *
@@ -83,9 +83,8 @@ void square_and_minmax(double * const sum, double * const min, double * const ma
 
   MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 
-#endif
-
-#if defined TM_USE_MPI
+  MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  *min = kc;
 
   MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
   *max = kc;
@@ -144,13 +143,281 @@ void square_and_minmax_rel(double * const sum, double * const min, double * cons
 
   MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 
+  MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  *min = kc;
+
+  MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  *max = kc;
+
+#endif
+
+  return;
+}
+
+void square_and_minmax_abs(double * const sum, double * const min, double * const max,  double * const min_abs, double * const max_abs, const spinor * const P, const int N)
+{
+  int ix;
+  double ALIGN ks,kc,ds,dds,tr,ts,tt;
+  spinor *s;
+  /* sum/min/max refer to the per-site |P(x)|^2; min_abs/max_abs to each of its 12 complex components |c|^2. */
+  ks=0.0;
+  kc=0.0;
+  *max = 0.0;
+  *min = -1;       /* negative sentinel: no site processed yet */
+  *max_abs = 0.0;
+  *min_abs = -1;   /* negative sentinel: no component processed yet */
+
+#if (defined BGL && defined XLC)
+  /* alignment hint for XLC on the input array; P is the only spinor array in this function */
+  __alignx(16, P);
 #endif
+  
+  for (ix = 0; ix < N; ix++)
+  {
+    s=(spinor *) P + ix;
+    /* max_abs and min_abs are tested independently: one value can be a new maximum and the minimum at once */
+    dds=s->s0.c0 * conj(s->s0.c0);
+    ds=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s0.c1 * conj(s->s0.c1);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s0.c2 * conj(s->s0.c2);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s1.c0 * conj(s->s1.c0);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s1.c1 * conj(s->s1.c1);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s1.c2 * conj(s->s1.c2);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s2.c0 * conj(s->s2.c0);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s2.c1 * conj(s->s2.c1);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s2.c2 * conj(s->s2.c2);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s3.c0 * conj(s->s3.c0);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s3.c1 * conj(s->s3.c1);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s3.c2 * conj(s->s3.c2);
+    ds+=dds;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    tr=ds + kc;
+    ts=tr + ks;
+    tt=ts-ks;
+    ks=ts;
+    kc=tr-tt;
+
+    if(ds > *max) *max = ds;
+    if(ds < *min || *min < 0) *min = ds;
+  }
+  kc=ks + kc;
+  *sum=kc;
 
 #if defined TM_USE_MPI
 
+  MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  *min = kc;
+
   MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
   *max = kc;
 
+  MPI_Allreduce(min_abs, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  *min_abs = kc;
+
+  MPI_Allreduce(max_abs, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  *max_abs = kc;
+
+#endif
+
+  return;
+}
+
+void square_and_minmax_rel_abs(double * const sum, double * const min, double * const max, double * const min_abs, double * const max_abs, const spinor * const P, const spinor * const Q, const int N)
+{
+  int ix;
+  double ALIGN ks,kc,ds,dds,dr,ddr,tr,ts,tt;
+  spinor *s, *r;
+  /* sum/min/max refer to the per-site ratio |P(x)|^2 / |Q(x)|^2; min_abs/max_abs to the ratio of each of the 12 components. */
+  ks=0.0;
+  kc=0.0;
+  *max = 0.0;
+  *min = -1;       /* negative sentinel: no site processed yet */
+  *max_abs = 0.0;
+  *min_abs = -1;   /* negative sentinel: no component processed yet */
+
+#if (defined BGL && defined XLC)
+  __alignx(16, P);   /* alignment hints for XLC on the two input arrays */
+  __alignx(16, Q);
+#endif
+  
+  for (ix = 0; ix < N; ix++)
+  {
+    s=(spinor *) P + ix;
+    r=(spinor *) Q + ix;
+    /* max_abs and min_abs are tested independently; NOTE: the divisions by ddr and dr are unguarded */
+    dds=s->s0.c0 * conj(s->s0.c0);
+    ddr=r->s0.c0 * conj(r->s0.c0);
+    ds=dds;
+    dr=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s0.c1 * conj(s->s0.c1);
+    ddr=r->s0.c1 * conj(r->s0.c1);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s0.c2 * conj(s->s0.c2);
+    ddr=r->s0.c2 * conj(r->s0.c2);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s1.c0 * conj(s->s1.c0);
+    ddr=r->s1.c0 * conj(r->s1.c0);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s1.c1 * conj(s->s1.c1);
+    ddr=r->s1.c1 * conj(r->s1.c1);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s1.c2 * conj(s->s1.c2);
+    ddr=r->s1.c2 * conj(r->s1.c2);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s2.c0 * conj(s->s2.c0);
+    ddr=r->s2.c0 * conj(r->s2.c0);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s2.c1 * conj(s->s2.c1);
+    ddr=r->s2.c1 * conj(r->s2.c1);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s2.c2 * conj(s->s2.c2);
+    ddr=r->s2.c2 * conj(r->s2.c2);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s3.c0 * conj(s->s3.c0);
+    ddr=r->s3.c0 * conj(r->s3.c0);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s3.c1 * conj(s->s3.c1);
+    ddr=r->s3.c1 * conj(r->s3.c1);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+
+    dds=s->s3.c2 * conj(s->s3.c2);
+    ddr=r->s3.c2 * conj(r->s3.c2);
+    ds+=dds;
+    dr+=ddr;
+    dds/=ddr;
+    if(dds > *max_abs) *max_abs = dds;
+    if(dds < *min_abs || *min_abs < 0) *min_abs = dds;
+    /* per-site ratio of the full square norms (not the sum of the component ratios above) */
+    ds = ds/dr;
+
+    tr=ds + kc;
+    ts=tr + ks;
+    tt=ts-ks;
+    ks=ts;
+    kc=tr-tt;
+
+    if(ds > *max) *max = ds;
+    if(ds < *min || *min < 0) *min = ds;
+  }
+  kc=ks + kc;
+  *sum=kc;
+
+#if defined TM_USE_MPI
+
+  MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+
+  MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  *min = kc;
+
+  MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  *max = kc;
+
+  MPI_Allreduce(min_abs, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+  *min_abs = kc;
+
+  MPI_Allreduce(max_abs, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+  *max_abs = kc;
+
 #endif
 
   return;
diff --git a/linalg/square_and_minmax.h b/linalg/square_and_minmax.h
index 8238fdd8a..9f2f85f67 100644
--- a/linalg/square_and_minmax.h
+++ b/linalg/square_and_minmax.h
@@ -30,6 +30,8 @@
 
 void square_and_minmax(double * const sum, double * const min, double * const max, const spinor * const P, const int N);
 void square_and_minmax_rel(double * const sum, double * const min, double * const max, const spinor * const P,  const spinor * const Q, const int N);
+void square_and_minmax_abs(double * const sum, double * const min, double * const max, double * const min_abs, double * const max_abs, const spinor * const P, const int N);
+void square_and_minmax_rel_abs(double * const sum, double * const min, double * const max, double * const min_abs, double * const max_abs, const spinor * const P,  const spinor * const Q, const int N);
 
 
 

From 0b5addae7a8b26b31dece9fbf62914e8bd85ed28 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 7 Dec 2017 12:04:31 +0200
Subject: [PATCH 32/85] Add choice of mms solver for the RHMC with Wilson

---
 invert_eo.c                |   2 +-
 monomial/rat_monomial.c    |  27 ++++----
 monomial/ratcor_monomial.c |  26 ++++----
 read_input.l               |  31 ++++++++-
 solver/cg_mms_tm.c         |  19 ++++--
 solver/cg_mms_tm.h         |   2 +-
 solver/cg_mms_tm_nd.c      |  36 +++++++---
 solver/monomial_solve.c    | 132 ++++++++++++++++++++++++++++++++++++-
 solver/monomial_solve.h    |   2 +
 9 files changed, 231 insertions(+), 46 deletions(-)

diff --git a/invert_eo.c b/invert_eo.c
index 3358b0bad..7447f01de 100644
--- a/invert_eo.c
+++ b/invert_eo.c
@@ -469,7 +469,7 @@ int invert_eo(spinor * const Even_new, spinor * const Odd_new,
       if(g_proc_id == 0) {printf("# Using multi mass CG!\n"); fflush(stdout);}
       
       gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], VOLUME);
-      iter = cg_mms_tm(P, g_spinor_field[DUM_DERI+1],&solver_params,&cgmms_reached_prec);
+      iter = cg_mms_tm(P, g_spinor_field[DUM_DERI+1],&solver_params);
       g_mu = shifts[0];
       Q_minus_psi(g_spinor_field[DUM_DERI+1], P[0]);
       
diff --git a/monomial/rat_monomial.c b/monomial/rat_monomial.c
index 7ff3beaad..46a921f62 100644
--- a/monomial/rat_monomial.c
+++ b/monomial/rat_monomial.c
@@ -55,7 +55,7 @@
 void rat_derivative(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
   solver_pm_t solver_pm;
-  double atime, etime, dummy;
+  double atime, etime;
   atime = gettime();
   g_mu = 0;
   g_mu3 = 0.;
@@ -80,15 +80,16 @@ void rat_derivative(const int id, hamiltonian_field_t * const hf) {
 
   solver_pm.max_iter = mnl->maxiter;
   solver_pm.squared_solver_prec = mnl->forceprec;
+  solver_pm.mms_squared_solver_prec = NULL;
   solver_pm.no_shifts = mnl->rat.np;
   solver_pm.shifts = mnl->rat.mu;
   solver_pm.rel_prec = g_relative_precision_flag;
-  solver_pm.type = CGMMS;
+  solver_pm.type = mnl->solver;
   solver_pm.M_psi = mnl->Qsq;
   solver_pm.sdim = VOLUME/2;
   // this generates all X_j,o (odd sites only) -> g_chi_up_spinor_field
-  mnl->iter1 += cg_mms_tm(g_chi_up_spinor_field, mnl->pf,
-			  &solver_pm, &dummy);
+  mnl->iter1 += solve_mms_tm(g_chi_up_spinor_field, mnl->pf,
+			  &solver_pm);
   
   for(int j = (mnl->rat.np-1); j > -1; j--) {
     mnl->Qp(mnl->w_fields[0], g_chi_up_spinor_field[j]);
@@ -146,7 +147,7 @@ void rat_derivative(const int id, hamiltonian_field_t * const hf) {
 void rat_heatbath(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
   solver_pm_t solver_pm;
-  double atime, etime, dummy;
+  double atime, etime;
   atime = gettime();
   // only for non-twisted operators
   g_mu = 0.;
@@ -175,14 +176,15 @@ void rat_heatbath(const int id, hamiltonian_field_t * const hf) {
   // set solver parameters
   solver_pm.max_iter = mnl->maxiter;
   solver_pm.squared_solver_prec = mnl->accprec;
+  solver_pm.mms_squared_solver_prec = NULL;
   solver_pm.no_shifts = mnl->rat.np;
   solver_pm.shifts = mnl->rat.nu;
-  solver_pm.type = CGMMS;
+  solver_pm.type = mnl->solver;
   solver_pm.M_psi = mnl->Qsq;
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
-  mnl->iter0 = cg_mms_tm(g_chi_up_spinor_field, mnl->pf,
-			 &solver_pm, &dummy);
+  mnl->iter0 = solve_mms_tm(g_chi_up_spinor_field, mnl->pf,
+			 &solver_pm);
 
   assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
 
@@ -210,7 +212,7 @@ void rat_heatbath(const int id, hamiltonian_field_t * const hf) {
 double rat_acc(const int id, hamiltonian_field_t * const hf) {
   solver_pm_t solver_pm;
   monomial * mnl = &monomial_list[id];
-  double atime, etime, dummy;
+  double atime, etime;
   atime = gettime();
   // only for non-twisted operators
   g_mu = 0.;
@@ -225,14 +227,15 @@ double rat_acc(const int id, hamiltonian_field_t * const hf) {
 
   solver_pm.max_iter = mnl->maxiter;
   solver_pm.squared_solver_prec = mnl->accprec;
+  solver_pm.mms_squared_solver_prec = NULL;
   solver_pm.no_shifts = mnl->rat.np;
   solver_pm.shifts = mnl->rat.mu;
-  solver_pm.type = CGMMS;
+  solver_pm.type = mnl->solver;
   solver_pm.M_psi = mnl->Qsq;
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
-  mnl->iter0 += cg_mms_tm(g_chi_up_spinor_field, mnl->pf,
-			  &solver_pm, &dummy);
+  mnl->iter0 += solve_mms_tm(g_chi_up_spinor_field, mnl->pf,
+			  &solver_pm);
 
   // apply R to the pseudo-fermion fields
   assign(mnl->w_fields[0], mnl->pf, VOLUME/2);
diff --git a/monomial/ratcor_monomial.c b/monomial/ratcor_monomial.c
index a7354c360..55e9790a2 100644
--- a/monomial/ratcor_monomial.c
+++ b/monomial/ratcor_monomial.c
@@ -62,7 +62,8 @@ void ratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   solver_pm_t solver_pm;
   double atime, etime, delta;
   spinor * up0, * up1, * tup;
-  double coefs[6] = {1./4., -3./32., 7./122., -77./2048., 231./8192., -1463./65536.};
+  double coefs[6] = {1./4., -3./32., 7./128., -77./2048., 231./8192., -1463./65536.}; // series of (1+x)^(1/4)
+  double coefs_check[6] = {1./2., -1./8., 1./16., -5./128., 7./256., -21./1024.}; // series of (1+x)^(1/2)
   atime = gettime();
   nd_set_global_parameter(mnl);
   g_mu = 0.;
@@ -91,7 +92,7 @@ void ratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   solver_pm.squared_solver_prec = mnl->accprec;
   solver_pm.no_shifts = mnl->rat.np;
   solver_pm.shifts = mnl->rat.mu;
-  solver_pm.type = CGMMS;
+  solver_pm.type = mnl->solver;
   solver_pm.M_psi = mnl->Qsq;
   solver_pm.sdim = VOLUME/2;
   solver_pm.rel_prec = g_relative_precision_flag;
@@ -186,10 +187,9 @@ double apply_Z_psi(spinor * const k_up,	spinor * const l_up,
 		     const int id, hamiltonian_field_t * const hf,
 		     solver_pm_t * solver_pm) {
   monomial * mnl = &monomial_list[id];
-  double dummy;
 
-  mnl->iter0 += cg_mms_tm(g_chi_up_spinor_field, l_up,
-			  solver_pm, &dummy);  
+  mnl->iter0 += solve_mms_tm(g_chi_up_spinor_field, l_up,
+			  solver_pm);  
   
   // apply R to the pseudo-fermion fields
   assign(k_up, l_up, VOLUME/2);
@@ -199,8 +199,8 @@ double apply_Z_psi(spinor * const k_up,	spinor * const l_up,
   }
 
   // apply R a second time
-  cg_mms_tm(g_chi_up_spinor_field, k_up,
-	    solver_pm, &dummy);
+  solve_mms_tm(g_chi_up_spinor_field, k_up,
+	    solver_pm);
   for(int j = (mnl->rat.np-1); j > -1; j--) {
     assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
 		     mnl->rat.rmu[j], VOLUME/2);
@@ -224,8 +224,8 @@ void check_C_psi(spinor * const k_up, spinor * const l_up,
 		 const int id, hamiltonian_field_t * const hf,
 		 solver_pm_t * solver_pm) {
   monomial * mnl = &monomial_list[id];
-  double dummy;
-  mnl->iter0 = cg_mms_tm(g_chi_up_spinor_field, l_up, solver_pm, &dummy);
+
+  mnl->iter0 = solve_mms_tm(g_chi_up_spinor_field, l_up, solver_pm);
 
   assign(k_up, l_up, VOLUME/2);
 
@@ -244,16 +244,16 @@ void check_C_psi(spinor * const k_up, spinor * const l_up,
   }
   //apply R
   solver_pm->shifts = mnl->rat.mu;
-  cg_mms_tm(g_chi_up_spinor_field, k_up,
-	    solver_pm, &dummy);
+  solve_mms_tm(g_chi_up_spinor_field, k_up,
+	    solver_pm);
   for(int j = (mnl->rat.np-1); j > -1; j--) {
     assign_add_mul_r(k_up, g_chi_up_spinor_field[j], 
 		     mnl->rat.rmu[j], VOLUME/2);
   }
   // apply C^dagger
   solver_pm->shifts = mnl->rat.nu;
-  cg_mms_tm(g_chi_up_spinor_field, k_up,
-	    solver_pm, &dummy);
+  solve_mms_tm(g_chi_up_spinor_field, k_up,
+	    solver_pm);
   for(int j = (mnl->rat.np-1); j > -1; j--) {
     if(mnl->type == NDCLOVERRATCOR || mnl->type == NDCLOVERRAT) {
       //Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
diff --git a/read_input.l b/read_input.l
index 6adf79b40..df383f8dc 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1233,9 +1233,14 @@ static inline void rmQuotes(char *str){
     BEGIN(name_caller);
   }
   DDalphaAMG {
+#ifdef DDalphaAMG
     optr->solver = MG;
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     BEGIN(name_caller);
+#else
+    printf("ERROR line %d operator %d: DDalphaAMG library not included\n", line_of_file, current_operator);
+    exit(1);
+#endif
   }
 }
 
@@ -1502,7 +1507,14 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL>{
+<RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
+  {SPC}*Solver{EQL} {
+   solver_caller=YY_START;
+   BEGIN(MSOLVER);
+  }
+}
+
+<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
   {SPC}*2KappaMu{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mu = c;
@@ -1718,10 +1730,20 @@ static inline void rmQuotes(char *str){
     mnl->solver = BICGSTAB;
     BEGIN(solver_caller);
   }
+  cgmms {
+    if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
+    mnl->solver = CGMMS;
+    BEGIN(solver_caller);
+  }
   DDalphaAMG {
+#ifdef DDalphaAMG
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     mnl->solver = MG;
     BEGIN(solver_caller);
+#else
+    printf("ERROR line %d operator %d: DDalphaAMG library not included\n", line_of_file, current_operator);
+    exit(1);
+#endif
   }
 }
 
@@ -1747,9 +1769,14 @@ static inline void rmQuotes(char *str){
     BEGIN(solver_caller);
   }
   DDalphaAMG {
-    if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
+#ifdef DDalphaAMG
+    if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     mnl->solver = MG;
     BEGIN(solver_caller);
+#else
+    printf("ERROR line %d operator %d: DDalphaAMG library not included\n", line_of_file, current_operator);
+    exit(1);
+#endif
   }
 }
 
diff --git a/solver/cg_mms_tm.c b/solver/cg_mms_tm.c
index 36ef80bae..04aad40a4 100644
--- a/solver/cg_mms_tm.c
+++ b/solver/cg_mms_tm.c
@@ -63,7 +63,7 @@ static void free_mms_tm();
 
 /* P output = solution , Q input = source */
 int cg_mms_tm(spinor ** const P, spinor * const Q,
-		 solver_pm_t * solver_pm, double * cgmms_reached_prec) {
+		 solver_pm_t * solver_pm) {
 
   static double normsq, pro, err, squarenorm;
   int iteration, N = solver_pm->sdim, no_shifts = solver_pm->no_shifts;
@@ -153,15 +153,22 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
       // falls below a threshold
       // this is useful for computing time and needed, because otherwise
       // zita might get smaller than DOUBLE_EPS and, hence, zero
-      if(iteration > 0 && (iteration % 20 == 0) && (im == no_shifts-1)) {
+      if(iteration > 0 && (iteration % 10 == 0) && (im == no_shifts-1)) {
 	double sn = square_norm(ps_mms_solver[no_shifts-2], N, 1);
+        err = alphas[no_shifts-1]*alphas[no_shifts-1]*sn;
         // while because more than one shift could be converged
-	while(alphas[no_shifts-1]*alphas[no_shifts-1]*sn <= solver_pm->mms_squared_solver_prec[no_shifts-1] && no_shifts>1) {
+	while(((err <= solver_pm->mms_squared_solver_prec[no_shifts-1]) && (solver_pm->rel_prec == 0)) ||
+              ((err <= solver_pm->mms_squared_solver_prec[no_shifts-1]*squarenorm) && (solver_pm->rel_prec > 0))) {
 	  no_shifts--;
 	  if(g_debug_level > 2 && g_proc_id == 0) {
 	    printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
       	  }
-          sn = square_norm(ps_mms_solver[no_shifts-2], N, 1);
+          if(no_shifts>1) {
+            sn = square_norm(ps_mms_solver[no_shifts-2], N, 1);
+            err = alphas[no_shifts-1]*alphas[no_shifts-1]*sn;
+          } else {
+            break;
+          }
 	}
       }
     }
@@ -182,9 +189,7 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
     if( ((err <= solver_pm->mms_squared_solver_prec[0]) && (solver_pm->rel_prec == 0) && no_shifts==1) ||
         ((err <= solver_pm->mms_squared_solver_prec[0]*squarenorm) && (solver_pm->rel_prec > 0) && no_shifts==1) ||
         (iteration == solver_pm->max_iter -1) ) {
-      /* FIXME temporary output of precision until a better solution can be found */
-      *cgmms_reached_prec = err;
-      break;
+        break;
     }
 
     /* Compute betas[0](i+1) = (r(i+1),r(i+1))/(r(i),r(i))
diff --git a/solver/cg_mms_tm.h b/solver/cg_mms_tm.h
index 1b70facbe..58e872224 100644
--- a/solver/cg_mms_tm.h
+++ b/solver/cg_mms_tm.h
@@ -28,6 +28,6 @@
 #include "matrix_mult_typedef.h"
 #include "su3.h"
 
-int cg_mms_tm(spinor ** const P,spinor * const Q, solver_pm_t * const params, double * reached_prec);
+int cg_mms_tm(spinor ** const P,spinor * const Q, solver_pm_t * const params);
 
 #endif
diff --git a/solver/cg_mms_tm_nd.c b/solver/cg_mms_tm_nd.c
index 66707495e..f8aa774d7 100644
--- a/solver/cg_mms_tm_nd.c
+++ b/solver/cg_mms_tm_nd.c
@@ -72,6 +72,8 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
   double atime, etime;
   const int nr_sf = 4;
 
+  if(g_proc_id == 0 && g_debug_level > 2) printf("# CGMMSND: solving %d shifts\n", shifts);
+
   // if solver_pm->mms_squared_solver_prec is NULL,
   // filling it with solver_pm->squared_solver_prec
   double *mms_squared_solver_prec = NULL;
@@ -169,16 +171,32 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
         double sn = square_norm(ps_mms_solver[2*(shifts-1)], N, 1);
         sn += square_norm(ps_mms_solver[2*(shifts-1)+1], N, 1);
         err = alphas[shifts-1]*alphas[shifts-1]*sn;
-	while(((err <= solver_pm->squared_solver_prec[shifts-1]) && (solver_pm->rel_prec == 0)) ||
-              ((err <= solver_pm->squared_solver_prec[shifts-1]*squarenorm) && (solver_pm->rel_prec > 0))
-              && shifts>1) {
+	while(((err <= solver_pm->mms_squared_solver_prec[shifts-1]) && (solver_pm->rel_prec == 0)) ||
+              ((err <= solver_pm->mms_squared_solver_prec[shifts-1]*squarenorm) && (solver_pm->rel_prec > 0))) {
+          // debug only (g_debug_level > 3): print the residual of every shift that is still being iterated
+	  if(g_debug_level > 3) {
+	    if (g_proc_id == 0) printf("# CGMMSND: residual of remaining shifts\n");
+	    if (g_proc_id == 0) printf("#\t id\t\t shift\t residual\n");
+            for(int is = shifts-1; is>0; is--) { // highest active shift is shifts-1; locals keep the outer err/sn intact
+              double sn_is = square_norm(ps_mms_solver[2*is], N, 1);
+              sn_is += square_norm(ps_mms_solver[2*is+1], N, 1);
+              double err_is = alphas[is]*alphas[is]*sn_is;
+              if (g_proc_id == 0) printf("#\t %d\t\t %e\t %e\n", is, sigma[is], solver_pm->rel_prec ? err_is/squarenorm : err_is);
+            }
+            if (g_proc_id == 0) printf("#\t %d\t\t %e\t %e\n", 0, sigma[0], solver_pm->rel_prec ? normsq/squarenorm : normsq);
+	  }
 	  shifts--;
 	  if(g_debug_level > 2 && g_proc_id == 0) {
-	    printf("# CGMMSND: at iteration %d removed one shift, %d remaining\n", iteration, shifts);
+	    printf("# CGMMSND: at iteration %d removed one shift with residual %e. %d shifts remaining\n", iteration, solver_pm->rel_prec ? err/squarenorm : err, shifts);
 	  }
-          sn = square_norm(ps_mms_solver[2*(shifts-1)], N, 1);
-          sn += square_norm(ps_mms_solver[2*(shifts-1)+1], N, 1);
-          err = alphas[shifts-1]*alphas[shifts-1]*sn;
+          // computing next shift residual and looping for all the converged
+          if(shifts>1) {
+            sn = square_norm(ps_mms_solver[2*(shifts-1)], N, 1);
+            sn += square_norm(ps_mms_solver[2*(shifts-1)+1], N, 1);
+            err = alphas[shifts-1]*alphas[shifts-1]*sn;
+          } else {
+            break;
+          }
 	}
       }
     }
@@ -193,7 +211,7 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
     /* Check whether the precision eps_sq is reached */
 
     err = square_norm(solver_field[0], N, 1) + square_norm(solver_field[1], N, 1);
-
+    
     if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
       printf("# CGMMSND iteration: %d residue: %g\n", iteration, err); fflush( stdout );
     }
@@ -224,7 +242,7 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
   if(iteration == solver_pm->max_iter -1) iteration = -1;
   else iteration++;
   if(g_debug_level > 0 && g_proc_id == 0) {
-    printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_pm->no_shifts, iteration, solver_pm->squared_solver_prec, etime - atime); 
+    printf("# CGMMSND (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_pm->no_shifts, iteration, solver_pm->squared_solver_prec, etime - atime); 
   }
 
   // freeing mms_squared_solver_prec if it has been allocated
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index fb0c9c3ae..f333e4be3 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -26,7 +26,11 @@
  *
  *
  *   int solve_degenerate(spinor * const P, spinor * const Q, const int max_iter, 
-           double eps_sq, const int rel_prec, const int N, matrix_mult f)
+ *                       double eps_sq, const int rel_prec, const int N, matrix_mult f)
+ *
+ *   int solve_mms_tm(spinor ** const P, spinor * const Q,
+ *                    solver_pm_t * solver_pm)  
+ *
  *   int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn, 
  *                    spinor * const Qup, spinor * const Qdn, 
  *                    solver_pm_t * solver_pm)  
@@ -135,6 +139,132 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
 }
 
 
+int solve_mms_tm(spinor ** const P, spinor * const Q,
+                 solver_pm_t * solver_pm){ 
+  int iteration_count = 0; 
+
+ if (solver_pm->type == CGMMS){
+    iteration_count = cg_mms_tm(P, Q, solver_pm);
+  }
+#ifdef DDalphaAMG
+  else if (solver_pm->type == MG) {
+    // if the mg_mms_mass is larger than the smallest shift we use MG
+    if (mg_no_shifts > 0 || mg_mms_mass >= solver_pm->shifts[0]) { 
+      // if solver_pm->mms_squared_solver_prec is NULL,
+      // filling it with solver_pm->squared_solver_prec
+      double *mms_squared_solver_prec = NULL;
+      if (solver_pm->mms_squared_solver_prec == NULL) {
+        mms_squared_solver_prec = (double*) malloc(solver_pm->no_shifts*sizeof(double));
+        for (int i=0; i<solver_pm->no_shifts; i++)
+          mms_squared_solver_prec[i] = solver_pm->squared_solver_prec;
+        solver_pm->mms_squared_solver_prec = mms_squared_solver_prec;
+      }
+
+      // if the mg_mms_mass is smaller than the larger shifts, we use CGMMS for those
+      // in case mg_no_shifts is used, then mg_mms_mass = 0
+      if(mg_mms_mass >= solver_pm->shifts[0]) {
+        mg_no_shifts = solver_pm->no_shifts;
+        while (mg_mms_mass < solver_pm->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
+      }
+      int no_shifts = solver_pm->no_shifts;
+      if (mg_no_shifts < no_shifts) {
+        solver_pm->no_shifts = no_shifts - mg_no_shifts;
+        solver_pm->shifts += mg_no_shifts;
+        solver_pm->mms_squared_solver_prec += mg_no_shifts;
+        iteration_count = cg_mms_tm( P+mg_no_shifts, Q, solver_pm );
+        // Restoring solver_pm
+        solver_pm->no_shifts = no_shifts;
+        solver_pm->shifts -= mg_no_shifts;
+        solver_pm->mms_squared_solver_prec -= mg_no_shifts;
+      }
+
+      for(int i = mg_no_shifts-1; i>=0; i--){
+        // preparing initial guess
+        if(i==no_shifts-1) {
+          zero_spinor_field(P[i], solver_pm->sdim);
+        } else {
+          double coeff;
+          for( int j = no_shifts-1; j > i; j-- ) {
+            coeff = 1;
+            for( int k = no_shifts-1; k > i; k-- ) {
+              if(j!=k)
+                coeff *= (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[i]*solver_pm->shifts[i])/
+                  (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[j]*solver_pm->shifts[j]);
+            }
+            if(j==no_shifts-1) {
+              mul_r(P[i], coeff, P[j], solver_pm->sdim);
+            } else {
+              assign_add_mul_r(P[i], P[j], coeff, solver_pm->sdim);
+            }
+          }
+        }
+        g_mu3 = solver_pm->shifts[i]; 
+        iteration_count += MG_solver( P[i], Q, solver_pm->mms_squared_solver_prec[i], solver_pm->max_iter,
+                                         solver_pm->rel_prec, solver_pm->sdim, g_gauge_field, solver_pm->M_psi );
+        g_mu3 = _default_g_mu3;
+      }
+      // freeing mms_squared_solver_prec if it has been allocated
+      if(mms_squared_solver_prec != NULL) {
+        free(mms_squared_solver_prec);
+        solver_pm->mms_squared_solver_prec = NULL;
+      }
+    } else {
+      iteration_count = cg_mms_tm( P, Q, solver_pm );
+    }
+  }
+#endif
+  else if (solver_pm->type == RGMIXEDCG){
+    matrix_mult32 f32  = Qtm_pm_psi_32;
+    if( solver_pm->M_psi == Qsw_pm_psi ){ 
+      f32  = Qsw_pm_psi_32;
+    }
+    iteration_count = 0;
+    // solver_params_t struct needs to be passed to all solvers except for cgmms, so we need to construct it here
+    // and set the one relevant parameter
+    solver_params_t temp_params;
+    temp_params.mcg_delta = _default_mixcg_innereps;
+    double iter_local = 0;
+    for(int i = solver_pm->no_shifts-1; i>=0; i--){
+      // preparing initial guess                                                                                                                                                                       
+      if(i==solver_pm->no_shifts-1) {
+        zero_spinor_field(P[i], solver_pm->sdim);
+      } else {
+        double coeff;
+        for( int j = solver_pm->no_shifts-1; j > i; j-- ) {
+          coeff = 1;
+          for( int k = solver_pm->no_shifts-1; k > i; k-- ) {
+            if(j!=k)
+              coeff *= (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[i]*solver_pm->shifts[i])/
+                (solver_pm->shifts[k]*solver_pm->shifts[k]-solver_pm->shifts[j]*solver_pm->shifts[j]);
+          }
+          if(j==solver_pm->no_shifts-1) {
+            mul_r(P[i], coeff, P[j], solver_pm->sdim);
+          } else {
+            assign_add_mul_r(P[i], P[j], coeff, solver_pm->sdim);
+          }
+        }
+      }
+      
+      // inverting
+      g_mu3 = solver_pm->shifts[i]; 
+      iter_local = rg_mixed_cg_her( P[i], Q, temp_params, solver_pm->max_iter,
+                                    solver_pm->mms_squared_solver_prec[i], solver_pm->rel_prec, solver_pm->sdim,
+                                    solver_pm->M_psi, f32);
+      g_mu3 = _default_g_mu3;
+      if(iter_local == -1){
+        return(-1);
+      } else {
+        iteration_count += iter_local;
+      }
+    }
+  } else {
+    if(g_proc_id==0) printf("Error: solver not allowed for TM mms solve. Aborting...\n");
+    exit(2);      
+  }
+
+  return(iteration_count);
+}
+
 int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn, 
                  spinor * const Qup, spinor * const Qdn, 
                  solver_pm_t * solver_pm){ 
diff --git a/solver/monomial_solve.h b/solver/monomial_solve.h
index 0cbe54391..cc3aab1dd 100644
--- a/solver/monomial_solve.h
+++ b/solver/monomial_solve.h
@@ -25,6 +25,8 @@
 #include"su3.h"
     int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_params, const int max_iter, 
            double eps_sq, const int rel_prec, const int N, matrix_mult f, int solver_type);
+    int solve_mms_tm(spinor ** const P, spinor * const Q,
+                     solver_pm_t * solver_pm);
     int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn, 
                      spinor * const Qup, spinor * const Qdn, 
                      solver_pm_t * solver_pm);

From d7399499fb1286f7ae007a972847334dff1eee0d Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Mon, 11 Dec 2017 14:22:02 +0200
Subject: [PATCH 33/85] Move initial guess to separate function

---
 solver/Makefile.in      |   2 +-
 solver/init_guess.c     | 143 ++++++++++++++++++++++++++++++++++++++++
 solver/init_guess.h     |  36 ++++++++++
 solver/monomial_solve.c | 139 +++-----------------------------------
 4 files changed, 190 insertions(+), 130 deletions(-)
 create mode 100644 solver/init_guess.c
 create mode 100644 solver/init_guess.h

diff --git a/solver/Makefile.in b/solver/Makefile.in
index 7c74903d2..acc712794 100644
--- a/solver/Makefile.in
+++ b/solver/Makefile.in
@@ -45,7 +45,7 @@ libsolver_TARGETS = bicgstab_complex gmres incr_eigcg eigcg restart_X ortho \
                     dirac_operator_eigenvectors	spectral_proj \
                     jdher_su3vect cg_her_su3vect eigenvalues_Jacobi \
 		    mcr cr mcr4complex bicg_complex monomial_solve \
-		    solver_types
+		    solver_types init_guess
 
 libsolver_OBJECTS = $(addsuffix .o, ${libsolver_TARGETS})
 
diff --git a/solver/init_guess.c b/solver/init_guess.c
new file mode 100644
index 000000000..29b8f0944
--- /dev/null
+++ b/solver/init_guess.c
@@ -0,0 +1,143 @@
+/***********************************************************************
+ *
+ *
+ * Copyright (C) 2016 Simone Bacchio
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ***********************************************************************/
+
+#ifndef _INIT_GUESS_H
+#define _INIT_GUESS_H
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "global.h"
+#include "su3.h"
+#include "gamma.h"
+#include "linalg_eo.h"
+#include "start.h"
+#include "gettime.h"
+#include "solver/solver.h"
+#include "solver_field.h"
+#include "operator/tm_operators.h"
+#include "operator/tm_operators_nd.h"
+#include "init_guess.h"
+#include <io/params.h>
+
+/* Fills P[shift] with a start vector: zero for the last shift, otherwise the Lagrange combination in shifts^2 of P[j], j>shift; with g_debug_level > 2 its relative residual is printed. Returns 0. */
+int init_guess_mms(spinor ** const P, spinor * const Q,
+                   int shift, solver_params_t * const solver_params) {
+  double * shifts=solver_params->shifts;
+  int no_shifts = solver_params->no_shifts;
+  if(shift==no_shifts-1) {
+    zero_spinor_field(P[shift], solver_params->sdim);
+  } else {
+    double coeff;
+    for( int j = no_shifts-1; j > shift; j-- ) {
+      coeff = 1;
+      for( int k = no_shifts-1; k > shift; k-- ) {
+        if(j!=k)
+          coeff *= (shifts[k]*shifts[k]-shifts[shift]*shifts[shift])/
+                   (shifts[k]*shifts[k]-shifts[j]*shifts[j]);
+      }
+      if(j==no_shifts-1) {
+        mul_r(P[shift], coeff, P[j], solver_params->sdim);
+      } else {
+        assign_add_mul_r(P[shift], P[j], coeff, solver_params->sdim);
+      }
+    }
+  }
+  if(g_debug_level > 2){
+    double old_g_mu3 = g_mu3;
+    spinor** temp;
+    if(solver_params->sdim == VOLUME/2) {
+      init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
+    } else {
+      init_solver_field(&temp, VOLUMEPLUSRAND, 1);
+    }
+
+    g_mu3 = solver_params->shifts[shift]; 
+    solver_params->M_psi( temp[0], P[shift]);
+    g_mu3 = old_g_mu3;
+
+    diff( temp[0], temp[0], Q, solver_params->sdim);
+    double res = sqrt(square_norm(temp[0], solver_params->sdim, 1)/square_norm(Q, solver_params->sdim, 1));
+    finalize_solver(temp, 1);
+    if(g_proc_id == 0)
+        printf("INITIAL GUESS: shift: %d relative residual: %e\n",shift,res); 
+  }
+  return 0; /* no failure path; a defined value is required by the int return type */
+}
+
+/* Two-flavour variant: fills Pup[shift]/Pdn[shift] with zero for the last shift, otherwise with the Lagrange combination in shifts^2 of Pup[j]/Pdn[j], j>shift; with g_debug_level > 2 the relative residual is printed. Returns 0. */
+int init_guess_mms_nd(spinor ** const Pup, spinor ** const Pdn, 
+                      spinor * const Qup, spinor * const Qdn, 
+                      int shift, solver_params_t * solver_params) {
+  double * shifts=solver_params->shifts;
+  int no_shifts = solver_params->no_shifts;
+  if(shift==no_shifts-1) {
+    zero_spinor_field(Pup[shift], solver_params->sdim);
+    zero_spinor_field(Pdn[shift], solver_params->sdim);
+  } else {
+    double coeff;
+    for( int j = no_shifts-1; j > shift; j-- ) {
+      coeff = 1;
+      for( int k = no_shifts-1; k > shift; k-- ) {
+        if(j!=k)
+          coeff *= (shifts[k]*shifts[k]-shifts[shift]*shifts[shift])/
+                   (shifts[k]*shifts[k]-shifts[j]*shifts[j]);
+      }
+      if(j==no_shifts-1) {
+        mul_r(Pup[shift], coeff, Pup[j], solver_params->sdim);
+        mul_r(Pdn[shift], coeff, Pdn[j], solver_params->sdim);
+      } else {
+        assign_add_mul_r(Pup[shift], Pup[j], coeff, solver_params->sdim);
+        assign_add_mul_r(Pdn[shift], Pdn[j], coeff, solver_params->sdim);
+      }
+    }
+  }
+  if(g_debug_level > 2){
+    double old_g_shift = g_shift;
+    matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+    spinor** temp;
+    if(solver_params->sdim == VOLUME/2) {
+      init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
+    } else {
+      init_solver_field(&temp, VOLUMEPLUSRAND, 2);
+    }
+    if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
+      f = Qsw_pm_ndpsi_shift;
+    g_shift = shifts[shift]*shifts[shift]; 
+    f( temp[0], temp[1], Pup[shift], Pdn[shift]);
+    g_shift = old_g_shift;
+
+    diff( temp[0], temp[0], Qup, solver_params->sdim);
+    diff( temp[1], temp[1], Qdn, solver_params->sdim);
+    double res = sqrt(square_norm(temp[0], solver_params->sdim, 1)+square_norm(temp[1], solver_params->sdim, 1))/
+      sqrt(square_norm(Qup, solver_params->sdim, 1)+square_norm(Qdn, solver_params->sdim, 1));
+    finalize_solver(temp, 2);
+    if(g_proc_id == 0)
+        printf("INITIAL GUESS: shift: %d relative residual: %e\n",shift,res); 
+  }
+  return 0; /* no failure path; a defined value is required by the int return type */
+}
+
+#endif
diff --git a/solver/init_guess.h b/solver/init_guess.h
new file mode 100644
index 000000000..763f5a72f
--- /dev/null
+++ b/solver/init_guess.h
@@ -0,0 +1,36 @@
+/***********************************************************************
+ *
+ *
+ * Copyright (C) 2016 Simone Bacchio
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ***********************************************************************/
+
+#ifndef _INIT_GUESS_H
+#define _INIT_GUESS_H
+
+#include"su3.h"
+#include"solver.h"
+
+int init_guess_mms(spinor ** const P, spinor * const Q,
+                   int shift, solver_params_t * const params);
+
+int init_guess_mms_nd(spinor ** const Pup, spinor ** const Pdn, 
+                      spinor * const Qup, spinor * const Qdn, 
+                      int shift, solver_params_t * solver_params);
+
+#endif
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index a25587f21..25e7b46d5 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -55,6 +55,7 @@
 #include "phmc.h"
 #include "solver/solver.h"
 #include "solver/solver_field.h"
+#include "solver/init_guess.h"
 #include "solver/matrix_mult_typedef.h"
 #include "solver/solver_types.h"
 #include "solver/solver_params.h"
@@ -241,25 +242,8 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       }
 
       for(int i = mg_no_shifts-1; i>=0; i--){
-        // preparing initial guess
-        if(i==no_shifts-1) {
-          zero_spinor_field(P[i], solver_params->sdim);
-        } else {
-          double coeff;
-          for( int j = no_shifts-1; j > i; j-- ) {
-            coeff = 1;
-            for( int k = no_shifts-1; k > i; k-- ) {
-              if(j!=k)
-                coeff *= (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[i]*solver_params->shifts[i])/
-                  (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[j]*solver_params->shifts[j]);
-            }
-            if(j==no_shifts-1) {
-              mul_r(P[i], coeff, P[j], solver_params->sdim);
-            } else {
-              assign_add_mul_r(P[i], P[j], coeff, solver_params->sdim);
-            }
-          }
-        }
+        // preparing initial guess                                                                                                                                                                       
+        init_guess_mms(P, Q, i, solver_params);
         g_mu3 = solver_params->shifts[i]; 
         iteration_count += MG_solver( P[i], Q, solver_params->mms_squared_solver_prec[i], solver_params->max_iter,
                                          solver_params->rel_prec, solver_params->sdim, g_gauge_field, solver_params->M_psi );
@@ -288,24 +272,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
     double iter_local = 0;
     for(int i = solver_params->no_shifts-1; i>=0; i--){
       // preparing initial guess                                                                                                                                                                       
-      if(i==solver_params->no_shifts-1) {
-        zero_spinor_field(P[i], solver_params->sdim);
-      } else {
-        double coeff;
-        for( int j = solver_params->no_shifts-1; j > i; j-- ) {
-          coeff = 1;
-          for( int k = solver_params->no_shifts-1; k > i; k-- ) {
-            if(j!=k)
-              coeff *= (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[i]*solver_params->shifts[i])/
-                (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[j]*solver_params->shifts[j]);
-          }
-          if(j==solver_params->no_shifts-1) {
-            mul_r(P[i], coeff, P[j], solver_params->sdim);
-          } else {
-            assign_add_mul_r(P[i], P[j], coeff, solver_params->sdim);
-          }
-        }
-      }
+      init_guess_mms(P, Q, i, solver_params);
       
       // inverting
       g_mu3 = solver_params->shifts[i]; 
@@ -396,59 +363,13 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     }
   } else if (solver_params->type == CGMMSND){
     iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_params);
-    /*
-#undef TEST
-#ifdef TEST
     //Testing the initial guess
-    double differ[2], residual;
-    spinor ** check_vect = NULL;
-    matrix_mult_nd f = Qtm_pm_ndpsi_shift;
-    if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
-      f = Qsw_pm_ndpsi_shift;
-    
-    init_solver_field(&check_vect, VOLUMEPLUSRAND/2,4);
-    differ[1] = sqrt(square_norm(Qup, solver_params->sdim, 1)+square_norm(Qdn, solver_params->sdim, 1));
-    
-    for(int i = solver_params->no_shifts-1; i>=0; i--){
-      // preparing initial guess
-      if(i==solver_params->no_shifts-1) {
-        zero_spinor_field(check_vect[0], solver_params->sdim);
-        zero_spinor_field(check_vect[1], solver_params->sdim);
-      } else {
-        double coeff;
-        for( int j = solver_params->no_shifts-1; j > i; j-- ) {
-          coeff = 1;
-          for( int k = solver_params->no_shifts-1; k > i; k-- ) {
-            if(j!=k)
-              coeff *= (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[i]*solver_params->shifts[i])/
-                (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[j]*solver_params->shifts[j]);
-          }
-          if(j==solver_params->no_shifts-1) {
-            mul_r(check_vect[0], coeff, Pup[j], solver_params->sdim);
-            mul_r(check_vect[1], coeff, Pdn[j], solver_params->sdim);
-          } else {
-            assign_add_mul_r(check_vect[0], Pup[j], coeff, solver_params->sdim);
-            assign_add_mul_r(check_vect[1], Pdn[j], coeff, solver_params->sdim);
-          }
-        }
+    if( g_debug_level > 3 ){
+      for(int i = solver_params->no_shifts-1; i>=0; i--){
+        // preparing initial guess
+        init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
       }
-      
-      g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
-    
-      f( check_vect[2], check_vect[3], check_vect[0], check_vect[1]);
-      diff( check_vect[2], check_vect[2], Qup, solver_params->sdim);
-      diff( check_vect[3], check_vect[3], Qdn, solver_params->sdim);
-      differ[0] = sqrt(square_norm(check_vect[2], solver_params->sdim, 1)+square_norm(check_vect[3], solver_params->sdim, 1));
-      
-      residual = differ[0]/differ[1];
-      
-      if(g_proc_id == 0)
-        printf("CHECH: shift: %d relative residual: %e\n",i,residual); 
-      g_shift = _default_g_shift;
     }
-    finalize_solver(check_vect, 4);
-#endif
-    */
   }
 #ifdef DDalphaAMG
   else if (solver_params->type == MG) {
@@ -479,27 +400,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
 
       for(int i = mg_no_shifts-1; i>=0; i--){
         // preparing initial guess
-        if(i==no_shifts-1) {
-          zero_spinor_field(Pup[i], solver_params->sdim);
-          zero_spinor_field(Pdn[i], solver_params->sdim);
-        } else {
-          double coeff;
-          for( int j = no_shifts-1; j > i; j-- ) {
-            coeff = 1;
-            for( int k = no_shifts-1; k > i; k-- ) {
-              if(j!=k)
-                coeff *= (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[i]*solver_params->shifts[i])/
-                  (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[j]*solver_params->shifts[j]);
-            }
-            if(j==no_shifts-1) {
-              mul_r(Pup[i], coeff, Pup[j], solver_params->sdim);
-              mul_r(Pdn[i], coeff, Pdn[j], solver_params->sdim);
-            } else {
-              assign_add_mul_r(Pup[i], Pup[j], coeff, solver_params->sdim);
-              assign_add_mul_r(Pdn[i], Pdn[j], coeff, solver_params->sdim);
-            }
-          }
-        }
+        init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
         
         g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
         iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->mms_squared_solver_prec[i], solver_params->max_iter,
@@ -526,27 +427,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     double iter_local = 0;
     for(int i = solver_params->no_shifts-1; i>=0; i--){
       // preparing initial guess                                                                                                                                                                       
-      if(i==solver_params->no_shifts-1) {
-        zero_spinor_field(Pup[i], solver_params->sdim);
-        zero_spinor_field(Pdn[i], solver_params->sdim);
-      } else {
-        double coeff;
-        for( int j = solver_params->no_shifts-1; j > i; j-- ) {
-          coeff = 1;
-          for( int k = solver_params->no_shifts-1; k > i; k-- ) {
-            if(j!=k)
-              coeff *= (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[i]*solver_params->shifts[i])/
-                (solver_params->shifts[k]*solver_params->shifts[k]-solver_params->shifts[j]*solver_params->shifts[j]);
-          }
-          if(j==solver_params->no_shifts-1) {
-            mul_r(Pup[i], coeff, Pup[j], solver_params->sdim);
-            mul_r(Pdn[i], coeff, Pdn[j], solver_params->sdim);
-          } else {
-            assign_add_mul_r(Pup[i], Pup[j], coeff, solver_params->sdim);
-            assign_add_mul_r(Pdn[i], Pdn[j], coeff, solver_params->sdim);
-          }
-        }
-      }
+      init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
       
       // inverting
       g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 

From b0d4aaa41c1be61fc675b5a77e4fb6d0be45453a Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Mon, 11 Dec 2017 17:34:11 +0200
Subject: [PATCH 34/85] Add inversion for single operator in monomial

---
 monomial/ndrat_monomial.c | 119 +++-----------------------------------
 solver/init_guess.c       |  59 +++++++++++++++++--
 solver/init_guess.h       |   3 +
 solver/monomial_solve.c   |  50 +++++++++++++++-
 4 files changed, 111 insertions(+), 120 deletions(-)

diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 1cad48106..606131625 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -48,9 +48,6 @@
 #include "phmc.h"
 #include "ndrat_monomial.h"
 #include "default_input_values.h"
-#ifdef DDalphaAMG
-#  include "DDalphaAMG_interface.h"
-#endif
 
 void nd_set_global_parameter(monomial * const mnl) {
 
@@ -236,118 +233,16 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   mnl->solver_params.rel_prec = g_relative_precision_flag;
 
   // this generates all X_j,o (odd sites only) -> g_chi_up|dn_spinor_field
-  mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
-                            mnl->pf, mnl->pf2, &(mnl->solver_params) );
-
-#ifdef DDalphaAMG
-  // With MG we can solve directly the unsquared operator
-  if( mnl->solver == MG && (mg_no_shifts > 0 || mg_mms_mass >= mnl->solver_params.shifts[0]) ){
-
-    // if the mg_mms_mass is smaller than the largest shifts, we use CGMMS for those
-    // in case mg_no_shifts is used, then mg_mms_mass = 0
-    if(mg_mms_mass >= mnl->solver_params.shifts[0]) {
-      mg_no_shifts = mnl->solver_params.no_shifts;
-      while (mg_mms_mass < mnl->solver_params.shifts[mg_no_shifts-1]) { mg_no_shifts--; }
-    }
-    int no_shifts = mnl->solver_params.no_shifts;
-    if (mg_no_shifts < no_shifts) {
-      mnl->solver_params.no_shifts = no_shifts - mg_no_shifts;
-      mnl->solver_params.shifts += mg_no_shifts;
-      if(mnl->solver_params.mms_squared_solver_prec!=NULL)
-        mnl->solver_params.mms_squared_solver_prec += mg_no_shifts;
-      // We store the solutions not in the right place (without shifting of mg_no_shifts)
-      // for them applying the operator and storing at the right place the unsquared solution.
-      mnl->iter0 = cg_mms_tm_nd( g_chi_up_spinor_field, g_chi_dn_spinor_field, mnl->pf, mnl->pf2, &(mnl->solver_params) );
-      for(int j = mnl->solver_params.no_shifts-1; j >= 0; j--) {
-        // Q_h * tau^1 - i nu_j
-        // this needs phmc_Cpol = 1 to work!
-        if(mnl->type == NDCLOVERRAT) {
-          Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[j+mg_no_shifts], g_chi_dn_spinor_field[j+mg_no_shifts],
-                                   g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-                                   I*mnl->solver_params.shifts[j], 1., mnl->EVMaxInv);
-        }
-        else {
-          Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[j+mg_no_shifts], g_chi_dn_spinor_field[j+mg_no_shifts],
-                                 g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-                                 I*mnl->solver_params.shifts[j], 1., mnl->EVMaxInv);
-        }
-      }
-      // Restoring mnl->solver_params
-      mnl->solver_params.no_shifts = no_shifts;
-      mnl->solver_params.shifts -= mg_no_shifts;
-      if(mnl->solver_params.mms_squared_solver_prec!=NULL)
-        mnl->solver_params.mms_squared_solver_prec -= mg_no_shifts;
-    }
-
-    matrix_mult_nd f = Qtm_tau1_ndpsi_add_Ishift;
-    if( mnl->solver_params.M_ndpsi == Qsw_pm_ndpsi )
-      f = Qsw_tau1_ndpsi_add_Ishift;
-
-    // preparing initial guess
-    for(int i = mg_no_shifts-1; i>=0; i--){
-      if(i==no_shifts-1) {
-        zero_spinor_field(g_chi_up_spinor_field[i], mnl->solver_params.sdim);
-        zero_spinor_field(g_chi_dn_spinor_field[i], mnl->solver_params.sdim);
-      } else {
-        double coeff;
-        for( int j = no_shifts-1; j > i; j-- ) {
-          coeff = 1;
-          for( int k = no_shifts-1; k > i; k-- ) {
-            if(j!=k)
-              coeff *= (mnl->solver_params.shifts[k]-mnl->solver_params.shifts[i])/(mnl->solver_params.shifts[k]-mnl->solver_params.shifts[j]);
-          }
-          if(j==no_shifts-1) {
-            mul_r(g_chi_up_spinor_field[i], coeff, g_chi_up_spinor_field[j], mnl->solver_params.sdim);
-            mul_r(g_chi_dn_spinor_field[i], coeff, g_chi_dn_spinor_field[j], mnl->solver_params.sdim);
-          } else {
-            assign_add_mul_r(g_chi_up_spinor_field[i], g_chi_up_spinor_field[j], coeff, mnl->solver_params.sdim);
-            assign_add_mul_r(g_chi_dn_spinor_field[i], g_chi_dn_spinor_field[j], coeff, mnl->solver_params.sdim);
-          }
-        }
-      }
-      
-      // g_shift = shift^2 and then in Qsw_tau1_ndpsi_add_Ishift the square root is taken
-      g_shift = mnl->solver_params.shifts[i]*mnl->solver_params.shifts[i]; 
-      mnl->iter0 += MG_solver_nd( g_chi_up_spinor_field[i], g_chi_dn_spinor_field[i], mnl->pf, mnl->pf2,
-                                  mnl->solver_params.mms_squared_solver_prec[i],
-                                  mnl->solver_params.max_iter, mnl->solver_params.rel_prec, mnl->solver_params.sdim, g_gauge_field, f );
-      g_shift = _default_g_shift;
-    }
+  mnl->iter0 = solve_mms_nd_plus(g_chi_up_spinor_field, g_chi_dn_spinor_field,
+                                 mnl->pf, mnl->pf2, &(mnl->solver_params) );
+  
+  assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
+  assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
     
-    assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
-    assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
-
-    // apply C to the random field to generate pseudo-fermion fields
-    for(int j = (mnl->rat.np-1); j > -1; j--) {
+  // apply C to the random field to generate pseudo-fermion fields
+  for(int j = (mnl->rat.np-1); j > -1; j--) {
       assign_add_mul(mnl->pf, g_chi_up_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
       assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
-    }
-  } else 
-#endif
-    {
-    mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
-                              mnl->pf, mnl->pf2, &(mnl->solver_params));
-    
-    assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
-    assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
-    
-    // apply C to the random field to generate pseudo-fermion fields
-    for(int j = (mnl->rat.np-1); j > -1; j--) {
-      // Q_h * tau^1 - i nu_j
-      // this needs phmc_Cpol = 1 to work!
-      if(mnl->type == NDCLOVERRAT) {
-        Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
-                                 g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-                                 I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
-      }
-      else {
-        Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->rat.np], g_chi_dn_spinor_field[mnl->rat.np],
-                               g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-                               I*mnl->rat.nu[j], 1., mnl->EVMaxInv);
-      }
-      assign_add_mul(mnl->pf, g_chi_up_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
-      assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[mnl->rat.np], I*mnl->rat.rnu[j], VOLUME/2);
-    }
   }
 
   free(mnl->solver_params.mms_squared_solver_prec);
diff --git a/solver/init_guess.c b/solver/init_guess.c
index 29b8f0944..d8d5e611d 100644
--- a/solver/init_guess.c
+++ b/solver/init_guess.c
@@ -20,9 +20,6 @@
  *
  ***********************************************************************/
 
-#ifndef _INIT_GUESS_H
-#define _INIT_GUESS_H
-
 #ifdef HAVE_CONFIG_H
 # include<config.h>
 #endif
@@ -116,14 +113,14 @@ int init_guess_mms_nd(spinor ** const Pup, spinor ** const Pdn,
   if(g_debug_level > 2){
     double old_g_shift = g_shift;
     matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+    if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
+      f = Qsw_pm_ndpsi_shift;
     spinor** temp;
     if(solver_params->sdim == VOLUME/2) {
       init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
     } else {
       init_solver_field(&temp, VOLUMEPLUSRAND, 2);
     }
-    if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
-      f = Qsw_pm_ndpsi_shift;
 
     g_shift = shifts[shift]*shifts[shift]; 
     f( temp[0], temp[1], Pup[shift], Pdn[shift]);
@@ -140,4 +137,54 @@ int init_guess_mms_nd(spinor ** const Pup, spinor ** const Pdn,
   }
 }
 
-#endif
+/* Initial guess for shift index `shift` of the unsquared ND solve: zero for the last shift, otherwise the
+ * Lagrange extrapolation (in the shift value) of Pup[j],Pdn[j], j > shift, assumed already solved by the caller. */
+int init_guess_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn, 
+                           spinor * const Qup, spinor * const Qdn, 
+                           int shift, solver_params_t * solver_params) {
+  double * shifts=solver_params->shifts;
+  int no_shifts = solver_params->no_shifts;
+  if(shift==no_shifts-1) {
+    zero_spinor_field(Pup[shift], solver_params->sdim);
+    zero_spinor_field(Pdn[shift], solver_params->sdim);
+  } else {
+    for( int j = no_shifts-1; j > shift; j-- ) {
+      // weight of solution j: Lagrange basis polynomial over the nodes shifts[k], k > shift, at shifts[shift]
+      double coeff = 1;
+      for( int k = no_shifts-1; k > shift; k-- ) {
+        if(j!=k)
+          coeff *= (shifts[k]-shifts[shift])/(shifts[k]-shifts[j]);
+      }
+      if(j==no_shifts-1) { // first term: overwrite the guess instead of accumulating into it
+        mul_r(Pup[shift], coeff, Pup[j], solver_params->sdim);
+        mul_r(Pdn[shift], coeff, Pdn[j], solver_params->sdim);
+      } else {
+        assign_add_mul_r(Pup[shift], Pup[j], coeff, solver_params->sdim);
+        assign_add_mul_r(Pdn[shift], Pdn[j], coeff, solver_params->sdim);
+      }
+    }
+  }
+  if(g_debug_level > 2){ // debug only: print the relative residual |f(guess) - Q| / |Q| of the guess
+    double old_g_shift = g_shift;
+    matrix_mult_nd f = Qtm_tau1_ndpsi_add_Ishift;
+    if( solver_params->M_ndpsi == Qsw_pm_ndpsi )
+      f = Qsw_tau1_ndpsi_add_Ishift;
+    spinor** temp;
+    if(solver_params->sdim == VOLUME/2) {
+      init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
+    } else {
+      init_solver_field(&temp, VOLUMEPLUSRAND, 2);
+    }
+    g_shift = shifts[shift]*shifts[shift]; // g_shift is set to shift^2 around the call and restored afterwards
+    f( temp[0], temp[1], Pup[shift], Pdn[shift]);
+    g_shift = old_g_shift;
+
+    diff( temp[0], temp[0], Qup, solver_params->sdim);
+    diff( temp[1], temp[1], Qdn, solver_params->sdim);
+    double res = sqrt(square_norm(temp[0], solver_params->sdim, 1)+square_norm(temp[1], solver_params->sdim, 1))/
+      sqrt(square_norm(Qup, solver_params->sdim, 1)+square_norm(Qdn, solver_params->sdim, 1));
+    finalize_solver(temp, 2);
+    if(g_proc_id == 0) printf("INITIAL GUESS: shift: %d relative residual: %e\n",shift,res);
+  }
+  return 0; // always 0: no error is detected or reported here (the function previously fell off the end)
+}
diff --git a/solver/init_guess.h b/solver/init_guess.h
index 763f5a72f..8ae29e80c 100644
--- a/solver/init_guess.h
+++ b/solver/init_guess.h
@@ -33,4 +33,7 @@ int init_guess_mms_nd(spinor ** const Pup, spinor ** const Pdn,
                       spinor * const Qup, spinor * const Qdn, 
                       int shift, solver_params_t * solver_params);
 
+int init_guess_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn, 
+                           spinor * const Qup, spinor * const Qdn, 
+                           int shift, solver_params_t * solver_params);
 #endif
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 25e7b46d5..6949d0675 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -271,7 +271,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
     temp_params.mcg_delta = _default_mixcg_innereps;
     double iter_local = 0;
     for(int i = solver_params->no_shifts-1; i>=0; i--){
-      // preparing initial guess                                                                                                                                                                       
+      // preparing initial guess
       init_guess_mms(P, Q, i, solver_params);
       
       // inverting
@@ -426,7 +426,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     temp_params.mcg_delta = _default_mixcg_innereps;
     double iter_local = 0;
     for(int i = solver_params->no_shifts-1; i>=0; i--){
-      // preparing initial guess                                                                                                                                                                       
+      // preparing initial guess
       init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
       
       // inverting
@@ -452,3 +452,49 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
   }
   return(iteration_count);
 }
+
+/* Solve, for every shift i, the single (unsquared) shifted ND operator with source (Qup,Qdn) into Pup[i],Pdn[i]:
+ * directly with MG when compiled in and selected, otherwise via solve_mms_nd plus one operator application. */
+int solve_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn, 
+                      spinor * const Qup, spinor * const Qdn, 
+                      solver_params_t * solver_params){ 
+  int iteration_count = 0; 
+#ifdef DDalphaAMG
+  // With MG we can solve directly the unsquared operator
+  if( solver_params->type == MG ){
+    matrix_mult_nd f = Qtm_tau1_ndpsi_add_Ishift;
+    if( solver_params->M_ndpsi == Qsw_pm_ndpsi )
+      f = Qsw_tau1_ndpsi_add_Ishift;
+    for(int i = solver_params->no_shifts-1; i>=0; i--){
+      // preparing initial guess
+      init_guess_mms_nd_plus(Pup, Pdn, Qup, Qdn, i, solver_params);
+  
+      // g_shift = shift^2 and then in Qsw_tau1_ndpsi_add_Ishift the square root is taken
+      g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
+      iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->mms_squared_solver_prec[i],
+                                       solver_params->max_iter, solver_params->rel_prec, solver_params->sdim,
+                                       g_gauge_field, f );
+      g_shift = _default_g_shift;
+    }
+  } else 
+#endif
+  {
+    iteration_count = solve_mms_nd(Pup, Pdn, Qup, Qdn, solver_params);
+    // apply operator for retrieving unsquared solution
+    matrix_mult_nd f = Qtm_tau1_ndpsi_sub_Ishift;
+    if( solver_params->M_ndpsi == Qsw_pm_ndpsi )
+      f = Qsw_tau1_ndpsi_sub_Ishift;
+    spinor** temp;
+    // two work fields are required: f writes temp[0] and temp[1], and both are released below
+    init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
+    for(int i = solver_params->no_shifts-1; i>=0; i--){
+      g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
+      f(temp[0],temp[1],Pup[i],Pdn[i]);
+      assign(Pup[i], temp[0], VOLUME/2);
+      assign(Pdn[i], temp[1], VOLUME/2);
+      g_shift = _default_g_shift;
+    }
+    finalize_solver(temp, 2);
+  }
+  return iteration_count;
+}

From e0b26532b540771f66c7d42c0191001eefac74a8 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 14:23:48 +0200
Subject: [PATCH 35/85] Removing mms_squared_solver_prec. Not needed at the
 end.

---
 monomial/ndrat_monomial.c   |  6 -----
 solver/cg_mms_tm.c          | 24 ++++----------------
 solver/cg_mms_tm_nd.c       | 24 ++++----------------
 solver/monomial_solve.c     | 44 +++++--------------------------------
 solver/rg_mixed_cg_her_nd.c |  2 +-
 solver/solver_params.h      |  2 --
 6 files changed, 14 insertions(+), 88 deletions(-)

diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index 606131625..b6cc2adb9 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -245,9 +245,6 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
       assign_add_mul(mnl->pf2, g_chi_dn_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
   }
 
-  free(mnl->solver_params.mms_squared_solver_prec);
-  mnl->solver_params.mms_squared_solver_prec = NULL;
-
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
@@ -304,9 +301,6 @@ double ndrat_acc(const int id, hamiltonian_field_t * const hf) {
   mnl->energy1 = scalar_prod_r(mnl->pf, mnl->w_fields[0], VOLUME/2, 1);
   mnl->energy1 += scalar_prod_r(mnl->pf2, mnl->w_fields[1], VOLUME/2, 1);
 
-  free(mnl->solver_params.mms_squared_solver_prec);
-  mnl->solver_params.mms_squared_solver_prec = NULL;
-
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
diff --git a/solver/cg_mms_tm.c b/solver/cg_mms_tm.c
index 4372fba5d..4001ea9a9 100644
--- a/solver/cg_mms_tm.c
+++ b/solver/cg_mms_tm.c
@@ -72,16 +72,6 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
   double atime, etime;
   const int nr_sf = 3;
 
-  // if solver_params->mms_squared_solver_prec is NULL,
-  // filling it with solver_params->squared_solver_prec
-  double *mms_squared_solver_prec = NULL;
-  if (solver_params->mms_squared_solver_prec == NULL) {
-    mms_squared_solver_prec = (double*) malloc(solver_params->no_shifts*sizeof(double));
-    for (int i=0; i<solver_params->no_shifts; i++)
-      mms_squared_solver_prec[i] = solver_params->squared_solver_prec;
-    solver_params->mms_squared_solver_prec = mms_squared_solver_prec;
-  }
-
   atime = gettime();
   if(solver_params->sdim == VOLUME) {
     init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf);
@@ -157,8 +147,8 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
 	double sn = square_norm(ps_mms_solver[no_shifts-2], N, 1);
         err = alphas[no_shifts-1]*alphas[no_shifts-1]*sn;
         // while because more than one shift could be converged
-	while(((err <= solver_params->mms_squared_solver_prec[no_shifts-1]) && (solver_params->rel_prec == 0)) ||
-              ((err <= solver_params->mms_squared_solver_prec[no_shifts-1]*squarenorm) && (solver_params->rel_prec > 0))) {
+	while(((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
+              ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0))) {
 	  no_shifts--;
 	  if(g_debug_level > 2 && g_proc_id == 0) {
 	    printf("# CGMMS: at iteration %d removed one shift, %d remaining\n", iteration, no_shifts);
@@ -186,8 +176,8 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
       printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
     }
 
-    if( ((err <= solver_params->mms_squared_solver_prec[0]) && (solver_params->rel_prec == 0) && no_shifts==1) ||
-        ((err <= solver_params->mms_squared_solver_prec[0]*squarenorm) && (solver_params->rel_prec > 0) && no_shifts==1) ||
+    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0) && no_shifts==1) ||
+        ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0) && no_shifts==1) ||
         (iteration == solver_params->max_iter -1) ) {
         break;
     }
@@ -213,12 +203,6 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
     printf("# CGMMS (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime); 
   }
 
-  // freeing mms_squared_solver_prec if it has been allocated
-  if(mms_squared_solver_prec != NULL) {
-    free(mms_squared_solver_prec);
-    solver_params->mms_squared_solver_prec = NULL;
-  }
-  
   finalize_solver(solver_field, nr_sf);
   return(iteration);
 }
diff --git a/solver/cg_mms_tm_nd.c b/solver/cg_mms_tm_nd.c
index 7bec82725..c1a1ff17a 100644
--- a/solver/cg_mms_tm_nd.c
+++ b/solver/cg_mms_tm_nd.c
@@ -74,16 +74,6 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
 
   if(g_proc_id == 0 && g_debug_level > 2) printf("# CGMMSND: solving %d shifts\n", shifts);
 
-  // if solver_params->mms_squared_solver_prec is NULL,
-  // filling it with solver_params->squared_solver_prec
-  double *mms_squared_solver_prec = NULL;
-  if (solver_params->mms_squared_solver_prec == NULL) {
-    mms_squared_solver_prec = (double*) malloc(solver_params->no_shifts*sizeof(double));
-    for (int i=0; i<solver_params->no_shifts; i++)
-      mms_squared_solver_prec[i] = solver_params->squared_solver_prec;
-    solver_params->mms_squared_solver_prec = mms_squared_solver_prec;
-  }
-
   atime = gettime();
   if(solver_params->sdim == VOLUME) {
     init_solver_field(&solver_field, VOLUMEPLUSRAND, 2*nr_sf);
@@ -171,8 +161,8 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
         double sn = square_norm(ps_mms_solver[2*(shifts-1)], N, 1);
         sn += square_norm(ps_mms_solver[2*(shifts-1)+1], N, 1);
         err = alphas[shifts-1]*alphas[shifts-1]*sn;
-	while(((err <= solver_params->mms_squared_solver_prec[shifts-1]) && (solver_params->rel_prec == 0)) ||
-              ((err <= solver_params->mms_squared_solver_prec[shifts-1]*squarenorm) && (solver_params->rel_prec > 0))) {
+	while(((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
+              ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0))) {
           // for testing purpose
 	  if(g_debug_level > 3) {
 	    if (g_proc_id == 0) printf("# CGMMSND: residual of remaining shifts\n");
@@ -216,8 +206,8 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
       printf("# CGMMSND iteration: %d residue: %g\n", iteration, err); fflush( stdout );
     }
 
-    if( ((err <= solver_params->mms_squared_solver_prec[0]) && (solver_params->rel_prec == 0) && shifts==1) ||
-	((err <= solver_params->mms_squared_solver_prec[0]*squarenorm) && (solver_params->rel_prec > 0) && shifts==1) ||
+    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0) && shifts==1) ||
+	((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0) && shifts==1) ||
         (iteration == solver_params->max_iter -1) ) {
       break;
     }
@@ -245,12 +235,6 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
     printf("# CGMMSND (%d shifts): iter: %d eps_sq: %1.4e %1.4e t/s\n", solver_params->no_shifts, iteration, solver_params->squared_solver_prec, etime - atime); 
   }
 
-  // freeing mms_squared_solver_prec if it has been allocated
-  if(mms_squared_solver_prec != NULL) {
-    free(mms_squared_solver_prec);
-    solver_params->mms_squared_solver_prec = NULL;
-  }
-  
   finalize_solver(solver_field, 2*nr_sf);
   return(iteration);
 }
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 6949d0675..eeb93162f 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -213,16 +213,6 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
   else if (solver_params->type == MG) {
     // if the mg_mms_mass is larger than the smallest shift we use MG
     if (mg_no_shifts > 0 || mg_mms_mass >= solver_params->shifts[0]) { 
-      // if solver_params->mms_squared_solver_prec is NULL,
-      // filling it with solver_params->squared_solver_prec
-      double *mms_squared_solver_prec = NULL;
-      if (solver_params->mms_squared_solver_prec == NULL) {
-        mms_squared_solver_prec = (double*) malloc(solver_params->no_shifts*sizeof(double));
-        for (int i=0; i<solver_params->no_shifts; i++)
-          mms_squared_solver_prec[i] = solver_params->squared_solver_prec;
-        solver_params->mms_squared_solver_prec = mms_squared_solver_prec;
-      }
-
       // if the mg_mms_mass is smaller than the larger shifts, we use CGMMS for those
       // in case mg_no_shifts is used, then mg_mms_mass = 0
       if(mg_mms_mass >= solver_params->shifts[0]) {
@@ -233,27 +223,20 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       if (mg_no_shifts < no_shifts) {
         solver_params->no_shifts = no_shifts - mg_no_shifts;
         solver_params->shifts += mg_no_shifts;
-        solver_params->mms_squared_solver_prec += mg_no_shifts;
         iteration_count = cg_mms_tm( P+mg_no_shifts, Q, solver_params );
         // Restoring solver_params
         solver_params->no_shifts = no_shifts;
         solver_params->shifts -= mg_no_shifts;
-        solver_params->mms_squared_solver_prec -= mg_no_shifts;
       }
 
       for(int i = mg_no_shifts-1; i>=0; i--){
         // preparing initial guess                                                                                                                                                                       
         init_guess_mms(P, Q, i, solver_params);
         g_mu3 = solver_params->shifts[i]; 
-        iteration_count += MG_solver( P[i], Q, solver_params->mms_squared_solver_prec[i], solver_params->max_iter,
+        iteration_count += MG_solver( P[i], Q, solver_params->squared_solver_prec, solver_params->max_iter,
                                          solver_params->rel_prec, solver_params->sdim, g_gauge_field, solver_params->M_psi );
         g_mu3 = _default_g_mu3;
       }
-      // freeing mms_squared_solver_prec if it has been allocated
-      if(mms_squared_solver_prec != NULL) {
-        free(mms_squared_solver_prec);
-        solver_params->mms_squared_solver_prec = NULL;
-      }
     } else {
       iteration_count = cg_mms_tm( P, Q, solver_params );
     }
@@ -277,7 +260,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       // inverting
       g_mu3 = solver_params->shifts[i]; 
       iter_local = rg_mixed_cg_her( P[i], Q, temp_params, solver_params->max_iter,
-                                    solver_params->mms_squared_solver_prec[i], solver_params->rel_prec, solver_params->sdim,
+                                    solver_params->squared_solver_prec, solver_params->rel_prec, solver_params->sdim,
                                     solver_params->M_psi, f32);
       g_mu3 = _default_g_mu3;
       if(iter_local == -1){
@@ -299,16 +282,6 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
                  solver_params_t * solver_params){ 
   int iteration_count = 0; 
 
-  // if solver_params->mms_squared_solver_prec is NULL,
-  // filling it with solver_params->squared_solver_prec
-  double *mms_squared_solver_prec = NULL;
-  if (solver_params->mms_squared_solver_prec == NULL) {
-    mms_squared_solver_prec = (double*) malloc(solver_params->no_shifts*sizeof(double));
-    for (int i=0; i<solver_params->no_shifts; i++)
-      mms_squared_solver_prec[i] = solver_params->squared_solver_prec;
-    solver_params->mms_squared_solver_prec = mms_squared_solver_prec;
-  }
-
 #ifdef TM_USE_QPHIX
   if(solver_params->external_inverter == QPHIX_INVERTER){
     spinor** temp;
@@ -386,12 +359,10 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       if (mg_no_shifts < no_shifts) {
         solver_params->no_shifts = no_shifts - mg_no_shifts;
         solver_params->shifts += mg_no_shifts;
-        solver_params->mms_squared_solver_prec += mg_no_shifts;
         iteration_count = cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_params );
         // Restoring solver_params
         solver_params->no_shifts = no_shifts;
         solver_params->shifts -= mg_no_shifts;
-        solver_params->mms_squared_solver_prec -= mg_no_shifts;
       }
 
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
@@ -403,7 +374,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
         
         g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
-        iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->mms_squared_solver_prec[i], solver_params->max_iter,
+        iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->squared_solver_prec, solver_params->max_iter,
                                          solver_params->rel_prec, solver_params->sdim, g_gauge_field, f );
         g_shift = _default_g_shift;
       }
@@ -432,7 +403,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       // inverting
       g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
       iter_local = rg_mixed_cg_her_nd( Pup[i], Pdn[i], Qup, Qdn, temp_params, solver_params->max_iter,
-                                       solver_params->mms_squared_solver_prec[i], solver_params->rel_prec, solver_params->sdim, f, f32);
+                                       solver_params->squared_solver_prec, solver_params->rel_prec, solver_params->sdim, f, f32);
       g_shift = _default_g_shift;
       if(iter_local == -1){
         return(-1);
@@ -445,11 +416,6 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     exit(2);      
   }
 
-  // freeing mms_squared_solver_prec if it has been allocated
-  if(mms_squared_solver_prec != NULL) {
-    free(mms_squared_solver_prec);
-    solver_params->mms_squared_solver_prec = NULL;
-  }
   return(iteration_count);
 }
 
@@ -471,7 +437,7 @@ int solve_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn,
   
       // g_shift = shift^2 and then in Qsw_tau1_ndpsi_add_Ishift the square root is taken
       g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
-      iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->mms_squared_solver_prec[i],
+      iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->squared_solver_prec,
                                        solver_params->max_iter, solver_params->rel_prec, solver_params->sdim,
                                        g_gauge_field, f );
       g_shift = _default_g_shift;
diff --git a/solver/rg_mixed_cg_her_nd.c b/solver/rg_mixed_cg_her_nd.c
index b39c5d406..e2541d163 100644
--- a/solver/rg_mixed_cg_her_nd.c
+++ b/solver/rg_mixed_cg_her_nd.c
@@ -267,7 +267,7 @@ int rg_mixed_cg_her_nd(spinor * const P_up, spinor * const P_dn, spinor * const
 
   assign_to_32(r_up,rhigh_up,N); assign_to_32(r_dn,rhigh_dn,N);
   rho_sp = rho_dp;
-  assign_32(p_up,r_up,N); assign_32(p_dn,r_dn,N); 
+  assign_32(p_up,r_up,N); assign_32(p_dn,r_dn,N);
 
   iter_in_sp += inner_loop(x_up, x_dn, p_up, p_dn, q_up, q_dn, r_up, r_dn, &rho_sp, delta, 
                            f32, (float)target_eps_sq, 
diff --git a/solver/solver_params.h b/solver/solver_params.h
index 40405667d..fac350515 100644
--- a/solver/solver_params.h
+++ b/solver/solver_params.h
@@ -89,8 +89,6 @@ typedef struct {
   matrix_mult_nd32 M_ndpsi32;  
   // pointer to array of shifts
   double * shifts;
-  // squared desired residue for each shift in mms. If NULL use squared_solver_prec for all
-  double * mms_squared_solver_prec;
   
   solution_type_t solution_type;
   

From 0e5c2b668bcd9b14f35eb9f04d4c3d7869d713e9 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 14:30:26 +0200
Subject: [PATCH 36/85] Done FIXME

---
 solver/monomial_solve.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index eeb93162f..7b66b7b91 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -193,14 +193,15 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       mul_gamma5(P[shift], VOLUME/2);
     }
     if(g_debug_level > 0){
-      // FIXME: in the shift-by-shift branch, the shifted operator exists explicitly and could be used to 
-      // truly check the residual here
-      solver_params->M_psi(temp[0], P[0]);
-      diff(temp[0], temp[0], Q, VOLUME/2);
-      double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
-      if( g_proc_id == 0 ){
-        printf("# solve_mshift_oneflavour residual check: %e\n", diffnorm);
-        printf("# NOTE that this currently repors the residual for the *unishfted* operator!\n");
+      for(int i = mg_no_shifts-1; i>=0; i--){
+        g_mu3 = solver_params->shifts[i]; 
+        solver_params->M_psi(temp[0], P[i]);
+        g_mu3 = _default_g_mu3;
+        diff(temp[0], temp[0], Q, VOLUME/2);
+        double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
+        if( g_proc_id == 0 ){
+          printf("# solve_mms_tm residual check: shift %d, res. %e\n", i, diffnorm);
+        }
       }
     }
     finalize_solver(temp, 1);

From 777edf46709c615c915ca4384c6468e76830a296 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 14:34:00 +0200
Subject: [PATCH 37/85] Missed declaration of solve_mms_nd_plus

---
 solver/monomial_solve.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/solver/monomial_solve.h b/solver/monomial_solve.h
index 776fb2129..6a42c4558 100644
--- a/solver/monomial_solve.h
+++ b/solver/monomial_solve.h
@@ -33,5 +33,8 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
 int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn, 
                  spinor * const Qup, spinor * const Qdn, 
                  solver_params_t * solver_params);
+int solve_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn, 
+                      spinor * const Qup, spinor * const Qdn, 
+                      solver_params_t * solver_params);
 
 #endif

From 5d7ef13ad575ff3dfc9dd670c7d3891ae471f15d Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 14:41:53 +0200
Subject: [PATCH 38/85] Done FIXME part 2

---
 solver/monomial_solve.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 7b66b7b91..c6fea6ce1 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -191,11 +191,9 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
                                                         solver_params->compression_type);
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       mul_gamma5(P[shift], VOLUME/2);
-    }
-    if(g_debug_level > 0){
-      for(int i = mg_no_shifts-1; i>=0; i--){
-        g_mu3 = solver_params->shifts[i]; 
-        solver_params->M_psi(temp[0], P[i]);
+      if(g_debug_level > 0){
+        g_mu3 = solver_params->shifts[shift]; 
+        solver_params->M_psi(temp[0], P[shift]);
         g_mu3 = _default_g_mu3;
         diff(temp[0], temp[0], Q, VOLUME/2);
         double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
@@ -305,17 +303,20 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       mul_r_gamma5(Pup[shift], maxev_sq, VOLUME/2);
       mul_r_gamma5(Pdn[shift], maxev_sq, VOLUME/2);
-    }
-    if( g_debug_level > 0 ){
-      // FIXME: in the shift-by-shift branch, the shifted operator exists explicitly and could be used to 
-      // truly check the residual here
-      solver_params->M_ndpsi(temp[0], temp[1], Pup[0], Pdn[0]);
-      diff(temp[0], temp[0], Qup, VOLUME/2);
-      diff(temp[1], temp[1], Qdn, VOLUME/2);
-      double diffnorm = square_norm(temp[0], VOLUME/2, 1) + square_norm(temp[1], VOLUME/2, 1); 
-      if( g_proc_id == 0 ){
-        printf("# solve_mms_nd residual check: %e\n", diffnorm);
-        printf("# NOTE that this currently repors the residual for the *unishfted* operator!\n");
+      if( g_debug_level > 0 ){
+        matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+        if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
+          f = Qsw_pm_ndpsi_shift;
+        g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
+        f(temp[0], temp[1], Pup[shift], Pdn[shift]);
+        g_shift = _default_g_shift;
+        diff(temp[0], temp[0], Qup, VOLUME/2);
+        diff(temp[1], temp[1], Qdn, VOLUME/2);
+        double diffnorm = square_norm(temp[0], VOLUME/2, 1) + square_norm(temp[1], VOLUME/2, 1); 
+        if( g_proc_id == 0 ){
+          printf("# solve_mms_nd residual check: %e\n", diffnorm);
+          printf("# NOTE that this currently repors the residual for the *unishfted* operator!\n");
+        }
       }
     }
     finalize_solver(temp, 2);

From 578bfbb317dd454e34993574ad5c6435724695d4 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 14:45:00 +0200
Subject: [PATCH 39/85] Removing spurious changes

---
 solver/cg_mms_tm.c    | 4 ++--
 solver/cg_mms_tm_nd.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/solver/cg_mms_tm.c b/solver/cg_mms_tm.c
index 4001ea9a9..a1616affb 100644
--- a/solver/cg_mms_tm.c
+++ b/solver/cg_mms_tm.c
@@ -176,8 +176,8 @@ int cg_mms_tm(spinor ** const P, spinor * const Q,
       printf("# CGMMS iteration: %d residue: %g\n", iteration, err); fflush( stdout );
     }
 
-    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0) && no_shifts==1) ||
-        ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0) && no_shifts==1) ||
+    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
+        ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0)) ||
         (iteration == solver_params->max_iter -1) ) {
         break;
     }
diff --git a/solver/cg_mms_tm_nd.c b/solver/cg_mms_tm_nd.c
index c1a1ff17a..3134da49f 100644
--- a/solver/cg_mms_tm_nd.c
+++ b/solver/cg_mms_tm_nd.c
@@ -201,13 +201,13 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
     /* Check whether the precision eps_sq is reached */
 
     err = square_norm(solver_field[0], N, 1) + square_norm(solver_field[1], N, 1);
-    
+
     if(g_debug_level > 2 && g_proc_id == g_stdio_proc) {
       printf("# CGMMSND iteration: %d residue: %g\n", iteration, err); fflush( stdout );
     }
 
-    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0) && shifts==1) ||
-	((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0) && shifts==1) ||
+    if( ((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
+	((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0)) ||
         (iteration == solver_params->max_iter -1) ) {
       break;
     }

From 4d61d9b6c8115de200a39e03f9f59b7d4bb1d954 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 15:08:03 +0200
Subject: [PATCH 40/85] Fixing indentation and removing spurious changes

---
 monomial/ndrat_monomial.c    | 11 +++-------
 monomial/ndratcor_monomial.c |  3 ---
 monomial/rat_monomial.c      |  2 +-
 operator/tm_operators_nd.h   | 40 ++++++++++++++++++------------------
 4 files changed, 24 insertions(+), 32 deletions(-)

diff --git a/monomial/ndrat_monomial.c b/monomial/ndrat_monomial.c
index b6cc2adb9..d3a84299b 100644
--- a/monomial/ndrat_monomial.c
+++ b/monomial/ndrat_monomial.c
@@ -100,7 +100,6 @@ void ndrat_derivative(const int id, hamiltonian_field_t * const hf) {
   mnl->solver_params.shifts = mnl->rat.mu;
   mnl->solver_params.rel_prec = g_relative_precision_flag;
   mnl->solver_params.type = mnl->solver; 
-
   mnl->solver_params.M_ndpsi = &Qtm_pm_ndpsi;
   mnl->solver_params.M_ndpsi32 = &Qtm_pm_ndpsi_32;    
   if(mnl->type == NDCLOVERRAT) {
@@ -231,14 +230,12 @@ void ndrat_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
   mnl->solver_params.sdim = VOLUME/2;
   mnl->solver_params.rel_prec = g_relative_precision_flag;
-
-  // this generates all X_j,o (odd sites only) -> g_chi_up|dn_spinor_field
   mnl->iter0 = solve_mms_nd_plus(g_chi_up_spinor_field, g_chi_dn_spinor_field,
                                  mnl->pf, mnl->pf2, &(mnl->solver_params) );
-  
+
   assign(mnl->w_fields[2], mnl->pf, VOLUME/2);
   assign(mnl->w_fields[3], mnl->pf2, VOLUME/2);
-    
+
   // apply C to the random field to generate pseudo-fermion fields
   for(int j = (mnl->rat.np-1); j > -1; j--) {
       assign_add_mul(mnl->pf, g_chi_up_spinor_field[j], I*mnl->rat.rnu[j], VOLUME/2);
@@ -284,8 +281,7 @@ double ndrat_acc(const int id, hamiltonian_field_t * const hf) {
   }
   mnl->solver_params.sdim = VOLUME/2;
   mnl->solver_params.rel_prec = g_relative_precision_flag;
-
-  mnl->iter0 = solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
+  mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
                             mnl->pf, mnl->pf2, &(mnl->solver_params) );
 
   // apply R to the pseudo-fermion fields
@@ -300,7 +296,6 @@ double ndrat_acc(const int id, hamiltonian_field_t * const hf) {
 
   mnl->energy1 = scalar_prod_r(mnl->pf, mnl->w_fields[0], VOLUME/2, 1);
   mnl->energy1 += scalar_prod_r(mnl->pf2, mnl->w_fields[1], VOLUME/2, 1);
-
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
diff --git a/monomial/ndratcor_monomial.c b/monomial/ndratcor_monomial.c
index 655fe7b13..01f9b69c5 100644
--- a/monomial/ndratcor_monomial.c
+++ b/monomial/ndratcor_monomial.c
@@ -160,7 +160,6 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
       up1 = tup; dn1 = tdn;
     }
   }
-
   etime = gettime();
   if(g_proc_id == 0) {
     if(g_debug_level > 1) {
@@ -253,7 +252,6 @@ void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
 		     solver_params_t * solver_params) {
   monomial * mnl = &monomial_list[id];
 
-  // apply R to the pseudo-fermion fields
   mnl->iter0 += solve_mms_nd(g_chi_up_spinor_field, g_chi_dn_spinor_field,
 			                       l_up, l_dn, solver_params);  
   
@@ -277,7 +275,6 @@ void apply_Z_ndpsi(spinor * const k_up, spinor * const k_dn,
     assign_add_mul_r(k_dn, g_chi_dn_spinor_field[j], 
 		     mnl->rat.rmu[j], VOLUME/2);
   }
-
   mul_r(g_chi_up_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
 	k_up, VOLUME/2);
   mul_r(g_chi_dn_spinor_field[mnl->rat.np], mnl->rat.A*mnl->rat.A, 
diff --git a/monomial/rat_monomial.c b/monomial/rat_monomial.c
index 991002648..c9d807204 100644
--- a/monomial/rat_monomial.c
+++ b/monomial/rat_monomial.c
@@ -90,7 +90,7 @@ void rat_derivative(const int id, hamiltonian_field_t * const hf) {
   mnl->solver_params.sdim = VOLUME/2;
   // this generates all X_j,o (odd sites only) -> g_chi_up_spinor_field
   mnl->iter1 += solve_mms_tm(g_chi_up_spinor_field, mnl->pf,
-                                        &(mnl->solver_params) );
+                             &(mnl->solver_params) );
   
   for(int j = (mnl->rat.np-1); j > -1; j--) {
     mnl->Qp(mnl->w_fields[0], g_chi_up_spinor_field[j]);
diff --git a/operator/tm_operators_nd.h b/operator/tm_operators_nd.h
index e7a9dc755..138e9b93b 100644
--- a/operator/tm_operators_nd.h
+++ b/operator/tm_operators_nd.h
@@ -23,8 +23,8 @@
 #define _TM_OPERATTORS_ND_H
 
 void mul_one_pm_itau2(spinor * const p, spinor * const q,
-          spinor * const r, spinor * const s,
-          const double sign, const int N);
+                      spinor * const r, spinor * const s,
+                      const double sign, const int N);
 
 void M_full_ndpsi(spinor * const Even_new_s, spinor * const Odd_new_s, 
                   spinor * const Even_new_c, spinor * const Odd_new_c, 
@@ -41,9 +41,9 @@ void D_ndpsi(spinor * const l_strange, spinor * const l_charm,
              spinor * const k_strange,  spinor * const k_charm);
 
 void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
-         spinor * const k_strange,  spinor * const k_charm);
+               spinor * const k_strange,  spinor * const k_charm);
 void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
-         spinor * const k_strange, spinor * const k_charm);
+               spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_tau1_ndpsi_add_Ishift(spinor * const l_strange, spinor * const l_charm,
                                spinor * const k_strange,  spinor * const k_charm);
@@ -56,49 +56,49 @@ void Qsw_tau1_ndpsi_sub_Ishift(spinor * const l_strange, spinor * const l_charm,
 
 
 void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
-          spinor * const k_strange, spinor * const k_charm);
+                      spinor * const k_strange, spinor * const k_charm);
 void Qsw_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
-          spinor * const k_strange, spinor * const k_charm);
+                      spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                   spinor * const k_strange, spinor * const k_charm);
 void Qtm_pm_ndpsi_shift(spinor * const l_strange, spinor * const l_charm,
-      spinor * const k_strange, spinor * const k_charm);
+                        spinor * const k_strange, spinor * const k_charm);
 
 void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
-      spinor * const k_strange, spinor * const k_charm);
+                  spinor * const k_strange, spinor * const k_charm);
 void Qsw_pm_ndpsi_shift(spinor * const l_strange, spinor * const l_charm,
-      spinor * const k_strange, spinor * const k_charm);
+                        spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 void Qsw_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 
 void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
-          spinor * const k_strange, spinor * const k_charm, 
-          const _Complex double z, const double Cpol, const double invev);
+                            spinor * const k_strange, spinor * const k_charm, 
+                            const _Complex double z, const double Cpol, const double invev);
 void Qsw_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
-            spinor * const k_strange, spinor * const k_charm, 
-            const _Complex double z, const double Cpol, const double invev);
+                              spinor * const k_strange, spinor * const k_charm, 
+                              const _Complex double z, const double Cpol, const double invev);
 
 void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-             spinor * const k_strange, spinor * const k_charm, 
-       const int ieo);
+                   spinor * const k_strange, spinor * const k_charm, 
+                   const int ieo);
 void H_eo_sw_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-       spinor * const k_strange, spinor * const k_charm);
+                   spinor * const k_strange, spinor * const k_charm);
 
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-        spinor * const k_strange, spinor * const k_charm,
-        const double mu, const double eps);
+                    spinor * const k_strange, spinor * const k_charm,
+                    const double mu, const double eps);
 
 void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-          spinor * const k_strange, spinor * const k_charm);
+                      spinor * const k_strange, spinor * const k_charm);
 
 void Q_test_epsilon(spinor * const l_strange, spinor * const l_charm,
                     spinor * const k_strange, spinor * const k_charm);
 
 void Qtau1_P_ndpsi(spinor * const l_strange, spinor * const l_charm,
-    spinor * const k_strange, spinor * const k_charm);
+                   spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_Ptm_pm_psi(spinor * const l, spinor * const k);
 

From a244d08ed4509778e444b5b6a4712932fafa2d86 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 15:15:32 +0200
Subject: [PATCH 41/85] Replacing tabs with spaces

---
 DDalphaAMG_interface.c | 212 ++++++++++++++++++++---------------------
 1 file changed, 106 insertions(+), 106 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 7abf9c7ca..e5c6bf9fd 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -67,16 +67,16 @@ void MG_finalize(void) {
 }
 
 int MG_solver(spinor * const phi_new, spinor * const phi_old,
-	      const double precision, const int max_iter,const int rel_prec,
-	      const int N, su3 **gf, matrix_mult f) {
+              const double precision, const int max_iter,const int rel_prec,
+              const int N, su3 **gf, matrix_mult f) {
     printf("ERROR: MG_solver called but DDalphaAMG library not included.\n");
     exit(1);
 }
 
 int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
-		 spinor * const Even, spinor * const Odd,
-		 const double precision, const int max_iter, const int rel_prec,
-		 const int N, su3 **gf, matrix_mult_full f_full) {
+                 spinor * const Even, spinor * const Odd,
+                 const double precision, const int max_iter, const int rel_prec,
+                 const int N, su3 **gf, matrix_mult_full f_full) {
     printf("ERROR: MG_solver_eo called but DDalphaAMG library not included.\n");
     exit(1);
 }
@@ -207,7 +207,7 @@ static inline int MG_check(spinor * const phi_new, spinor * const phi_old, const
 }
 
 static inline int MG_check_nd( spinor * const up_new, spinor * const dn_new, spinor * const up_old, spinor * const dn_old,
-			const int N, const double precision, matrix_mult_nd f) 
+                               const int N, const double precision, matrix_mult_nd f) 
 {
   double differ[2], residual;
   spinor ** check_vect = NULL;
@@ -328,7 +328,7 @@ static int MG_pre_solve( su3 **gf )
   if (mg_do_setup==1) {
     if( mg_setup_mu_set ) {
       if (g_proc_id == 0)
-	printf("DDalphaAMG using mu=%f during setup\n", mg_setup_mu);
+        printf("DDalphaAMG using mu=%f during setup\n", mg_setup_mu);
       MG_update_mu(mg_setup_mu, 0); 
     } else
       MG_update_mu(g_mu, 0);
@@ -337,7 +337,7 @@ static int MG_pre_solve( su3 **gf )
     DDalphaAMG_setup(&mg_status);
     mg_do_setup = 0;
     mg_tau = gauge_tau;
-    if (mg_status.success && g_proc_id == 0)	
+    if (mg_status.success && g_proc_id == 0)        
       printf("DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n",
              mg_status.time, 100.*(mg_status.coarse_time/mg_status.time));
     else if ( g_proc_id == 0)
@@ -347,7 +347,7 @@ static int MG_pre_solve( su3 **gf )
   if (mg_update_setup>0) {
     if( mg_setup_mu_set ) {
       if (g_proc_id == 0)
-	printf("DDalphaAMG using mu=%f during setup\n", mg_setup_mu);
+        printf("DDalphaAMG using mu=%f during setup\n", mg_setup_mu);
       MG_update_mu(mg_setup_mu, 0); 
     } else
       MG_update_mu(g_mu, 0);
@@ -356,9 +356,9 @@ static int MG_pre_solve( su3 **gf )
     DDalphaAMG_update_setup(mg_update_setup, &mg_status);
     mg_update_setup = 0;
     mg_tau = gauge_tau;
-    if (mg_status.success && g_proc_id == 0)	
+    if (mg_status.success && g_proc_id == 0)        
       printf("DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n",
-	     mg_status.time, 100.*(mg_status.coarse_time/mg_status.time));
+             mg_status.time, 100.*(mg_status.coarse_time/mg_status.time));
     else if ( g_proc_id == 0)
       printf("ERROR: setup updating did not run correctly");
   }
@@ -367,7 +367,7 @@ static int MG_pre_solve( su3 **gf )
 }
 
 static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double precision,
-		    const int N, matrix_mult f)
+                    const int N, matrix_mult f)
 {
   
   // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> rescale by 1/4+m
@@ -391,24 +391,24 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
   
   // Checking if the operator is in the list and compatible with N
   if (      f == Msw_psi ||       //          Schur complement with mu=0 on odd sites
-	    f == Qsw_psi ||       // Gamma5 - Schur complement with mu=0 on odd sites
-	    f == Mtm_plus_psi ||  //          Schur complement with plus mu 
-	    f == Msw_plus_psi ||  //          Schur complement with plus mu
-	    f == Qtm_plus_psi ||  // Gamma5 - Schur complement with plus mu 
-	    f == Qsw_plus_psi ||  // Gamma5 - Schur complement with plus mu
-	    f == Mtm_minus_psi || //          Schur complement with minus mu 
-	    f == Msw_minus_psi || //          Schur complement with minus mu
-	    f == Qtm_minus_psi || // Gamma5 - Schur complement with minus mu 
-	    f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu
-	    f == Qtm_pm_psi ||    //          Schur complement squared
-	    f == Qsw_pm_psi ) {   //          Schur complement squared
+            f == Qsw_psi ||       // Gamma5 - Schur complement with mu=0 on odd sites
+            f == Mtm_plus_psi ||  //          Schur complement with plus mu 
+            f == Msw_plus_psi ||  //          Schur complement with plus mu
+            f == Qtm_plus_psi ||  // Gamma5 - Schur complement with plus mu 
+            f == Qsw_plus_psi ||  // Gamma5 - Schur complement with plus mu
+            f == Mtm_minus_psi || //          Schur complement with minus mu 
+            f == Msw_minus_psi || //          Schur complement with minus mu
+            f == Qtm_minus_psi || // Gamma5 - Schur complement with minus mu 
+            f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu
+            f == Qtm_pm_psi ||    //          Schur complement squared
+            f == Qsw_pm_psi ) {   //          Schur complement squared
     if( N != VOLUME/2 && g_proc_id == 0 )
       printf("WARNING: expected N == VOLUME/2 for the required operator in MG_solve. Continuing with N == VOLUME\n");
   }
   else if ( f == D_psi ||         //          Full operator    with plus mu
-	    f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
-	    f == Q_minus_psi ||   // Gamma5 - Full operator    with minus mu
-	    f == Q_pm_psi ||      //          Full operator    squared
+            f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
+            f == Q_minus_psi ||   // Gamma5 - Full operator    with minus mu
+            f == Q_pm_psi ||      //          Full operator    squared
             f == Qsw_full_plus_psi || // Gamma5 - Full operator    with plus mu
             f == Qsw_full_minus_psi|| //Gamma5 - Full operator    with plus mu
             f == Qsw_full_pm_psi   || //          Full operator    squared
@@ -418,43 +418,43 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
   }
   else if( g_proc_id == 0 )
     printf("WARNING: required operator unknown for MG_solve. Using standard operator: %s.\n",
-	   N==VOLUME?"D_psi":"Msw_plus_psi");
+           N==VOLUME?"D_psi":"Msw_plus_psi");
 
   // Setting mu
   if (      f == Msw_psi ||       //          Schur complement with mu=0 on odd sites
-	    f == Qsw_psi )        // Gamma5 - Schur complement with mu=0 on odd sites
+            f == Qsw_psi )        // Gamma5 - Schur complement with mu=0 on odd sites
     MG_update_mu(g_mu, -g_mu);
   else if ( f == Mtm_minus_psi || //          Schur complement with minus mu 
-	    f == Msw_minus_psi || //          Schur complement with minus mu
-	    f == Qtm_minus_psi || // Gamma5 - Schur complement with minus mu 
-	    f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu
+            f == Msw_minus_psi || //          Schur complement with minus mu
+            f == Qtm_minus_psi || // Gamma5 - Schur complement with minus mu 
+            f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu
             f == Qsw_full_minus_psi|| //Gamma5 - Full operator    with plus mu
             f == Msw_full_minus_psi|| //         Full operator    with minus mu
-	    f == Q_minus_psi )    // Gamma5 - Full operator    with minus mu
+            f == Q_minus_psi )    // Gamma5 - Full operator    with minus mu
     MG_update_mu(-g_mu, -g_mu3);
   else if ( f == Mtm_plus_psi ||  //          Schur complement with plus mu 
-	    f == Msw_plus_psi ||  //          Schur complement with plus mu
-	    f == Qtm_plus_psi ||  // Gamma5 - Schur complement with plus mu 
-	    f == Qsw_plus_psi ||  // Gamma5 - Schur complement with plus mu
-	    f == D_psi ||         //          Full operator    with plus mu
-	    f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
-	    f == Qtm_pm_psi ||    //          Schur complement squared
-	    f == Qsw_pm_psi ||    //          Schur complement squared
+            f == Msw_plus_psi ||  //          Schur complement with plus mu
+            f == Qtm_plus_psi ||  // Gamma5 - Schur complement with plus mu 
+            f == Qsw_plus_psi ||  // Gamma5 - Schur complement with plus mu
+            f == D_psi ||         //          Full operator    with plus mu
+            f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
+            f == Qtm_pm_psi ||    //          Schur complement squared
+            f == Qsw_pm_psi ||    //          Schur complement squared
             f == Qsw_full_plus_psi || // Gamma5 - Full operator    with plus mu
             f == Qsw_full_pm_psi   || //          Full operator    squared
-	    f == Q_pm_psi )       //          Full operator    squared
+            f == Q_pm_psi )       //          Full operator    squared
     MG_update_mu(g_mu, g_mu3); 
   else
     MG_update_mu(g_mu, g_mu3); 
 
   //Solving
   if (      f == Qtm_plus_psi ||  // Gamma5 - Schur complement with plus mu 
-	    f == Qsw_plus_psi ||  // Gamma5 - Schur complement with plus mu
-	    f == Qtm_minus_psi || // Gamma5 - Schur complement with minus mu 
-	    f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu 
-	    f == Qsw_psi ||       // Gamma5 - Schur complement with mu=0 on odd sites
-	    f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
-	    f == Q_minus_psi ||   // Gamma5 - Full operator    with minus mu
+            f == Qsw_plus_psi ||  // Gamma5 - Schur complement with plus mu
+            f == Qtm_minus_psi || // Gamma5 - Schur complement with minus mu 
+            f == Qsw_minus_psi || // Gamma5 - Schur complement with minus mu 
+            f == Qsw_psi ||       // Gamma5 - Schur complement with mu=0 on odd sites
+            f == Q_plus_psi ||    // Gamma5 - Full operator    with plus mu 
+            f == Q_minus_psi ||   // Gamma5 - Full operator    with minus mu
             f == Qsw_full_plus_psi || // Gamma5 - Full operator    with plus mu
             f == Qsw_full_minus_psi|| //Gamma5 - Full operator    with plus mu
             f == Qsw_full_pm_psi ) {  //          Full operator    squared
@@ -464,7 +464,7 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
       mul_gamma5((spinor *const) old, VOLUME);
   }
   else if ( f == Qtm_pm_psi ||    //          Schur complement squared
-	    f == Qsw_pm_psi ) {   //          Schur complement squared
+            f == Qsw_pm_psi ) {   //          Schur complement squared
     mg_scale *= mg_scale;
     DDalphaAMG_solve_squared_odd( new, old, precision, &mg_status );
   }
@@ -473,11 +473,11 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
     DDalphaAMG_solve_squared( new, old, precision, &mg_status );
   }
   else if ( f == Mtm_plus_psi ||  //          Schur complement with plus mu 
-	    f == Msw_plus_psi ||  //          Schur complement with plus mu
-	    f == Mtm_minus_psi || //          Schur complement with minus mu 
-	    f == Msw_minus_psi || //          Schur complement with minus mu
-	    f == Msw_psi ||       //          Schur complement with mu=0 on odd sites
-	    f == D_psi ||         //          Full operator    with plus mu
+            f == Msw_plus_psi ||  //          Schur complement with plus mu
+            f == Mtm_minus_psi || //          Schur complement with minus mu 
+            f == Msw_minus_psi || //          Schur complement with minus mu
+            f == Msw_psi ||       //          Schur complement with mu=0 on odd sites
+            f == D_psi ||         //          Full operator    with plus mu
             f == Msw_full_minus_psi) {//         Full operator    with minus mu
     DDalphaAMG_solve( new, old, precision, &mg_status );
   }
@@ -493,7 +493,7 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 
   if (g_proc_id == 0) {
     printf("Solving time %.2f sec (%.1f %% on coarse grid)\n", mg_status.time,
-	   100.*(mg_status.coarse_time/mg_status.time));
+           100.*(mg_status.coarse_time/mg_status.time));
     printf("Total iterations on fine grid %d\n", mg_status.iter_count);
     printf("Total iterations on coarse grids %d\n", mg_status.coarse_iter_count);
     if (!mg_status.success) 
@@ -504,7 +504,7 @@ static int MG_solve(spinor * const phi_new, spinor * const phi_old, const double
 }
 
 static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old, spinor * const dn_old,
-			const double precision, const int N, matrix_mult_nd f)
+                        const double precision, const int N, matrix_mult_nd f)
 {
   
   // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> rescale by 1/4+m
@@ -535,9 +535,9 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
   // In case of initial guess and squared operator, we do the inversion in two step and we need two more vectors
   if ( init_guess && (
             f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift ))  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qsw_pm_ndpsi_shift ))  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     no_solver_field += 2;
 
   // Allocating and assigning fields
@@ -558,9 +558,9 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
 
   if ( init_guess && (
             f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift )) {// (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qsw_pm_ndpsi_shift )) {// (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     new1tmp = solver_field[assign_solver_field++];
     new2tmp = solver_field[assign_solver_field++];
   }
@@ -587,9 +587,9 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
 
     /* Reconstruct the even sites                */
     if (    f == Qtm_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qsw_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
             f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
@@ -634,9 +634,9 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
     }
 
     if (    f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift ){  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qsw_pm_ndpsi_shift ){  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
 
       // tau1 exchange new1tmp <-> new2tmp
       convert_odd_to_lexic( new2tmp, tmp11);
@@ -671,17 +671,17 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
 
   // Checking if the operator is in the list and compatible with N
   if (      f == Qtm_ndpsi ||           //  Gamma5 Dh    - Schur complement with csw = 0
-	    f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
-	    f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar and csw = 0
-	    f == Qsw_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar
+            f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
+            f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar and csw = 0
+            f == Qsw_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar
             f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qsw_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with minus shift
-	    f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     if( N != VOLUME/2 && g_proc_id == 0 )
       printf("WARNING: expected N == VOLUME/2 for the required operator in MG_solve. Continuing with N == VOLUME\n");
   }
@@ -691,11 +691,11 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
   }
   else if( g_proc_id == 0 )
     printf("WARNING: required operator unknown for MG_solve. Using standard operator: %s.\n",
-	   N==VOLUME?"":"Qsw_ndpsi");
+           N==VOLUME?"":"Qsw_ndpsi");
 
   // Setting mu and eps
   if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     MG_update_mubar_epsbar( g_mubar, g_epsbar, sqrt(g_shift) );
   else if ( f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qsw_tau1_ndpsi_add_Ishift )  // Gamma5 Dh tau1 - Schur complement with plus shift
@@ -704,10 +704,10 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
             f == Qsw_tau1_ndpsi_sub_Ishift )  // Gamma5 Dh tau1 - Schur complement with minus shift
     MG_update_mubar_epsbar( g_mubar, g_epsbar, -sqrt(g_shift) );
   else if ( f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
-	    f == Qsw_dagger_ndpsi )     //  Gamma5 Dh    - Schur complement with mu = -mubar
+            f == Qsw_dagger_ndpsi )     //  Gamma5 Dh    - Schur complement with mu = -mubar
     MG_update_mubar_epsbar( -g_mubar, g_epsbar, 0 );
   else if ( f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
             f == D_ndpsi )              //  Dh
     MG_update_mubar_epsbar( g_mubar, g_epsbar, 0 );
   else
@@ -715,9 +715,9 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
   
   //Solving
   if (      f == Qtm_ndpsi ||           //  Gamma5 Dh    - Schur complement with csw = 0
-	    f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
-	    f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
-	    f == Qsw_dagger_ndpsi ) {   //  Gamma5 Dh    - Schur complement with mu = -mubar
+            f == Qsw_ndpsi ||           //  Gamma5 Dh    - Schur complement
+            f == Qtm_dagger_ndpsi ||    //  Gamma5 Dh    - Schur complement with mu = -mubar csw = 0
+            f == Qsw_dagger_ndpsi ) {   //  Gamma5 Dh    - Schur complement with mu = -mubar
     mul_gamma5(old1, VOLUME);
     mul_gamma5(old2, VOLUME);
     if (init_guess) {
@@ -756,11 +756,11 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
       mul_gamma5(old1, VOLUME);
       mul_gamma5(old2, VOLUME);
     }
-  }	    
+  }            
   else if ( f == Qtm_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-	    f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi ||        // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh
     // tmLQCD:          gamma5 Dh tau1 gamma5 Dh tau1
     if (init_guess) {
@@ -828,7 +828,7 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
   
   if (g_proc_id == 0) {
     printf("Solving time %.2f sec (%.1f %% on coarse grid)\n", mg_status.time,
-	   100.*(mg_status.coarse_time/mg_status.time));
+           100.*(mg_status.coarse_time/mg_status.time));
     printf("Total iterations on fine grid %d\n", mg_status.iter_count);
     printf("Total iterations on coarse grids %d\n", mg_status.coarse_iter_count);
     if (!mg_status.success) 
@@ -883,12 +883,12 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
   }
 
   // Checking if the operator is in the list and compatible with N
-  if (	    f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+  if (            f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qsw_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with minus shift
             f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     if( N != VOLUME/2 ) {
       if( g_proc_id == 0 )
         printf("ERROR: expected N == VOLUME/2 for the required operator in MG_mms_solve_nd.\n");
@@ -896,13 +896,13 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
     }
   }  else if( g_proc_id == 0 )
     printf("WARNING: required operator unknown for MG_solve. Using standard operator: %s.\n",
-	   N==VOLUME?"":"Qsw_pm_ndpsi_shift");
+           N==VOLUME?"":"Qsw_pm_ndpsi_shift");
 
   // Setting mubar, epsbar and shifts
-  if (	    f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+  if (            f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     MG_update_mubar_epsbar( g_mubar, g_epsbar, shifts[0] );
     for( int i = 0; i < no_shifts; i++ ) {
       mg_odd_shifts[i]  = shifts[i]*mg_scale;
@@ -932,9 +932,9 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
       mul_gamma5((spinor *const) old1, VOLUME);
       mul_gamma5((spinor *const) old2, VOLUME);
     }
-  }	    
+  }            
   else if ( f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-	    f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+            f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
     mg_scale *= mg_scale;
     // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh
     // tmLQCD:          gamma5 Dh tau1 gamma5 Dh tau1
@@ -960,7 +960,7 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
 
   if (g_proc_id == 0) {
     printf("Solving time %.2f sec (%.1f %% on coarse grid)\n", mg_status.time,
-	   100.*(mg_status.coarse_time/mg_status.time));
+           100.*(mg_status.coarse_time/mg_status.time));
     printf("Total iterations on fine grid %d\n", mg_status.iter_count);
     printf("Total iterations on coarse grids %d\n", mg_status.coarse_iter_count);
     if (!mg_status.success) 
@@ -989,7 +989,7 @@ void MG_init()
   for(int i = 0; i<4; i++)
     if(mg_blk[i]==0)
       mg_blk[i]=(((L/g_nproc_x)%2==0)?(((L/g_nproc_x)%4==0)?4:2):
-		 (((L/g_nproc_x)%3==0)?3:1));
+                 (((L/g_nproc_x)%3==0)?3:1));
   
   mg_init.block_lattice[0]=mg_blk[0];
   mg_init.block_lattice[1]=mg_blk[1];
@@ -1037,8 +1037,8 @@ void MG_init()
   
   if (mg_status.success!=mg_lvl) {
       if (g_proc_id == 0) {
-	  printf("MG WARNING: %d level initialized instead of %d\n",mg_status.success,mg_lvl);
-	  printf("MG WARNING: parameter: mg_lvl is changed to %d\n\n",mg_status.success);
+          printf("MG WARNING: %d level initialized instead of %d\n",mg_status.success,mg_lvl);
+          printf("MG WARNING: parameter: mg_lvl is changed to %d\n\n",mg_status.success);
       }
       mg_lvl=mg_status.success;
   }
@@ -1106,7 +1106,7 @@ void MG_update_mu(double mu_tmLQCD, double shift_tmLQCD)
     mg_params.epsbar_ig5_odd_shift = 0.0;
     mg_params.smoother_iterations = 4;
     DDalphaAMG_update_parameters(&mg_params, &mg_status);
-  }	 
+  }         
 }
 
 void MG_update_mubar_epsbar(double mubar_tmLQCD, double epsbar_tmLQCD, double shift_tmLQCD)
@@ -1135,7 +1135,7 @@ void MG_update_mubar_epsbar(double mubar_tmLQCD, double epsbar_tmLQCD, double sh
     mg_params.epsbar_ig5_odd_shift = shift;
     mg_params.smoother_iterations = 2;
     DDalphaAMG_update_parameters(&mg_params, &mg_status);
-  }	 
+  }         
 }
 
 void MG_reset() {
@@ -1157,8 +1157,8 @@ void MG_finalize()
 
 
 int MG_solver(spinor * const phi_new, spinor * const phi_old,
-	      const double precision, const int max_iter,const int rel_prec,
-	      const int N, su3 **gf, matrix_mult f)
+              const double precision, const int max_iter,const int rel_prec,
+              const int N, su3 **gf, matrix_mult f)
 {
   
   int success=0;
@@ -1198,9 +1198,9 @@ int MG_solver(spinor * const phi_new, spinor * const phi_old,
 }
 
 int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
-		 spinor * const Even, spinor * const Odd,
-		 const double precision, const int max_iter, const int rel_prec,
-		 const int N, su3 **gf, matrix_mult_full f_full)
+                 spinor * const Even, spinor * const Odd,
+                 const double precision, const int max_iter, const int rel_prec,
+                 const int N, su3 **gf, matrix_mult_full f_full)
 {
   
   int iter_count;
@@ -1233,9 +1233,9 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
 }
 
 int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
-		 spinor * const up_old, spinor * const dn_old,
-		 const double precision, const int max_iter, const int rel_prec,
-		 const int N, su3 **gf, matrix_mult_nd f)
+                 spinor * const up_old, spinor * const dn_old,
+                 const double precision, const int max_iter, const int rel_prec,
+                 const int N, su3 **gf, matrix_mult_nd f)
 {
   
   int success=0;

From 0c00d7c3e3dc237753a76c4af02541d27a74bbd8 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 12 Dec 2017 15:58:21 +0200
Subject: [PATCH 42/85] Add initial guess from CGMMS

---
 solver/monomial_solve.c | 80 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 68 insertions(+), 12 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index c6fea6ce1..659942aa3 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -218,19 +218,45 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
         mg_no_shifts = solver_params->no_shifts;
         while (mg_mms_mass < solver_params->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
       }
+      // Number of initial guesses provided by gcmms
+      // README: tunable value. 1 it's fine for now.
+      int no_cgmms_init_guess = 1;
+      if(no_cgmms_init_guess > mg_no_shifts) {
+        no_cgmms_init_guess = mg_no_shifts;
+      }
       int no_shifts = solver_params->no_shifts;
       if (mg_no_shifts < no_shifts) {
-        solver_params->no_shifts = no_shifts - mg_no_shifts;
-        solver_params->shifts += mg_no_shifts;
-        iteration_count = cg_mms_tm( P+mg_no_shifts, Q, solver_params );
+        spinor ** P_cg = P+(mg_no_shifts - no_cgmms_init_guess);
+        solver_params->no_shifts = no_shifts - (mg_no_shifts - no_cgmms_init_guess);
+        solver_params->shifts += (mg_no_shifts - no_cgmms_init_guess);
+        // switching last shift
+        if (no_cgmms_init_guess > 0) {
+          double tmp = solver_params->shifts[0];
+          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
+          solver_params->shifts[no_cgmms_init_guess] = tmp;
+          spinor * tmpP = P_cg[0];
+          P_cg[0] = P_cg[no_cgmms_init_guess];
+          P_cg[no_cgmms_init_guess] = tmpP;
+        }
+        iteration_count = cg_mms_tm( P_cg, Q, solver_params );
         // Restoring solver_params
+        // switching last shift
+        if (no_cgmms_init_guess > 0) {
+          spinor * tmpP = P_cg[0];
+          double tmp = solver_params->shifts[0];
+          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
+          solver_params->shifts[no_cgmms_init_guess] = tmp;
+          P_cg[0] = P_cg[no_cgmms_init_guess];
+          P_cg[no_cgmms_init_guess] = tmpP;
+        }
         solver_params->no_shifts = no_shifts;
-        solver_params->shifts -= mg_no_shifts;
+        solver_params->shifts -= (mg_no_shifts - no_cgmms_init_guess);
       }
 
       for(int i = mg_no_shifts-1; i>=0; i--){
-        // preparing initial guess                                                                                                                                                                       
-        init_guess_mms(P, Q, i, solver_params);
+        // preparing initial guess
+        if(i<mg_no_shifts-no_cgmms_init_guess)
+          init_guess_mms(P, Q, i, solver_params);
         g_mu3 = solver_params->shifts[i]; 
         iteration_count += MG_solver( P[i], Q, solver_params->squared_solver_prec, solver_params->max_iter,
                                          solver_params->rel_prec, solver_params->sdim, g_gauge_field, solver_params->M_psi );
@@ -357,14 +383,44 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         mg_no_shifts = solver_params->no_shifts;
         while (mg_mms_mass < solver_params->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
       }
+      // Number of initial guesses provided by gcmms
+      // README: tunable value. 1 it's fine for now.
+      int no_cgmms_init_guess = 2;
+      if(no_cgmms_init_guess > mg_no_shifts) {
+        no_cgmms_init_guess = mg_no_shifts;
+      }
       int no_shifts = solver_params->no_shifts;
       if (mg_no_shifts < no_shifts) {
-        solver_params->no_shifts = no_shifts - mg_no_shifts;
-        solver_params->shifts += mg_no_shifts;
-        iteration_count = cg_mms_tm_nd( Pup+mg_no_shifts, Pdn+mg_no_shifts, Qup, Qdn, solver_params );
+        spinor ** Pup_cg = Pup+(mg_no_shifts - no_cgmms_init_guess);
+        spinor ** Pdn_cg = Pdn+(mg_no_shifts - no_cgmms_init_guess);
+        solver_params->no_shifts = no_shifts - (mg_no_shifts - no_cgmms_init_guess);
+        solver_params->shifts += (mg_no_shifts - no_cgmms_init_guess);
+        if (no_cgmms_init_guess > 0) {
+          double tmp = solver_params->shifts[0];
+          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
+          solver_params->shifts[no_cgmms_init_guess] = tmp;
+          spinor * tmpP = Pup_cg[0];
+          Pup_cg[0] = Pup_cg[no_cgmms_init_guess];
+          Pup_cg[no_cgmms_init_guess] = tmpP;
+          tmpP = Pdn_cg[0];
+          Pdn_cg[0] = Pdn_cg[no_cgmms_init_guess];
+          Pdn_cg[no_cgmms_init_guess] = tmpP;
+        }
+        iteration_count = cg_mms_tm_nd( Pup_cg, Pdn_cg, Qup, Qdn, solver_params );
         // Restoring solver_params
+        if (no_cgmms_init_guess > 0) {
+          double tmp = solver_params->shifts[0];
+          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
+          solver_params->shifts[no_cgmms_init_guess] = tmp;
+          spinor * tmpP = Pup_cg[0];
+          Pup_cg[0] = Pup_cg[no_cgmms_init_guess];
+          Pup_cg[no_cgmms_init_guess] = tmpP;
+          tmpP = Pdn_cg[0];
+          Pdn_cg[0] = Pdn_cg[no_cgmms_init_guess];
+          Pdn_cg[no_cgmms_init_guess] = tmpP;
+        }
         solver_params->no_shifts = no_shifts;
-        solver_params->shifts -= mg_no_shifts;
+        solver_params->shifts -= (mg_no_shifts - no_cgmms_init_guess);
       }
 
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
@@ -373,8 +429,8 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
 
       for(int i = mg_no_shifts-1; i>=0; i--){
         // preparing initial guess
-        init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
-        
+        if(i<mg_no_shifts-no_cgmms_init_guess)
+          init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
         g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
         iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->squared_solver_prec, solver_params->max_iter,
                                          solver_params->rel_prec, solver_params->sdim, g_gauge_field, f );

From 6b36878f9f137c51369616a3e228dc5f4e02ea01 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 13 Dec 2017 11:56:34 +0200
Subject: [PATCH 43/85] Replacing flag OMP with correct one TM_USE_OMP

---
 DDalphaAMG_interface.c         |  4 ++--
 linalg/convert_even_to_lexic.c | 14 +++++++-------
 linalg/convert_odd_to_lexic.c  | 14 +++++++-------
 linalg/mul_gamma5.c            |  8 ++++----
 linalg/set_even_to_zero.c      |  8 ++++----
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index e5c6bf9fd..c92ace427 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -337,7 +337,7 @@ static int MG_pre_solve( su3 **gf )
     DDalphaAMG_setup(&mg_status);
     mg_do_setup = 0;
     mg_tau = gauge_tau;
-    if (mg_status.success && g_proc_id == 0)        
+    if (mg_status.success && g_proc_id == 0)
       printf("DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n",
              mg_status.time, 100.*(mg_status.coarse_time/mg_status.time));
     else if ( g_proc_id == 0)
@@ -356,7 +356,7 @@ static int MG_pre_solve( su3 **gf )
     DDalphaAMG_update_setup(mg_update_setup, &mg_status);
     mg_update_setup = 0;
     mg_tau = gauge_tau;
-    if (mg_status.success && g_proc_id == 0)        
+    if (mg_status.success && g_proc_id == 0)
       printf("DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n",
              mg_status.time, 100.*(mg_status.coarse_time/mg_status.time));
     else if ( g_proc_id == 0)
diff --git a/linalg/convert_even_to_lexic.c b/linalg/convert_even_to_lexic.c
index 6c56748d5..1979e5deb 100644
--- a/linalg/convert_even_to_lexic.c
+++ b/linalg/convert_even_to_lexic.c
@@ -26,7 +26,7 @@
 #ifdef MPI
 # include <mpi.h>
 #endif
-#ifdef OMP
+#ifdef TM_USE_OMP
 # include <omp.h>
 #endif
 #include "global.h"
@@ -34,7 +34,7 @@
 #include "convert_even_to_lexic.h"
 
 void convert_even_to_lexic(spinor * const P, spinor * const r) {
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp parallel
   {
 #endif
@@ -42,7 +42,7 @@ void convert_even_to_lexic(spinor * const P, spinor * const r) {
   int x, y, z, t, i, ix;
   spinor * p = NULL;
 
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp for
 #endif
   for(x = 0; x < LX; x++) {
@@ -61,7 +61,7 @@ void convert_even_to_lexic(spinor * const P, spinor * const r) {
     }
   }
 
-#ifdef OMP
+#ifdef TM_USE_OMP
   } /*OpenMP closing brace */
 #endif
 
@@ -73,7 +73,7 @@ void convert_even_to_lexic(spinor * const P, spinor * const r) {
  *      r: new spinor even
  */
 void convert_lexic_to_even(spinor * const r, spinor * const P) {
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp parallel
   {
 #endif
@@ -81,7 +81,7 @@ void convert_lexic_to_even(spinor * const r, spinor * const P) {
   int x, y, z, t, i, ix;
   spinor * p = NULL;
 
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp for
 #endif
   for(x = 0; x < LX; x++) {
@@ -100,7 +100,7 @@ void convert_lexic_to_even(spinor * const r, spinor * const P) {
     }
   }
 
-#ifdef OMP
+#ifdef TM_USE_OMP
   } /* OpenMP closing brace */
 #endif
 
diff --git a/linalg/convert_odd_to_lexic.c b/linalg/convert_odd_to_lexic.c
index 4280dad20..84155a92f 100644
--- a/linalg/convert_odd_to_lexic.c
+++ b/linalg/convert_odd_to_lexic.c
@@ -26,7 +26,7 @@
 #ifdef MPI
 # include <mpi.h>
 #endif
-#ifdef OMP
+#ifdef TM_USE_OMP
 # include <omp.h>
 #endif
 #include "global.h"
@@ -34,7 +34,7 @@
 #include "convert_odd_to_lexic.h"
 
 void convert_odd_to_lexic(spinor * const P, spinor * const r) {
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp parallel
   {
 #endif
@@ -42,7 +42,7 @@ void convert_odd_to_lexic(spinor * const P, spinor * const r) {
   int x, y, z, t, i, ix;
   spinor * p = NULL;
 
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp for
 #endif
   for(x = 0; x < LX; x++) {
@@ -61,7 +61,7 @@ void convert_odd_to_lexic(spinor * const P, spinor * const r) {
     }
   }
 
-#ifdef OMP
+#ifdef TM_USE_OMP
   } /*OpenMP closing brace */
 #endif
 
@@ -73,7 +73,7 @@ void convert_odd_to_lexic(spinor * const P, spinor * const r) {
  *      r: new spinor odd 
  */
 void convert_lexic_to_odd(spinor * const r, spinor * const P) {
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp parallel
   {
 #endif
@@ -81,7 +81,7 @@ void convert_lexic_to_odd(spinor * const r, spinor * const P) {
   int x, y, z, t, i, ix;
   spinor * p = NULL;
 
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp for
 #endif
   for(x = 0; x < LX; x++) {
@@ -100,7 +100,7 @@ void convert_lexic_to_odd(spinor * const r, spinor * const P) {
     }
   }
 
-#ifdef OMP
+#ifdef TM_USE_OMP
   } /* OpenMP closing brace */
 #endif
 
diff --git a/linalg/mul_gamma5.c b/linalg/mul_gamma5.c
index 37c229b73..a8c77d1ac 100644
--- a/linalg/mul_gamma5.c
+++ b/linalg/mul_gamma5.c
@@ -28,7 +28,7 @@
 #ifdef HAVE_CONFIG_H
 # include<config.h>
 #endif
-#ifdef OMP
+#ifdef TM_USE_OMP
 # include <omp.h>
 #endif
 #include <stdlib.h>
@@ -38,7 +38,7 @@
 #include "mul_r.h"
 
 void mul_gamma5(spinor * const R, const int N){
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp parallel
   {
 #endif
@@ -46,7 +46,7 @@ void mul_gamma5(spinor * const R, const int N){
   int ix;
   spinor *r;
   
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp for
 #endif
   for (ix = 0; ix < N; ix++){
@@ -60,7 +60,7 @@ void mul_gamma5(spinor * const R, const int N){
     r->s3.c1 = -1.0*r->s3.c1;
     r->s3.c2 = -1.0*r->s3.c2;
   }
-#ifdef OMP
+#ifdef TM_USE_OMP
   } /*OpenMP closing brace */
 #endif
 
diff --git a/linalg/set_even_to_zero.c b/linalg/set_even_to_zero.c
index 1cebe3eb3..f0e39ac97 100644
--- a/linalg/set_even_to_zero.c
+++ b/linalg/set_even_to_zero.c
@@ -26,7 +26,7 @@
 #ifdef MPI
 # include <mpi.h>
 #endif
-#ifdef OMP
+#ifdef TM_USE_OMP
 # include <omp.h>
 #endif
 #include "global.h"
@@ -34,7 +34,7 @@
 #include "set_even_to_zero.h"
 
 void set_even_to_zero(spinor * const P) {
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp parallel
   {
 #endif
@@ -42,7 +42,7 @@ void set_even_to_zero(spinor * const P) {
   int x, y, z, t, i, ix;
   spinor * p = NULL;
 
-#ifdef OMP
+#ifdef TM_USE_OMP
 #pragma omp for
 #endif
   for(x = 0; x < LX; x++) {
@@ -78,7 +78,7 @@ void set_even_to_zero(spinor * const P) {
     }
   }
 
-#ifdef OMP
+#ifdef TM_USE_OMP
   } /*OpenMP closing brace */
 #endif
 

From 15b58ec01acb8ce0dc98e6febd204b33ee83635e Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 14 Dec 2017 13:43:16 +0200
Subject: [PATCH 44/85] Add SWAP macro to global

---
 global.h                             | 11 +++++++++++
 solver/dirac_operator_eigenvectors.c | 22 +++++++++++-----------
 solver/dirac_operator_eigenvectors.h |  5 -----
 3 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/global.h b/global.h
index 14e4173e1..e57db79c6 100644
--- a/global.h
+++ b/global.h
@@ -282,3 +282,14 @@ void fatal_error(char const *error, char const *function);
 
 #endif
 
+/*
+ * Generic macro for swapping two lvalues (values or pointers) of the same size; needs <string.h>.
+ * memcpy is used because it is optimal when the amount to copy is known at compile time.
+ * The array size is a compile-time check that sizeof(x) == sizeof(y); x and y are expanded twice.
+ */
+#define SWAP(x,y) do \
+{ unsigned char swap_temp[sizeof(x) == sizeof(y) ? (signed)sizeof(x) : -1]; \
+  memcpy(swap_temp,&(y),sizeof(x)); \
+  memcpy(&(y),&(x),    sizeof(x)); \
+  memcpy(&(x),swap_temp,sizeof(x)); \
+} while(0)
diff --git a/solver/dirac_operator_eigenvectors.c b/solver/dirac_operator_eigenvectors.c
index bdef1ec24..102c944bc 100644
--- a/solver/dirac_operator_eigenvectors.c
+++ b/solver/dirac_operator_eigenvectors.c
@@ -1137,7 +1137,7 @@ void spinorStructEigenvecQtm(spinor *fv,double kappa,double mu,int epsilon,int k
   double q[8];
   double p_mu[4];
   double p_mu_t[4];
-  double psq,psq_tilde,M_wilson,prefactor,beta,norm_factor,swap_dummy;
+  double psq,psq_tilde,M_wilson,prefactor,beta,norm_factor;
   double *fv_=(double*)fv;
   int index;
 
@@ -1165,10 +1165,10 @@ void spinorStructEigenvecQtm(spinor *fv,double kappa,double mu,int epsilon,int k
 
   /* multiply with i ... */
   /* .. so first swap re <-> im .. */
-  SWAP(q[0],q[1],swap_dummy);
-  SWAP(q[2],q[3],swap_dummy);
-  SWAP(q[4],q[5],swap_dummy);
-  SWAP(q[6],q[7],swap_dummy);
+  SWAP(q[0],q[1]);
+  SWAP(q[2],q[3]);
+  SWAP(q[4],q[5]);
+  SWAP(q[6],q[7]);
 
   /* and multiply new real part (former imag part) with -1 */
   q[0]*=-prefactor; q[1]*=prefactor; q[2]*=-prefactor; q[3]*=prefactor;
@@ -1216,7 +1216,7 @@ void spinorStructEigenvecQtmSu3Vector(spinor *fv,double kappa,double mu,int epsi
   double q[8];
   double p_mu[4];
   double p_mu_t[4];
-  double psq,psq_tilde,M_wilson,prefactor,beta,norm_factor,swap_dummy;
+  double psq,psq_tilde,M_wilson,prefactor,beta,norm_factor;
 
   calcPmuLattice(rawp,p_mu,tt,ll);
   psq=p_mu[0]*p_mu[0]+
@@ -1242,10 +1242,10 @@ void spinorStructEigenvecQtmSu3Vector(spinor *fv,double kappa,double mu,int epsi
 
   /* multiply with i ... */
   /* .. so first swap re <-> im .. */
-  SWAP(q[0],q[1],swap_dummy);
-  SWAP(q[2],q[3],swap_dummy);
-  SWAP(q[4],q[5],swap_dummy);
-  SWAP(q[6],q[7],swap_dummy);
+  SWAP(q[0],q[1]);
+  SWAP(q[2],q[3]);
+  SWAP(q[4],q[5]);
+  SWAP(q[6],q[7]);
 
   /* and multiply new real part (former imag part) with -1 */
   q[0]*=-prefactor; q[1]*=prefactor; q[2]*=-prefactor; q[3]*=prefactor;
@@ -2092,7 +2092,7 @@ int * makeDiagFalloffPmuMap(int n,int maxdmanhat){
 
     for(int i = 0;i<10;i++){
       ranlxd(r,2);
-      SWAP(drawp[(int)(r[0]*4.)],drawp[(int)(r[1]*4.)],r[2]);
+      SWAP(drawp[(int)(r[0]*4.)],drawp[(int)(r[1]*4.)]);
 
   }
     fprintf(drawpStatFile," %d %d %d %d\n",drawp[0],drawp[1],drawp[2],drawp[3]);
diff --git a/solver/dirac_operator_eigenvectors.h b/solver/dirac_operator_eigenvectors.h
index da8f10187..cc27dc8f8 100644
--- a/solver/dirac_operator_eigenvectors.h
+++ b/solver/dirac_operator_eigenvectors.h
@@ -40,11 +40,6 @@
 #define M_PI  3.14159265358979323846
 #endif
 
-#define SWAP(x,y,d)\
-  d=x;\
-  x=y;\
-  y=d;
-
 #define min(x,y)\
   ((x<y)?x:y)
 #define max(x,y)\

From 8e3bc1ffbe32e3637e226cd5f3b61c2eda677b06 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 14 Dec 2017 13:43:59 +0200
Subject: [PATCH 45/85] Improving printing in initial guess

---
 solver/init_guess.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/solver/init_guess.c b/solver/init_guess.c
index d8d5e611d..9934759bf 100644
--- a/solver/init_guess.c
+++ b/solver/init_guess.c
@@ -79,7 +79,7 @@ int init_guess_mms(spinor ** const P, spinor * const Q,
       
     finalize_solver(temp, 1);
     if(g_proc_id == 0)
-        printf("INITIAL GUESS: shift: %d relative residual: %e\n",shift,res); 
+      printf("INITIAL GUESS: shift id=%d value=%e  relative residual: %e\n",shift,shifts[shift],res); 
   }
 
 }
@@ -133,7 +133,7 @@ int init_guess_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       
     finalize_solver(temp, 2);
     if(g_proc_id == 0)
-        printf("INITIAL GUESS: shift: %d relative residual: %e\n",shift,res); 
+      printf("INITIAL GUESS ND: shift id=%d value=%e  relative residual: %e\n",shift,shifts[shift],res); 
   }
 }
 
@@ -185,6 +185,6 @@ int init_guess_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn,
       
     finalize_solver(temp, 2);
     if(g_proc_id == 0)
-        printf("INITIAL GUESS: shift: %d relative residual: %e\n",shift,res); 
+      printf("INITIAL GUESS ND PLUS: shift id=%d value=%e  relative residual: %e\n",shift,shifts[shift],res); 
   }
 }

From 986aad79c64ba779c15da1d83f9c9ebf08e0e823 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 14 Dec 2017 13:44:57 +0200
Subject: [PATCH 46/85] Bug fixes and some additional improvements

---
 solver/cg_mms_tm_nd.c   |  10 ++--
 solver/monomial_solve.c | 120 +++++++++++++++++-----------------------
 2 files changed, 56 insertions(+), 74 deletions(-)

diff --git a/solver/cg_mms_tm_nd.c b/solver/cg_mms_tm_nd.c
index 3134da49f..55050041c 100644
--- a/solver/cg_mms_tm_nd.c
+++ b/solver/cg_mms_tm_nd.c
@@ -93,12 +93,12 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
   alphas[0] = 1.0;
   betas[0] = 0.0;
   sigma[0] = solver_params->shifts[0]*solver_params->shifts[0];
-  if(g_proc_id == 0 && g_debug_level > 2) printf("# CGMMSND: shift %d is %e\n", 0, sigma[0]);
+  if(g_proc_id == 0 && g_debug_level > 2) printf("# CGMMSND: shift %d is %e\n", 0, solver_params->shifts[0]);
 
   /* currently only implemented for P=0 */
   for(int im = 1; im < shifts; im++) {
     sigma[im] = solver_params->shifts[im]*solver_params->shifts[im] - sigma[0];
-    if(g_proc_id == 0 && g_debug_level > 2) printf("# CGMMSND: shift %d is %e\n", im, sigma[im]);
+    if(g_proc_id == 0 && g_debug_level > 2) printf("# CGMMSND: shift %d is %e\n", im, solver_params->shifts[im]);
     // these will be the result spinor fields
     zero_spinor_field(Pup[im], N);
     zero_spinor_field(Pdn[im], N);
@@ -163,6 +163,7 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
         err = alphas[shifts-1]*alphas[shifts-1]*sn;
 	while(((err <= solver_params->squared_solver_prec) && (solver_params->rel_prec == 0)) ||
               ((err <= solver_params->squared_solver_prec*squarenorm) && (solver_params->rel_prec > 0))) {
+	  shifts--;
           // for testing purpose
 	  if(g_debug_level > 3) {
 	    if (g_proc_id == 0) printf("# CGMMSND: residual of remaining shifts\n");
@@ -171,11 +172,10 @@ int cg_mms_tm_nd(spinor ** const Pup, spinor ** const Pdn,
               sn = square_norm(ps_mms_solver[2*is], N, 1);
               sn += square_norm(ps_mms_solver[2*is+1], N, 1);
               err = alphas[is]*alphas[is]*sn;
-              if (g_proc_id == 0) printf("#\t %d\t\t %e\t %e\n", is, sigma[is], solver_params->rel_prec ? err/squarenorm : err);
+              if (g_proc_id == 0) printf("#\t %d\t\t %e\t %e\n", is, solver_params->shifts[is], solver_params->rel_prec ? err/squarenorm : err);
             }
-            if (g_proc_id == 0) printf("#\t %d\t\t %e\t %e\n", 0, sigma[0], solver_params->rel_prec ? normsq/squarenorm : normsq);
+            if (g_proc_id == 0) printf("#\t %d\t\t %e\t %e\n", 0, solver_params->shifts[0], solver_params->rel_prec ? normsq/squarenorm : normsq);
 	  }
-	  shifts--;
 	  if(g_debug_level > 2 && g_proc_id == 0) {
 	    printf("# CGMMSND: at iteration %d removed one shift with residual %e. %d shifts remaining\n", iteration, solver_params->rel_prec ? err/squarenorm : err, shifts);
 	  }
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 659942aa3..a29a47f9f 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -212,50 +212,46 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
   else if (solver_params->type == MG) {
     // if the mg_mms_mass is larger than the smallest shift we use MG
     if (mg_no_shifts > 0 || mg_mms_mass >= solver_params->shifts[0]) { 
+      int nshifts = solver_params->no_shifts;
+      int mg_nshifts = mg_no_shifts > nshifts ? nshifts:mg_no_shifts;
       // if the mg_mms_mass is smaller than the larger shifts, we use CGMMS for those
       // in case mg_no_shifts is used, then mg_mms_mass = 0
       if(mg_mms_mass >= solver_params->shifts[0]) {
-        mg_no_shifts = solver_params->no_shifts;
-        while (mg_mms_mass < solver_params->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
+        mg_nshifts = solver_params->no_shifts;
+        while (mg_mms_mass < solver_params->shifts[mg_nshifts-1]) { mg_nshifts--; }
       }
       // Number of initial guesses provided by gcmms
       // README: tunable value. 1 it's fine for now.
       int no_cgmms_init_guess = 1;
-      if(no_cgmms_init_guess > mg_no_shifts) {
-        no_cgmms_init_guess = mg_no_shifts;
+      if(no_cgmms_init_guess > mg_nshifts) {
+        no_cgmms_init_guess = mg_nshifts;
       }
-      int no_shifts = solver_params->no_shifts;
-      if (mg_no_shifts < no_shifts) {
-        spinor ** P_cg = P+(mg_no_shifts - no_cgmms_init_guess);
-        solver_params->no_shifts = no_shifts - (mg_no_shifts - no_cgmms_init_guess);
-        solver_params->shifts += (mg_no_shifts - no_cgmms_init_guess);
-        // switching last shift
+      if (mg_nshifts < nshifts) {
+        spinor ** P_cg = P+(mg_nshifts - no_cgmms_init_guess);
+        double * shifts_start = solver_params->shifts;
+        solver_params->no_shifts = nshifts - (mg_nshifts - no_cgmms_init_guess);
+        solver_params->shifts += (mg_nshifts - no_cgmms_init_guess);
+        // switching last shift. We run CGMMS for the shift we want to solve.
         if (no_cgmms_init_guess > 0) {
-          double tmp = solver_params->shifts[0];
-          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
-          solver_params->shifts[no_cgmms_init_guess] = tmp;
-          spinor * tmpP = P_cg[0];
-          P_cg[0] = P_cg[no_cgmms_init_guess];
-          P_cg[no_cgmms_init_guess] = tmpP;
+          SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
+          SWAP(P_cg[0], P_cg[no_cgmms_init_guess]);
         }
         iteration_count = cg_mms_tm( P_cg, Q, solver_params );
-        // Restoring solver_params
-        // switching last shift
+        // Switching back last shift
         if (no_cgmms_init_guess > 0) {
-          spinor * tmpP = P_cg[0];
-          double tmp = solver_params->shifts[0];
-          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
-          solver_params->shifts[no_cgmms_init_guess] = tmp;
-          P_cg[0] = P_cg[no_cgmms_init_guess];
-          P_cg[no_cgmms_init_guess] = tmpP;
+          SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
+          SWAP(P_cg[0], P_cg[no_cgmms_init_guess]);
         }
-        solver_params->no_shifts = no_shifts;
-        solver_params->shifts -= (mg_no_shifts - no_cgmms_init_guess);
+        // Restoring solver_params
+        solver_params->no_shifts = nshifts;
+        solver_params->shifts = shifts_start;
+      } else {
+        no_cgmms_init_guess = 0;
       }
 
-      for(int i = mg_no_shifts-1; i>=0; i--){
+      for(int i = mg_nshifts-1; i>=0; i--){
         // preparing initial guess
-        if(i<mg_no_shifts-no_cgmms_init_guess)
+        if(i<mg_nshifts-no_cgmms_init_guess)
           init_guess_mms(P, Q, i, solver_params);
         g_mu3 = solver_params->shifts[i]; 
         iteration_count += MG_solver( P[i], Q, solver_params->squared_solver_prec, solver_params->max_iter,
@@ -364,72 +360,58 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     }
   } else if (solver_params->type == CGMMSND){
     iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_params);
-    //Testing the initial guess
-    if( g_debug_level > 3 ){
-      for(int i = solver_params->no_shifts-1; i>=0; i--){
-        // preparing initial guess
-        init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
-      }
-    }
   }
 #ifdef DDalphaAMG
   else if (solver_params->type == MG) {
     // if the mg_mms_mass is larger than the smallest shift we use MG
     if (mg_no_shifts > 0 || mg_mms_mass >= solver_params->shifts[0]) { 
 
+      int nshifts = solver_params->no_shifts;
+      int mg_nshifts = mg_no_shifts > nshifts ? nshifts:mg_no_shifts;
       // if the mg_mms_mass is smaller than the larger shifts, we use CGMMS for those
       // in case mg_no_shifts is used, then mg_mms_mass = 0
       if(mg_mms_mass >= solver_params->shifts[0]) {
-        mg_no_shifts = solver_params->no_shifts;
-        while (mg_mms_mass < solver_params->shifts[mg_no_shifts-1]) { mg_no_shifts--; }
+        mg_nshifts = nshifts;
+        while (mg_mms_mass < solver_params->shifts[mg_nshifts-1]) { mg_nshifts--; }
       }
       // Number of initial guesses provided by gcmms
       // README: tunable value. 1 it's fine for now.
       int no_cgmms_init_guess = 2;
-      if(no_cgmms_init_guess > mg_no_shifts) {
-        no_cgmms_init_guess = mg_no_shifts;
+      if(no_cgmms_init_guess > mg_nshifts) {
+        no_cgmms_init_guess = mg_nshifts;
       }
-      int no_shifts = solver_params->no_shifts;
-      if (mg_no_shifts < no_shifts) {
-        spinor ** Pup_cg = Pup+(mg_no_shifts - no_cgmms_init_guess);
-        spinor ** Pdn_cg = Pdn+(mg_no_shifts - no_cgmms_init_guess);
-        solver_params->no_shifts = no_shifts - (mg_no_shifts - no_cgmms_init_guess);
-        solver_params->shifts += (mg_no_shifts - no_cgmms_init_guess);
+      if (mg_nshifts < nshifts) {
+        spinor ** Pup_cg = Pup+(mg_nshifts - no_cgmms_init_guess);
+        spinor ** Pdn_cg = Pdn+(mg_nshifts - no_cgmms_init_guess);
+        double * shifts_start = solver_params->shifts;
+        solver_params->no_shifts = nshifts - (mg_nshifts - no_cgmms_init_guess);
+        solver_params->shifts += (mg_nshifts - no_cgmms_init_guess);
         if (no_cgmms_init_guess > 0) {
-          double tmp = solver_params->shifts[0];
-          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
-          solver_params->shifts[no_cgmms_init_guess] = tmp;
-          spinor * tmpP = Pup_cg[0];
-          Pup_cg[0] = Pup_cg[no_cgmms_init_guess];
-          Pup_cg[no_cgmms_init_guess] = tmpP;
-          tmpP = Pdn_cg[0];
-          Pdn_cg[0] = Pdn_cg[no_cgmms_init_guess];
-          Pdn_cg[no_cgmms_init_guess] = tmpP;
+          SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
+          SWAP(Pup_cg[0], Pup_cg[no_cgmms_init_guess]);
+          SWAP(Pdn_cg[0], Pdn_cg[no_cgmms_init_guess]);
         }
         iteration_count = cg_mms_tm_nd( Pup_cg, Pdn_cg, Qup, Qdn, solver_params );
-        // Restoring solver_params
+        // Switching back last shift
         if (no_cgmms_init_guess > 0) {
-          double tmp = solver_params->shifts[0];
-          solver_params->shifts[0] = solver_params->shifts[no_cgmms_init_guess];
-          solver_params->shifts[no_cgmms_init_guess] = tmp;
-          spinor * tmpP = Pup_cg[0];
-          Pup_cg[0] = Pup_cg[no_cgmms_init_guess];
-          Pup_cg[no_cgmms_init_guess] = tmpP;
-          tmpP = Pdn_cg[0];
-          Pdn_cg[0] = Pdn_cg[no_cgmms_init_guess];
-          Pdn_cg[no_cgmms_init_guess] = tmpP;
+          SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
+          SWAP(Pup_cg[0], Pup_cg[no_cgmms_init_guess]);
+          SWAP(Pdn_cg[0], Pdn_cg[no_cgmms_init_guess]);
         }
-        solver_params->no_shifts = no_shifts;
-        solver_params->shifts -= (mg_no_shifts - no_cgmms_init_guess);
+        // Restoring solver_params
+        solver_params->no_shifts = nshifts;
+        solver_params->shifts = shifts_start;
+      } else {
+        no_cgmms_init_guess = 0;
       }
 
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
       if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
         f = Qsw_pm_ndpsi_shift;
 
-      for(int i = mg_no_shifts-1; i>=0; i--){
+      for(int i = mg_nshifts-1; i>=0; i--){
         // preparing initial guess
-        if(i<mg_no_shifts-no_cgmms_init_guess)
+        if(i<mg_nshifts-no_cgmms_init_guess)
           init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
         g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
         iteration_count += MG_solver_nd( Pup[i], Pdn[i], Qup, Qdn, solver_params->squared_solver_prec, solver_params->max_iter,
@@ -510,7 +492,7 @@ int solve_mms_nd_plus(spinor ** const Pup, spinor ** const Pdn,
     if( solver_params->M_ndpsi == Qsw_pm_ndpsi )
       f = Qsw_tau1_ndpsi_sub_Ishift;
     spinor** temp;
-    init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
+    init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
     for(int i = solver_params->no_shifts-1; i>=0; i--){
       g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
       f(temp[0],temp[1],Pup[i],Pdn[i]);

From d99e87a9ad1cde5c79825d83163539f94afea204 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 15 Dec 2017 15:39:17 +0200
Subject: [PATCH 47/85] Add variant for shifted operator.

---
 DDalphaAMG_interface.c | 103 ++++++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 28 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index c92ace427..2dfc06feb 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -104,6 +104,10 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
 //Enable to test the solution. It cost an application more of the operator. 
 //TODO: test all the operators interfaced and then undefine this flag.
 #define MGTEST
+//Enable variant for shifted operator in the ND sector.
+//The variant is used in case of initial guess for the squared operator.
+//It is faster and tests prove also to be safe (read Appendix A of arxiv:1801.##### by S.Bacchio et al.)
+#define VARIANT_FOR_SHIFTED
 
 DDalphaAMG_init mg_init;
 DDalphaAMG_parameters mg_params;
@@ -212,7 +216,13 @@ static inline int MG_check_nd( spinor * const up_new, spinor * const dn_new, spi
   double differ[2], residual;
   spinor ** check_vect = NULL;
   double acc_factor = 4;
-  
+#ifdef VARIANT_FOR_SHIFTED
+  if((  f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+        f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+     && g_shift != 0 )
+    acc_factor = 1/sqrt(phmc_cheb_evmin/phmc_cheb_evmax + g_shift);
+#endif
+
   init_solver_field(&check_vect, VOLUMEPLUSRAND,2);
   f( check_vect[0], check_vect[1], up_new, dn_new);
   diff( check_vect[0], check_vect[0], up_old, N);
@@ -225,7 +235,7 @@ static inline int MG_check_nd( spinor * const up_new, spinor * const dn_new, spi
   
   if( residual > precision && residual < acc_factor*precision ) {
     if(g_proc_id == 0)
-      printf("WARNING: solution accepted even if the residual wasn't complitely acceptable (%e > %e) \n", residual, precision);
+      printf("WARNING: solution accepted even if the residual wasn't complitely acceptable (%e > %e). Max acc. factor %f.\n", residual, precision, acc_factor);
   } else if( residual > acc_factor*precision ) {
     if(g_proc_id == 0) {
       printf("ERROR: something bad happened... MG converged giving the wrong solution!! Trying to restart... \n");
@@ -587,25 +597,46 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
 
     /* Reconstruct the even sites                */
     if (    f == Qtm_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
-            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
             f == Qsw_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared
+            f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
             f == Qsw_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
             f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qsw_tau1_ndpsi_sub_Ishift ) {// Gamma5 Dh tau1 - Schur complement with minus shift
-      // tau1 exchange tmp11 <-> tmp12
-      Hopping_Matrix(EO, tmp12, up_new);
-      Hopping_Matrix(EO, tmp11, dn_new);
-
-      Msw_ee_inv_ndpsi(tmp21, tmp22, tmp11, tmp12);
-
-      /* Assigning with plus sign for the even
-       * since in Hopping_Matrix the minus is missing
-       */
-      // tau1 exchange tmp22 <-> tmp21
-      convert_eo_to_lexic(new1, tmp22, up_new);
-      convert_eo_to_lexic(new2, tmp21, dn_new);
+#ifdef VARIANT_FOR_SHIFTED
+      if((  f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+         && g_shift != 0 ) {
+        if( f == Qtm_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+          Qtm_tau1_ndpsi_add_Ishift(tmp12, tmp11, up_new, dn_new); // tau1 exchange tmp11 <-> tmp12  
+        } else {                        // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+          Qsw_tau1_ndpsi_add_Ishift(tmp12, tmp11, up_new, dn_new); // tau1 exchange tmp11 <-> tmp12
+        }
+        // tau1 exchange new1tmp <-> new2tmp
+        convert_odd_to_lexic( new2, tmp11);
+        convert_odd_to_lexic( new1, tmp12);
+        Hopping_Matrix(EO, tmp21, tmp11);
+        Hopping_Matrix(EO, tmp22, tmp12);
+        Msw_ee_inv_ndpsi(tmp11, tmp12, tmp21, tmp22);
+        convert_even_to_lexic(new2, tmp11);
+        convert_even_to_lexic(new1, tmp12);
+      } else
+#endif
+      {
+        // tau1 exchange tmp11 <-> tmp12
+        Hopping_Matrix(EO, tmp12, up_new);
+        Hopping_Matrix(EO, tmp11, dn_new);
+
+        Msw_ee_inv_ndpsi(tmp21, tmp22, tmp11, tmp12);
+
+        /* Assigning with plus sign for the even
+         * since in Hopping_Matrix the minus is missing
+         */
+        // tau1 exchange tmp22 <-> tmp21
+        convert_eo_to_lexic(new1, tmp22, up_new);
+        convert_eo_to_lexic(new2, tmp21, dn_new);
+      }
     } else {
       Hopping_Matrix(EO, tmp11, up_new);
       Hopping_Matrix(EO, tmp12, dn_new);
@@ -771,23 +802,39 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
       mul_r(new2tmp, 1/mg_scale, new2tmp, VOLUME);
       DDalphaAMG_solve_doublet_with_guess( (double*) new2tmp, (double*) old1, (double*) new1tmp, (double*) old2,
                                            precision/2, &mg_status );
+#ifdef VARIANT_FOR_SHIFTED
+      if((  f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+            f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+         && g_shift != 0 ) {
+        // Removing normalization from initial guess
+        mul_r(new1, 1/mg_scale, new1, VOLUME);
+        mul_r(new2, 1/mg_scale, new2, VOLUME);
+        MG_update_mubar_epsbar( g_mubar, g_epsbar, -sqrt(g_shift) );
+        DDalphaAMG_solve_doublet_with_guess( (double*) new2, (double*) old1, (double*) new1, (double*) old2,
+                                             precision/2, &mg_status );
+        assign_mul_add_mul(new1, -_Complex_I/2./sqrt(g_shift), new1tmp, _Complex_I/2./sqrt(g_shift), VOLUME);
+        assign_mul_add_mul(new2, -_Complex_I/2./sqrt(g_shift), new2tmp, _Complex_I/2./sqrt(g_shift), VOLUME);
+      } else 
+#endif
+      {
+        mul_gamma5(new1tmp, VOLUME);
+        mul_gamma5(new2tmp, VOLUME);
+        set_even_to_zero(new1tmp);
+        set_even_to_zero(new2tmp);
+        // Removing normalization from initial guess
+        mg_scale *= mg_scale;
+        mul_r(new1, 1/mg_scale, new1, VOLUME);
+        mul_r(new2, 1/mg_scale, new2, VOLUME);
+        if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
+                  f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
+          MG_update_mubar_epsbar( g_mubar, g_epsbar, -sqrt(g_shift) );
+        DDalphaAMG_solve_doublet_with_guess( (double*) new2, (double*) new1tmp, (double*) new1, (double*) new2tmp,
+                                             precision/2, &mg_status );      
+      }
       if( N == VOLUME ) { // in case of VOLUME/2 old is a just local vector
         mul_gamma5(old1, VOLUME);
         mul_gamma5(old2, VOLUME);
       }
-      mul_gamma5(new1tmp, VOLUME);
-      mul_gamma5(new2tmp, VOLUME);
-      set_even_to_zero(new1tmp);
-      set_even_to_zero(new2tmp);
-      // Removing normalization from initial guess
-      mg_scale *= mg_scale;
-      mul_r(new1, 1/mg_scale, new1, VOLUME);
-      mul_r(new2, 1/mg_scale, new2, VOLUME);
-      if (      f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
-                f == Qsw_pm_ndpsi_shift )   // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
-        MG_update_mubar_epsbar( g_mubar, g_epsbar, -sqrt(g_shift) );
-      DDalphaAMG_solve_doublet_with_guess( (double*) new2, (double*) new1tmp, (double*) new1, (double*) new2tmp,
-                                           precision/2, &mg_status );      
     } else {
       mg_scale *= mg_scale;
       DDalphaAMG_solve_doublet_squared_odd( (double*) new2, (double*) old2, (double*) new1, (double*) old1,

From e7d64b05aa1c6b5f06f1989e2041c9d831d22b20 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 19 Dec 2017 12:56:01 +0200
Subject: [PATCH 48/85] Enabling the use of QphiX

---
 solver/monomial_solve.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index a29a47f9f..a3e213f38 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -231,12 +231,13 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
         double * shifts_start = solver_params->shifts;
         solver_params->no_shifts = nshifts - (mg_nshifts - no_cgmms_init_guess);
         solver_params->shifts += (mg_nshifts - no_cgmms_init_guess);
+        solver_params->type = CGMMS;
         // switching last shift. We run CGMMS for the shift we want to solve.
         if (no_cgmms_init_guess > 0) {
           SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
           SWAP(P_cg[0], P_cg[no_cgmms_init_guess]);
         }
-        iteration_count = cg_mms_tm( P_cg, Q, solver_params );
+        iteration_count = solve_mms_tm( P_cg, Q, solver_params );
         // Switching back last shift
         if (no_cgmms_init_guess > 0) {
           SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
@@ -245,6 +246,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
         // Restoring solver_params
         solver_params->no_shifts = nshifts;
         solver_params->shifts = shifts_start;
+        solver_params->type = MG;
       } else {
         no_cgmms_init_guess = 0;
       }
@@ -329,7 +331,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         matrix_mult_nd f = Qtm_pm_ndpsi_shift;
         if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
           f = Qsw_pm_ndpsi_shift;
-        g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
+        g_shift = solver_params->shifts[shift]*solver_params->shifts[shift]; 
         f(temp[0], temp[1], Pup[shift], Pdn[shift]);
         g_shift = _default_g_shift;
         diff(temp[0], temp[0], Qup, VOLUME/2);
@@ -386,12 +388,13 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         double * shifts_start = solver_params->shifts;
         solver_params->no_shifts = nshifts - (mg_nshifts - no_cgmms_init_guess);
         solver_params->shifts += (mg_nshifts - no_cgmms_init_guess);
+        solver_params-> type = CGMMSND;
         if (no_cgmms_init_guess > 0) {
           SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
           SWAP(Pup_cg[0], Pup_cg[no_cgmms_init_guess]);
           SWAP(Pdn_cg[0], Pdn_cg[no_cgmms_init_guess]);
         }
-        iteration_count = cg_mms_tm_nd( Pup_cg, Pdn_cg, Qup, Qdn, solver_params );
+        iteration_count = solve_mms_nd( Pup_cg, Pdn_cg, Qup, Qdn, solver_params );
         // Switching back last shift
         if (no_cgmms_init_guess > 0) {
           SWAP(solver_params->shifts[0], solver_params->shifts[no_cgmms_init_guess]);
@@ -401,6 +404,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         // Restoring solver_params
         solver_params->no_shifts = nshifts;
         solver_params->shifts = shifts_start;
+        solver_params-> type = MG;
       } else {
         no_cgmms_init_guess = 0;
       }

From 738897521b908c2970ff82844972c9a761270a3a Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Tue, 19 Dec 2017 18:00:14 +0200
Subject: [PATCH 49/85] Restoring residual check

---
 solver/monomial_solve.c | 109 ++++++++++++++++++++++++----------------
 1 file changed, 66 insertions(+), 43 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index a3e213f38..bf36482e4 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -96,11 +96,14 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
                      const int N, matrix_mult f, int solver_type){
   int iteration_count = 0;
 
+  // temporary field required by the QPhiX solve or by residual check
+  spinor** temp;
+  if(g_debug_level > 2 || solver_params.external_inverter == QPHIX_INVERTER){
+    init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
+  }
+
 #ifdef TM_USE_QPHIX
   if(solver_params.external_inverter == QPHIX_INVERTER){
-    spinor** temp;
-    // temporary field required by the QPhiX solve
-    init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
     // using CG for the HMC, we always want to have the solution of (Q Q^dagger) x = b, which is equivalent to
     // gamma_5 (M M^dagger)^{-1} gamma_5 b
     // FIXME: this needs to be adjusted to also support BICGSTAB
@@ -108,15 +111,6 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
     iteration_count = invert_eo_qphix_oneflavour(P, temp[0], max_iter, eps_sq, solver_type, 
                                                  rel_prec, solver_params, solver_params.sloppy_precision, solver_params.compression_type);
     mul_gamma5(P, VOLUME/2);
-    if(g_debug_level > 0){
-      f(temp[0], P);
-      diff(temp[0], temp[0], Q, VOLUME/2);
-      double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
-      if( g_proc_id == 0 ){
-        printf("# solve_degenerate residual check: %e\n", diffnorm);
-      }
-    }
-    finalize_solver(temp, 1);
   } else
 #endif
   if(solver_type == MIXEDCG || solver_type == RGMIXEDCG){
@@ -171,6 +165,18 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
     exit(2);
   }
 
+  if(g_debug_level > 2){
+    f(temp[0], P);
+    diff(temp[0], temp[0], Q, VOLUME/2);
+    double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
+    if( g_proc_id == 0 ){
+      printf("# solve_degenerate residual check: %e\n", diffnorm);
+    }
+  }
+  if(g_debug_level > 2 || solver_params.external_inverter == QPHIX_INVERTER){
+    finalize_solver(temp, 1);
+  }
+
   return(iteration_count);
 }
 
@@ -178,10 +184,14 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
                  solver_params_t * solver_params){ 
   int iteration_count = 0; 
 
+  // temporary field required by the QPhiX solve or by residual check
+  spinor ** temp;
+  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+    init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
+  }
+
 #ifdef TM_USE_QPHIX
   if( solver_params->external_inverter == QPHIX_INVERTER ){
-    spinor ** temp;
-    init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
     gamma5(temp[0], Q, VOLUME/2);
     iteration_count = invert_eo_qphix_oneflavour_mshift(P, temp[0],
                                                         solver_params->max_iter, solver_params->squared_solver_prec,
@@ -191,18 +201,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
                                                         solver_params->compression_type);
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       mul_gamma5(P[shift], VOLUME/2);
-      if(g_debug_level > 0){
-        g_mu3 = solver_params->shifts[shift]; 
-        solver_params->M_psi(temp[0], P[shift]);
-        g_mu3 = _default_g_mu3;
-        diff(temp[0], temp[0], Q, VOLUME/2);
-        double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
-        if( g_proc_id == 0 ){
-          printf("# solve_mms_tm residual check: shift %d, res. %e\n", i, diffnorm);
-        }
-      }
     }
-    finalize_solver(temp, 1);
   } else
 #endif // TM_USE_QPHIX
   if (solver_params->type == CGMMS){
@@ -297,6 +296,22 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
     exit(2);      
   }
 
+  if(g_debug_level > 2){
+    for( int shift = 0; shift < solver_params->no_shifts; shift++){
+      g_mu3 = solver_params->shifts[shift]; 
+      solver_params->M_psi(temp[0], P[shift]);
+      g_mu3 = _default_g_mu3;
+      diff(temp[0], temp[0], Q, VOLUME/2);
+      double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
+      if( g_proc_id == 0 ){
+        printf("# solve_mms_tm residual check: shift %d, res. %e\n", shift, diffnorm);
+      }
+    }
+  }
+  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+    finalize_solver(temp, 1);
+  }
+
   return(iteration_count);
 }
 
@@ -305,10 +320,14 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
                  solver_params_t * solver_params){ 
   int iteration_count = 0; 
 
+  // temporary field required by the QPhiX solve or by residual check
+  spinor ** temp;
+  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+    init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
+  }
+
 #ifdef TM_USE_QPHIX
   if(solver_params->external_inverter == QPHIX_INVERTER){
-    spinor** temp;
-    init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
     //  gamma5 (M.M^dagger)^{-1} gamma5 = [ Q(+mu,eps) Q(-mu,eps) ]^{-1}
     gamma5(temp[0], Qup, VOLUME/2);
     gamma5(temp[1], Qdn, VOLUME/2);
@@ -327,23 +346,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       mul_r_gamma5(Pup[shift], maxev_sq, VOLUME/2);
       mul_r_gamma5(Pdn[shift], maxev_sq, VOLUME/2);
-      if( g_debug_level > 0 ){
-        matrix_mult_nd f = Qtm_pm_ndpsi_shift;
-        if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
-          f = Qsw_pm_ndpsi_shift;
-        g_shift = solver_params->shifts[shift]*solver_params->shifts[shift]; 
-        f(temp[0], temp[1], Pup[shift], Pdn[shift]);
-        g_shift = _default_g_shift;
-        diff(temp[0], temp[0], Qup, VOLUME/2);
-        diff(temp[1], temp[1], Qdn, VOLUME/2);
-        double diffnorm = square_norm(temp[0], VOLUME/2, 1) + square_norm(temp[1], VOLUME/2, 1); 
-        if( g_proc_id == 0 ){
-          printf("# solve_mms_nd residual check: %e\n", diffnorm);
-          printf("# NOTE that this currently repors the residual for the *unishfted* operator!\n");
-        }
-      }
     }
-    finalize_solver(temp, 2);
   } else
 #endif //TM_USE_QPHIX
   if(solver_params->type==MIXEDCGMMSND){
@@ -460,6 +463,26 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     exit(2);      
   }
 
+  if( g_debug_level > 2 ){
+    for( int shift = 0; shift < solver_params->no_shifts; shift++){
+      matrix_mult_nd f = Qtm_pm_ndpsi_shift;
+      if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
+        f = Qsw_pm_ndpsi_shift;
+      g_shift = solver_params->shifts[shift]*solver_params->shifts[shift]; 
+      f(temp[0], temp[1], Pup[shift], Pdn[shift]);
+      g_shift = _default_g_shift;
+      diff(temp[0], temp[0], Qup, VOLUME/2);
+      diff(temp[1], temp[1], Qdn, VOLUME/2);
+      double diffnorm = square_norm(temp[0], VOLUME/2, 1) + square_norm(temp[1], VOLUME/2, 1); 
+      if( g_proc_id == 0 ){
+        printf("# solve_mms_nd residual check: %e\n", diffnorm);
+      }
+    }
+  }
+  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+    finalize_solver(temp, 2);
+  }
+
   return(iteration_count);
 }
 

From b5f3c012e8c2bfb831848b119c5cfbf5f6ff748c Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 20 Dec 2017 11:49:42 +0200
Subject: [PATCH 50/85] Running MG with DDalphaAMG as standard

---
 solver/monomial_solve.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index bf36482e4..e7d7c8f9f 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -186,12 +186,12 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
 
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
-  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
   }
 
 #ifdef TM_USE_QPHIX
-  if( solver_params->external_inverter == QPHIX_INVERTER ){
+  if( solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG ){
     gamma5(temp[0], Q, VOLUME/2);
     iteration_count = invert_eo_qphix_oneflavour_mshift(P, temp[0],
                                                         solver_params->max_iter, solver_params->squared_solver_prec,
@@ -308,7 +308,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       }
     }
   }
-  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
     finalize_solver(temp, 1);
   }
 
@@ -322,12 +322,12 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
 
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
-  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
   }
 
 #ifdef TM_USE_QPHIX
-  if(solver_params->external_inverter == QPHIX_INVERTER){
+  if(solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG){
     //  gamma5 (M.M^dagger)^{-1} gamma5 = [ Q(+mu,eps) Q(-mu,eps) ]^{-1}
     gamma5(temp[0], Qup, VOLUME/2);
     gamma5(temp[1], Qdn, VOLUME/2);
@@ -479,7 +479,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       }
     }
   }
-  if(g_debug_level > 2 || solver_params->external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
     finalize_solver(temp, 2);
   }
 

From 2835afa89d0e5d4d6b1c455cb6fbd3dfe9178974 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Apr 2018 12:18:11 +0300
Subject: [PATCH 51/85] Adding time printing to cg_her_nd

---
 solver/cg_her_nd.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/solver/cg_her_nd.c b/solver/cg_her_nd.c
index e0fe53411..fc46f6ce6 100644
--- a/solver/cg_her_nd.c
+++ b/solver/cg_her_nd.c
@@ -48,6 +48,7 @@
 #include "su3.h"
 #include "linalg_eo.h"
 #include "start.h"
+#include "gettime.h"
 #include "solver/matrix_mult_typedef_nd.h"
 #include "sub_low_ev.h"
 #include "solver_field.h"
@@ -60,6 +61,7 @@ int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * c
   double normsp, normsq, pro, err, alpha_cg, beta_cg, squarenorm;
   int iteration;
   double err1, err2;
+  double atime, etime, flops;
   spinor ** up_field = NULL;
   spinor ** dn_field = NULL;  
   const int nr_sf = 5;
@@ -67,6 +69,7 @@ int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * c
   init_solver_field(&up_field, VOLUMEPLUSRAND, nr_sf);
   init_solver_field(&dn_field, VOLUMEPLUSRAND, nr_sf);
 
+  atime = gettime();
   squarenorm = square_norm(Q_up, N, 1);
   squarenorm+= square_norm(Q_dn, N, 1);
   /*        !!!!   INITIALIZATION    !!!! */
@@ -130,12 +133,7 @@ int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * c
     }
 
     if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
-      assign(P_up, up_field[0], N);
-      assign(P_dn, dn_field[0], N);
-      g_sloppy_precision = 0;
-      finalize_solver(up_field, nr_sf);
-      finalize_solver(dn_field, nr_sf);
-      return(iteration+1);
+      break;
     }
 #ifdef _USE_HALFSPINOR
     if(((err*err <= eps_sq) && (rel_prec == 0)) || ((err*err <= eps_sq*squarenorm) && (rel_prec == 1))) {
@@ -156,10 +154,15 @@ int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * c
   assign(P_up, up_field[0], N);
   assign(P_dn, dn_field[0], N);
   g_sloppy_precision = 0;  
+
+  etime = gettime();
+  if(g_debug_level > 0 && g_proc_id == 0) {
+    printf("# CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); 
+  }
   
   finalize_solver(up_field, nr_sf);
   finalize_solver(dn_field, nr_sf);
-  return(-1);
+  return(iteration+1);
 }
 
 

From 5d50f9b545165d0d5662d2a1b697b850e49d445b Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Apr 2018 13:28:10 +0300
Subject: [PATCH 52/85] Adding clover operator to nddetratio

---
 monomial/nddetratio_monomial.c | 36 +++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/monomial/nddetratio_monomial.c b/monomial/nddetratio_monomial.c
index 81f96cfc1..773817599 100644
--- a/monomial/nddetratio_monomial.c
+++ b/monomial/nddetratio_monomial.c
@@ -38,6 +38,8 @@
 #include "operator/Hopping_Matrix.h"
 #include "phmc.h"
 #include "boundary.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 #include "gamma.h"
 #include "operator/tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
@@ -47,6 +49,7 @@
 #include "monomial/monomial.h"
 #include "hamiltonian_field.h"
 #include "nddetratio_monomial.h"
+#include "DDalphaAMG_interface.h"
 
 
 
@@ -54,24 +57,43 @@ double nddetratio_acc(const int id, hamiltonian_field_t * const hf) {
   int iter;
   monomial * mnl = &monomial_list[id];
   double atime, etime;
+  matrix_mult_nd Q_pm_ndpsi = Qtm_pm_ndpsi, Q_dagger_ndpsi = Qtm_dagger_ndpsi, Q_ndpsi = Qtm_ndpsi;
   atime = gettime();
   
   g_mubar = mnl->mubar;
   g_epsbar = mnl->epsbar;
   boundary(mnl->kappa);
 
-  iter = cg_her_nd(mnl->w_fields[0], mnl->w_fields[1], mnl->pf, mnl->pf2,
-		   mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
-		   VOLUME/2, &Qtm_pm_ndpsi);
-  Qtm_dagger_ndpsi(mnl->w_fields[2], mnl->w_fields[3],
-			mnl->w_fields[0], mnl->w_fields[1]);
+  if(mnl->type == NDCLOVERDETRATIO) {
+    Q_pm_ndpsi = Qsw_pm_ndpsi;
+    Q_dagger_ndpsi = Qsw_dagger_ndpsi;
+    Q_ndpsi = Qsw_ndpsi;
+    init_sw_fields();
+    sw_term((const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
+    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
+  }
+  if( mnl->solver == MG ) {
+    iter = MG_solver_nd(mnl->w_fields[2], mnl->w_fields[3], mnl->pf, mnl->pf2,
+                        mnl->accprec, mnl->maxiter, g_relative_precision_flag, 
+                        VOLUME/2, g_gauge_field, Q_ndpsi);
+  } else {
+    iter = cg_her_nd(mnl->w_fields[0], mnl->w_fields[1], mnl->pf, mnl->pf2,
+                     mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
+                     VOLUME/2, Q_pm_ndpsi);
+    Q_dagger_ndpsi(mnl->w_fields[2], mnl->w_fields[3],
+                   mnl->w_fields[0], mnl->w_fields[1]);
+  }
 
   g_mubar = mnl->mubar2;
   g_epsbar = mnl->epsbar2;
   boundary(mnl->kappa2);
 
-  Qtm_ndpsi(mnl->w_fields[0], mnl->w_fields[1],
-		  mnl->w_fields[2], mnl->w_fields[3]);
+  if(mnl->type == NDCLOVERDETRATIO) {
+    sw_term((const su3**) hf->gaugefield, mnl->kappa2, mnl->c_sw); 
+    sw_invert_nd(mnl->mubar2*mnl->mubar2 - mnl->epsbar2*mnl->epsbar2);
+  }
+  Q_ndpsi(mnl->w_fields[0], mnl->w_fields[1],
+            mnl->w_fields[2], mnl->w_fields[3]);
   
   mnl->energy1  = scalar_prod_r(mnl->pf , mnl->w_fields[0], VOLUME/2, 1);
   mnl->energy1 += scalar_prod_r(mnl->pf2, mnl->w_fields[1], VOLUME/2, 1);

From 9f2b568d38714a59833c3f084f409894c50e887d Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Apr 2018 13:33:41 +0300
Subject: [PATCH 53/85] Enabling multiple measurements for CORRELATOR

---
 meas/measurements.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/meas/measurements.c b/meas/measurements.c
index 83b99334d..338ea7fea 100644
--- a/meas/measurements.c
+++ b/meas/measurements.c
@@ -61,15 +61,11 @@ int init_measurements(){
     if(measurement_list[i].type == ONLINE) {
       measurement_list[i].measurefunc = &correlators_measurement;
       measurement_list[i].max_source_slice = g_nproc_t*T;
-      measurement_list[i].no_samples = 1;
-      measurement_list[i].all_time_slices = 0;
     }
 
     if(measurement_list[i].type == PIONNORM) {
       measurement_list[i].measurefunc = &pion_norm_measurement;
       measurement_list[i].max_source_slice = g_nproc_z*LZ;
-      measurement_list[i].no_samples = 1;
-      measurement_list[i].all_time_slices = 0;
     }
     
     if(measurement_list[i].type == POLYAKOV) {

From 94f1f9a616723a002ef963166aaf4bc0e03e9a80 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Apr 2018 13:34:31 +0300
Subject: [PATCH 54/85] Adding NDCLOVERDETRATIO to monomial

---
 monomial/monomial.c | 15 +++++++++++++--
 monomial/monomial.h |  1 +
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/monomial/monomial.c b/monomial/monomial.c
index ba206a934..3ab062210 100644
--- a/monomial/monomial.c
+++ b/monomial/monomial.c
@@ -144,7 +144,7 @@ int add_monomial(const int type) {
   monomial_list[no_monomials].rat.crange[1] = 11;
 
   monomial_list[no_monomials].initialised = 1;
-  if(monomial_list[no_monomials].type == NDDETRATIO || monomial_list[no_monomials].type == CLOVERDETRATIORW) {
+  if(monomial_list[no_monomials].type == NDDETRATIO || monomial_list[no_monomials].type == NDCLOVERDETRATIO || monomial_list[no_monomials].type == CLOVERDETRATIORW) {
     monomial_list[no_monomials].timescale = -5;
   }
 
@@ -162,7 +162,7 @@ int init_monomials(const int V, const int even_odd_flag) {
   for(int i = 0; i < no_monomials; i++) {
     if((monomial_list[i].type != GAUGE) && (monomial_list[i].type != SFGAUGE)) no++;
     /* non-degenerate monomials need two pseudo fermion fields */
-    if((monomial_list[i].type == NDPOLY) || (monomial_list[i].type == NDDETRATIO) || 
+    if((monomial_list[i].type == NDPOLY) || (monomial_list[i].type == NDDETRATIO) || (monomial_list[i].type == NDCLOVERDETRATIO) || 
        (monomial_list[i].type == NDCLOVER) || (monomial_list[i].type == NDRAT)||
        (monomial_list[i].type == NDRATCOR) || (monomial_list[i].type == NDCLOVERRATCOR) ||
        (monomial_list[i].type == NDCLOVERRAT)) no++;
@@ -460,6 +460,17 @@ int init_monomials(const int V, const int even_odd_flag) {
 	  printf("# Initialised monomial of type NDDETRATIO, no_monomials= %d, currently only available for reweighting!\n", no_monomials);
 	}
       }
+      else if(monomial_list[i].type == NDCLOVERDETRATIO) {
+	monomial_list[i].hbfunction = &dummy_heatbath;
+	monomial_list[i].accfunction = &nddetratio_acc;
+	monomial_list[i].derivativefunction = NULL;
+	monomial_list[i].pf2 = __pf+no*V;
+	monomial_list[i].timescale = -5;
+	no++;
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type NDCLOVERDETRATIO, no_monomials= %d, currently only available for reweighting!\n", no_monomials);
+	}
+      }
     }
     else {
       monomial_list[i].pf = NULL;
diff --git a/monomial/monomial.h b/monomial/monomial.h
index c2321956f..00c25a1c8 100644
--- a/monomial/monomial.h
+++ b/monomial/monomial.h
@@ -50,6 +50,7 @@
 #define CLOVERRAT 19
 #define CLOVERRATCOR 20
 #define CLOVERDETRATIORW 21
+#define NDCLOVERDETRATIO 22
 
 #define max_no_monomials 30
 

From 5d8d4665ac08cbdb9472368f56d7fd0b2d1157ff Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Apr 2018 13:36:23 +0300
Subject: [PATCH 55/85] Some changes to read_input

---
 read_input.h |  1 +
 read_input.l | 61 ++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/read_input.h b/read_input.h
index a4fa4281a..ce9fd9ccb 100644
--- a/read_input.h
+++ b/read_input.h
@@ -88,6 +88,7 @@ extern "C"
   extern int bc_flag;
   extern int online_measurement_flag;
   extern int online_measurement_freq;
+  extern int restoresu3_flag;
   extern int reweighting_flag;
   extern int reweighting_samples; 
   extern int no_samples;
diff --git a/read_input.l b/read_input.l
index 9bc182a78..f3539b325 100644
--- a/read_input.l
+++ b/read_input.l
@@ -147,6 +147,7 @@ static inline void rmQuotes(char *str){
   int bc_flag;
   int online_measurement_flag;
   int online_measurement_freq;
+  int restoresu3_flag;
   int reweighting_flag;
   int reweighting_samples;
   int no_samples;
@@ -287,6 +288,7 @@ static inline void rmQuotes(char *str){
 %x ORIENTEDPLAQUETTESMEAS
 %x GRADIENTFLOWMEAS
 
+%x RESTORESU3
 %x REWEIGH
 %x REWSAMPLES
 
@@ -328,6 +330,8 @@ static inline void rmQuotes(char *str){
 %x CLRATCORMONOMIAL
 %x NDCLRATMONOMIAL
 %x NDRATCORMONOMIAL
+%x NDDETRATMONOMIAL
+%x NDCLDETRATMONOMIAL
 %x NDCLRATCORMONOMIAL
 %x POLYMONOMIAL
 %x CLPOLYMONOMIAL
@@ -430,6 +434,7 @@ static inline void rmQuotes(char *str){
 ^PropagatorType{EQL}               BEGIN(WRPROPFLAG);
 ^RanluxdLevel{EQL}                 BEGIN(RLXDLEVEL);
 ^GCRPreconditioner{EQL}            BEGIN(PRECON);
+^RestoreSU3{EQL}                   BEGIN(RESTORESU3);
 ^ComputeReweightingFactor{EQL}     BEGIN(REWEIGH);
 ^NoReweightingSamples{EQL}         BEGIN(REWSAMPLES);
 ^SourceTimeSlice{EQL}              BEGIN(SOURCETS);
@@ -746,7 +751,7 @@ static inline void rmQuotes(char *str){
     sscanf(yytext, " %[a-zA-Z] = %d", name, &a);
     mg_no_shifts=a;
     mg_mms_mass=0;
-    if(myverbose) printf("  MG_Num_of_shifts set to %d line %d operator %d\n", mg_omp_num_threads, line_of_file, current_operator);
+    if(myverbose) printf("  MG_Num_of_shifts set to %d line %d operator %d\n", mg_no_shifts, line_of_file, current_operator);
   }
   {SPC}*MGMMSMass{EQL}{FLT}+ {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
@@ -1432,6 +1437,11 @@ static inline void rmQuotes(char *str){
     strcpy((*mnl).name, "NDDETRATIO");
     g_running_phmc = 1;
   }
+  else if(strcmp(yytext, "NDCLOVERDETRATIO")==0) {
+    mnl->type = NDCLOVERDETRATIO; /* stored name below must spell this same keyword */
+    strcpy((*mnl).name, "NDCLOVERDETRATIO");
+    g_running_phmc = 1;
+  }
   else if(strcmp(yytext, "NDPOLY")==0) {
     mnl->type = NDPOLY;
     strcpy((*mnl).name, "NDPOLY");
@@ -1514,10 +1524,13 @@ static inline void rmQuotes(char *str){
   else if(mnl->type == NDRAT) BEGIN(NDRATMONOMIAL);
   else if(mnl->type == RAT) BEGIN(RATMONOMIAL);
   else if(mnl->type == NDCLOVERRAT) BEGIN(NDCLRATMONOMIAL);
+  else if(mnl->type == NDDETRATIO) BEGIN(NDDETRATMONOMIAL);
   else if(mnl->type == CLOVERRAT) BEGIN(CLRATMONOMIAL);
   else if(mnl->type == NDRATCOR) BEGIN(NDRATCORMONOMIAL);
   else if(mnl->type == RATCOR) BEGIN(RATCORMONOMIAL);
   else if(mnl->type == NDCLOVERRATCOR) BEGIN(NDCLRATCORMONOMIAL);
+  else if(mnl->type == NDDETRATIO) BEGIN(NDDETRATMONOMIAL);
+  else if(mnl->type == NDCLOVERDETRATIO) BEGIN(NDCLDETRATMONOMIAL);
   else if(mnl->type == CLOVERRATCOR) BEGIN(CLRATCORMONOMIAL);
   else if(mnl->type == POLY || mnl->type == POLYDETRATIO)  {
           fprintf(stderr,"starting to parse poly(detratio) monomial\n");
@@ -1531,9 +1544,9 @@ static inline void rmQuotes(char *str){
 
 
 
-<DETMONOMIAL,GAUGEMONOMIAL,NDPOLYMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,CLPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,RATMONOMIAL,CLRATMONOMIAL,RATCORMONOMIAL,CLRATCORMONOMIAL>{
+<DETMONOMIAL,GAUGEMONOMIAL,NDPOLYMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,CLPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,RATMONOMIAL,CLRATMONOMIAL,RATCORMONOMIAL,CLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*Timescale{EQL}{DIGIT}+ {
-    if(mnl->type == NDDETRATIO) {
+    if(mnl->type == NDDETRATIO || mnl->type == NDCLOVERDETRATIO) {
       mnl->timescale = -5;
       if(myverbose) printf("  timescales set to %d line %d monomial %d since NDDETRATIO is not for MD evolution\n", a, line_of_file, current_monomial);
     }
@@ -1553,7 +1566,7 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,CLPOLYMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
+<CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,CLPOLYMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*CSW{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
     mnl->c_sw = c;
@@ -1577,7 +1590,7 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<DETMONOMIAL,POLYMONOMIAL,NDPOLYMONOMIAL,CLPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,RATMONOMIAL,CLRATMONOMIAL,RATCORMONOMIAL,CLRATCORMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,NDPOLYMONOMIAL,CLPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,RATMONOMIAL,CLRATMONOMIAL,RATCORMONOMIAL,CLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*Kappa{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
     mnl->kappa = c;
@@ -1585,7 +1598,7 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<DETMONOMIAL,POLYMONOMIAL,CLDETRATRWMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,CLDETRATRWMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*2KappaMu2{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mu2 = c;
@@ -1609,7 +1622,7 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<NDPOLYMONOMIAL,CLPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL>{
+<NDPOLYMONOMIAL,CLPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*2KappaMubar{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mubar = c;
@@ -1622,7 +1635,19 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
+<NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
+  {SPC}*2KappaMubar2{EQL}{FLT} {
+    sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
+    mnl->mubar2 = c;
+    if(myverbose) printf("  2KappaMubar2 set to %f line %d monomial %d\n", c, line_of_file, current_monomial);
+  }
+  {SPC}*2KappaEpsbar2{EQL}{FLT} {
+    sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
+    mnl->epsbar2 = c;
+    if(myverbose) printf("  2KappaEpsbar2 set to %f line %d monomial %d\n", c, line_of_file, current_monomial);
+  }
+}
+<DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*UseExternalInverter{EQL}quda {
     if(myverbose) printf("  Use Quda inverter line %d monomial %d\n", line_of_file, current_monomial);
     mnl->solver_params.external_inverter = QUDA_INVERTER;
@@ -1673,7 +1698,7 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*ForcePrecision{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
     mnl->forceprec = c;
@@ -1710,7 +1735,7 @@ static inline void rmQuotes(char *str){
   }
 }
 
-<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*2KappaMu{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mu = c;
@@ -2727,15 +2752,23 @@ static inline void rmQuotes(char *str){
 }
 <REWEIGH>yes {
   reweighting_flag = 1;
-  if(myverbose!=0) fprintf(stderr, "Compute reweighting factor\n");
+  if(myverbose!=0) printf("Compute reweighting factor\n");
 }
 <REWEIGH>no {
   reweighting_flag = 0;
-  if(myverbose!=0) fprintf(stderr, "Do not compute reweighting factor\n");
+  if(myverbose!=0) printf("Do not compute reweighting factor\n");
+}
+<RESTORESU3>yes {
+  restoresu3_flag = 1;
+  if(myverbose!=0) printf("Restore SU(3) gauge-field\n");
+}
+<RESTORESU3>no {
+  restoresu3_flag = 0;
+  if(myverbose!=0) printf("Do not restore SU(3) gauge-field\n");
 }
 <REWSAMPLES>{DIGIT}+ {
   reweighting_samples = atoi(yytext);
-  if(myverbose!=0) fprintf(stderr, "Number of reweighting samples set to %d\n", reweighting_samples);
+  if(myverbose!=0) printf("Number of reweighting samples set to %d\n", reweighting_samples);
 }
 
 <MIXCGIT>{DIGIT}+ {
@@ -2760,7 +2793,7 @@ static inline void rmQuotes(char *str){
   BEGIN(comment_caller);
 }
 
-<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,NDPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,CLPOLYMONOMIAL,GAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,ORIENTEDPLAQUETTESMEAS,GRADIENTFLOWMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,DBCLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL,INITDEFLATION,DEFLATION,INITMULTIGRID,MULTIGRID,INITEXTERNALINVERTER,QUDAINVERTER,QPHIXINVERTER>{SPC}*\n   {
+<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,NDPOLYMONOMIAL,NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,CLPOLYMONOMIAL,GAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,ORIENTEDPLAQUETTESMEAS,GRADIENTFLOWMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,DBCLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL,INITDEFLATION,DEFLATION,INITMULTIGRID,MULTIGRID,INITEXTERNALINVERTER,QUDAINVERTER,QPHIXINVERTER,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{SPC}*\n   {
   line_of_file++;
 }
 <*>{SPC}*\n                       {

From e64478ede023a3046db1cff6d1a8a1bb025dcac9 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Apr 2018 13:41:29 +0300
Subject: [PATCH 56/85] Replacing MGTEST with debug_level > 2

---
 DDalphaAMG_interface.c | 76 +++++++++++++-----------------------------
 DDalphaAMG_interface.h |  1 +
 2 files changed, 25 insertions(+), 52 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 2dfc06feb..277342379 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -28,6 +28,7 @@
 int mg_setup_iter;
 int mg_coarse_setup_iter;
 int mg_update_setup_iter;
+int mg_update_gauge;
 int mg_omp_num_threads;
 int mg_Nvec;
 int mg_lvl;
@@ -101,9 +102,6 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
 #include "operator/clovertm_operators.h"
 #include "operator/Hopping_Matrix.h"
 
-//Enable to test the solution. It cost an application more of the operator. 
-//TODO: test all the operators interfaced and then undefine this flag.
-#define MGTEST
 //Enable variant for shifted operator in the ND sector.
 //The variant is used in case of initial guess for the squared operator.
 //It is faster and tests prove also to be safe (read Appendix A of arxiv:1801.##### by S.Bacchio et al.)
@@ -540,7 +538,7 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
   // Checking if initial guess is given
   sqnorm = square_norm(up_new, N, 1);
   sqnorm += square_norm(dn_new, N, 1);
-  if ( sqnorm>0 ) init_guess = 1;
+  if ( sqnorm > 1e-14 ) init_guess = 1;
 
   // In case of initial guess and squared operator, we do the inversion in two step and we need two more vectors
   if ( init_guess && (
@@ -575,7 +573,7 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
     new2tmp = solver_field[assign_solver_field++];
   }
 
-  // Reconstracting initial guess in case of oe
+  // Reconstructing initial guess in case of oe
   if ( init_guess && N==VOLUME/2 ) {
     init_solver_field(&oe_solver_field, VOLUMEPLUSRAND, 4);
     spinor* tmp11 = oe_solver_field[0];
@@ -583,17 +581,17 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
     spinor* tmp12 = oe_solver_field[2];
     spinor* tmp22 = oe_solver_field[3];
 
-#ifdef MGTEST
-    double differ[2];
-    f( tmp11, tmp12, up_new, dn_new);
-    diff( tmp11, tmp11, up_old, N);
-    diff( tmp12, tmp12, dn_old, N);
-    differ[0] = sqrt(square_norm(tmp11, N, 1)+square_norm(tmp12, N, 1));
-    differ[1] = sqrt(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1));
+    if (g_debug_level > 2) {
+      double differ[2];
+      f( tmp11, tmp12, up_new, dn_new);
+      diff( tmp11, tmp11, up_old, N);
+      diff( tmp12, tmp12, dn_old, N);
+      differ[0] = sqrt(square_norm(tmp11, N, 1)+square_norm(tmp12, N, 1));
+      differ[1] = sqrt(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1));
   
-    if(g_proc_id == 0)
-      printf("MG TEST: using initial guess. Relative residual = %e  \n", differ[0]/differ[1]);
-#endif
+      if(g_proc_id == 0)
+        printf("MG TEST: using initial guess. Relative residual = %e  \n", differ[0]/differ[1]);
+    }
 
     /* Reconstruct the even sites                */
     if (    f == Qtm_pm_ndpsi       ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0
@@ -680,25 +678,6 @@ static int MG_solve_nd( spinor * up_new, spinor * dn_new, spinor * const up_old,
     } 
     finalize_solver(oe_solver_field, 4);
   } 
-#ifdef MGTEST
-  else {
-    init_solver_field(&oe_solver_field, VOLUMEPLUSRAND, 2);
-    spinor* tmp1 = oe_solver_field[0];
-    spinor* tmp2 = oe_solver_field[1];
-
-    double differ[2];
-    f( tmp1, tmp2, up_new, dn_new);
-    diff( tmp1, tmp1, up_old, N);
-    diff( tmp2, tmp2, dn_old, N);
-    differ[0] = sqrt(square_norm(tmp1, N, 1)+square_norm(tmp2, N, 1));
-    differ[1] = sqrt(square_norm(up_old, N, 1)+square_norm(dn_old, N, 1));
-  
-    if(g_proc_id == 0)
-      printf("MG TEST: using initial guess. Relative residual = %e  \n", differ[0]/differ[1]);
-    finalize_solver(oe_solver_field, 2);
-  }
-#endif
-
 
   // Checking if the operator is in the list and compatible with N
   if (      f == Qtm_ndpsi ||           //  Gamma5 Dh    - Schur complement with csw = 0
@@ -1013,6 +992,11 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
     if (!mg_status.success) 
       printf("ERROR: the solver did not converge!\n");
   }
+
+  free(new1);
+  free(new2);
+  free(mg_odd_shifts);
+  free(mg_even_shifts);
   
   return mg_status.success;
 }
@@ -1215,20 +1199,16 @@ int MG_solver(spinor * const phi_new, spinor * const phi_old,
 
   success = MG_solve( phi_new, phi_old, mg_prec, N, f );
 
-#ifdef MGTEST
-  if(success) 
+  if(success && g_debug_level > 2) 
     success = MG_check( phi_new, phi_old, N, mg_prec, f );
-#endif
   
   if(!success) {
     MG_reset();
     MG_pre_solve(gf);
     success = MG_solve( phi_new, phi_old, mg_prec, N, f);
     
-#ifdef MGTEST
-    if(success) 
+    if(success && g_debug_level > 2) 
       success = MG_check( phi_new, phi_old, N, mg_prec, f );
-#endif
   }
   
   if(!success) {
@@ -1292,8 +1272,7 @@ int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
 
   success = MG_solve_nd( up_new, dn_new, up_old, dn_old, mg_prec, N, f );
   
-#ifdef MGTEST
-  if(success) {
+  if(success && g_debug_level > 2) {
     success = MG_check_nd( up_new, dn_new, up_old, dn_old, N, mg_prec, f );
 
     if(!success) {
@@ -1303,17 +1282,14 @@ int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
         success = MG_check_nd( up_new, dn_new, up_old, dn_old, N, mg_prec, f );
     }
   }
-#endif
   
   if(!success) {
     MG_reset();
     MG_pre_solve(gf);
     success = MG_solve_nd( up_new, dn_new, up_old, dn_old, mg_prec, N, f);
     
-#ifdef MGTEST
-    if(success) 
+    if(success && g_debug_level > 2) 
       success = MG_check_nd( up_new, dn_new, up_old, dn_old, N, mg_prec, f );
-#endif
   }
   
   if(!success) {
@@ -1387,20 +1363,16 @@ int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
 
   success = MG_mms_solve_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, mg_prec, N, f );
   
-#ifdef MGTEST
-  if(success) 
+  if(success && g_debug_level > 2) 
     success = MG_mms_check_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, N, mg_prec, f );
-#endif
   
   if(!success) {
     MG_reset();
     MG_pre_solve(gf);
     success = MG_mms_solve_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, mg_prec, N, f);
     
-#ifdef MGTEST
-    if(success) 
+    if(success && g_debug_level > 2) 
       success = MG_mms_check_nd( up_new, dn_new, up_old, dn_old, shifts, no_shifts, N, mg_prec, f );
-#endif
   }
   
   if(!success) {
diff --git a/DDalphaAMG_interface.h b/DDalphaAMG_interface.h
index f7e3b094a..0fa8e75ca 100644
--- a/DDalphaAMG_interface.h
+++ b/DDalphaAMG_interface.h
@@ -31,6 +31,7 @@
 extern int mg_setup_iter;
 extern int mg_coarse_setup_iter;
 extern int mg_update_setup_iter;
+extern int mg_update_gauge;
 extern int mg_omp_num_threads;
 extern int mg_Nvec;
 extern int mg_lvl;

From 22f557fdc3a8b397e09dc18781291fcb34c51703 Mon Sep 17 00:00:00 2001
From: Finkenrath <j.finkenrath@cyi.ac.cy>
Date: Wed, 25 Apr 2018 13:46:58 +0300
Subject: [PATCH 57/85] Adding extra restoresu3

---
 update_gauge.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/update_gauge.c b/update_gauge.c
index d45123b4a..b8d91d99e 100644
--- a/update_gauge.c
+++ b/update_gauge.c
@@ -85,7 +85,8 @@ void update_gauge(const double step, hamiltonian_field_t * const hf) {
       exposu3(&w,&deriv);
       restoresu3(&v,&w);
       _su3_times_su3(w, v, *z);
-      _su3_assign(*z, w);
+      restoresu3(&v,&w);
+      _su3_assign(*z, v);
     }
   }
 

From 86a49aad16b8d2c616d47c1398dae78be1cb46aa Mon Sep 17 00:00:00 2001
From: Finkenrath <j.finkenrath@cyi.ac.cy>
Date: Wed, 25 Apr 2018 14:16:08 +0300
Subject: [PATCH 58/85] Making exposu3 and restoresu3 function similar to
 openQCD

---
 expo.c | 167 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 133 insertions(+), 34 deletions(-)

diff --git a/expo.c b/expo.c
index dcac9a983..f8581b63d 100644
--- a/expo.c
+++ b/expo.c
@@ -52,48 +52,135 @@
 #include "su3.h"
 #include "su3adj.h"
 #include "expo.h"
+#include "float.h"
+
+static double imag_det(const su3adj* p) {
+  double d,tos3,o3,os3;
+  tos3=2.0/sqrt(3.0);
+  o3=1.0/3.0;
+  os3=1.0/sqrt(3.0);
+  
+  d=tos3*(*p).d8*(o3*(*p).d8*(*p).d8-(*p).d3*(*p).d3)+2*((*p).d2*(*p).d4*(*p).d7-(*p).d1*(*p).d4*(*p).d6-(*p).d2*(*p).d5*(*p).d6-(*p).d1*(*p).d5*(*p).d7);
+  d+=(os3*(*p).d8-(*p).d3)*((*p).d4*(*p).d4+(*p).d5*(*p).d5)+(os3*(*p).d8+(*p).d3)*((*p).d6*(*p).d6+(*p).d7*(*p).d7)-tos3*(*p).d8*((*p).d1*(*p).d1+(*p).d2*(*p).d2);	
+  return d;
+}
+
+static void mul_su3alg(su3adj* p,double d) {
+  (*p).d1*=d;
+  (*p).d2*=d;
+  (*p).d3*=d;
+  (*p).d4*=d;
+  (*p).d5*=d;
+  (*p).d6*=d;
+  (*p).d7*=d;
+  (*p).d8*=d;
+}
 
 void exposu3(su3* const vr, const su3adj* const p) {
-  int i;
-  su3 ALIGN v,v2;
-  double ALIGN fac,r;
-  double ALIGN a,b;
-  _Complex double ALIGN a0,a1,a2,a1p;
+  int n,m,mm;
+  su3 ALIGN v,v2,vt;
+  su3adj pa;
+  double ALIGN d,tc;
+  _Complex double t;
+  _Complex double ALIGN p0,p1,p2;
+  _Complex double ALIGN q0,q1,q2;
+  static int init_flag=0, no_c;
+  static double *c;
+
+  if (init_flag==0) {
+    int k;
+    double fctr = 1.0;
+    no_c = 0;
 
-  /* it writes 'p=vec(h_{j,mu})' in matrix form 'v' */  
+    while (fctr>DBL_EPSILON) {
+      no_c++;
+      fctr/=(double)(no_c);
+    }
+    no_c += 7;
+    no_c += (no_c%2);
+ 
+    c=malloc((no_c+1)*sizeof(*c));
+   
+   c[0]=1.0;
+   for (k=0; k < no_c; k++)
+     c[k+1]=c[k]/(double)(k+1);
+
+    init_flag=1;
+  }
+  
   _make_su3(v,*p);
+  _su3_times_su3(v2,v,v);
+  tc = -2.0*(v2.c00 +v2.c11+v2.c22);
+  
+  pa.d1=(*p).d1;
+  pa.d2=(*p).d2;
+  pa.d3=(*p).d3;
+  pa.d4=(*p).d4;
+  pa.d5=(*p).d5;
+  pa.d6=(*p).d6;
+  pa.d7=(*p).d7;
+  pa.d8=(*p).d8;
+  
+  mm=0;
+  while (tc>1.0) {
+    mul_su3alg(&pa,0.5);
+    tc*=0.5;
+    mm+=1;
+  }
+  
+  /* it writes 'p=vec(h_{j,mu})' in matrix form 'v'  */
+  _make_su3(v,pa);
   /* calculates v^2 */
   _su3_times_su3(v2,v,v);
-  /* */
-  a = 0.5 * (creal(v2.c00) + creal(v2.c11) + creal(v2.c22));
-  /* 1/3 imaginary part of tr v*v2 */
-  b = 0.33333333333333333 * cimag(v.c00 * v2.c00 + v.c01 * v2.c10 + v.c02 * v2.c20 +
-                                  v.c10 * v2.c01 + v.c11 * v2.c11 + v.c12 * v2.c21 +
-                                  v.c20 * v2.c02 + v.c21 * v2.c12 + v.c22 * v2.c22  );
-  a0  = 0.16059043836821615e-9;
-  a1  = 0.11470745597729725e-10;
-  a2  = 0.76471637318198165e-12;
-  fac = 0.20876756987868099e-8;      /*  1/12! */
-  r   = 12.0;
-  for(i = 3; i <= 15; ++i)
-  {
-    a1p = a0 + a * a2;
-    a0 = fac + b * I * a2;
-    a2 = a1;
-    a1 = a1p;
-    fac *= r;
-    r -= 1.0;
+  /* t= -tr(X^2)/2*/
+  t = -0.5*(v2.c00 +v2.c11+v2.c22);
+  /* d = -1i * det(X) of the rescaled X (pa), so that d matches v, v2 and t computed from pa above */
+  d=-imag_det(&pa);
+ /*  printf(" d= %.16f and t=%.16f + 1i %.16f \n",d,creal(t),cimag(t));*/
+  
+  if(fabs(d)>(1.000001*(1.000002-fabs(t))))
+    printf("The norm of X is larger than 1 and N = %d \n", no_c);
+  
+  
+  p0=c[no_c];
+  p1=0.0;
+  p2=0.0;
+  
+  for (n=(no_c-1);n>=0;n--) {
+    q0=p0;
+    q1=p1;
+    q2=p2;
+    
+    p0=c[n]-I*d*q2;
+    p1=q0-t*q2;
+    p2=q1;
   }
+   
   /* vr = a0 + a1*v + a2*v2 */
-  vr->c00 = a0 + a1 * v.c00 + a2 * v2.c00;
-  vr->c01 =      a1 * v.c01 + a2 * v2.c01;
-  vr->c02 =      a1 * v.c02 + a2 * v2.c02;
-  vr->c10 =      a1 * v.c10 + a2 * v2.c10;
-  vr->c11 = a0 + a1 * v.c11 + a2 * v2.c11;
-  vr->c12 =      a1 * v.c12 + a2 * v2.c12;
-  vr->c20 =      a1 * v.c20 + a2 * v2.c20;
-  vr->c21 =      a1 * v.c21 + a2 * v2.c21;
-  vr->c22 = a0 + a1 * v.c22 + a2 * v2.c22;
+  vt.c00 = p0 + p1 * v.c00 + p2 * v2.c00;
+  vt.c01 =      p1 * v.c01 + p2 * v2.c01;
+  vt.c02 =      p1 * v.c02 + p2 * v2.c02;
+  vt.c10 =      p1 * v.c10 + p2 * v2.c10;
+  vt.c11 = p0 + p1 * v.c11 + p2 * v2.c11;
+  vt.c12 =      p1 * v.c12 + p2 * v2.c12;
+  vt.c20 =      p1 * v.c20 + p2 * v2.c20;
+  vt.c21 =      p1 * v.c21 + p2 * v2.c21;
+  vt.c22 = p0 + p1 * v.c22 + p2 * v2.c22;
+  
+  for(m=0;m<mm;m++) {
+    _su3_times_su3(v2,vt,vt);
+    vt=v2;
+  }
+  
+  vr->c00=vt.c00;
+  vr->c01=vt.c01; 
+  vr->c02=vt.c02; 
+  vr->c10=vt.c10;
+  vr->c11=vt.c11;
+  vr->c12=vt.c12;
+  vr->c20=vt.c20;
+  vr->c21=vt.c21;
+  vr->c22=vt.c22;
 }
 
 void exposu3_check(su3* const vr, const su3adj* const p, int im) {
@@ -135,6 +222,12 @@ void restoresu3(su3* const vr, const su3* const u) {
   vr->c20 = conj(vr->c01 * vr->c12 - vr->c02 * vr->c11);
   vr->c21 = conj(vr->c02 * vr->c10 - vr->c00 * vr->c12);
   vr->c22 = conj(vr->c00 * vr->c11 - vr->c01 * vr->c10);
+
+  /* compute  row 2 as the conjugate of the cross-product of 3 and 1 */
+  vr->c10 = conj(vr->c21 * vr->c02 - vr->c22 * vr->c01);
+  vr->c11 = conj(vr->c22 * vr->c00 - vr->c20 * vr->c02);
+  vr->c12 = conj(vr->c20 * vr->c01 - vr->c21 * vr->c00);
+
 }
 
 void restoresu3_in_place(su3* const u) {
@@ -156,6 +249,12 @@ void restoresu3_in_place(su3* const u) {
   u->c20 = conj(u->c01 * u->c12 - u->c02 * u->c11);
   u->c21 = conj(u->c02 * u->c10 - u->c00 * u->c12);
   u->c22 = conj(u->c00 * u->c11 - u->c01 * u->c10);
+
+  /* compute  row 2 as the conjugate of the cross-product of 3 and 1 */
+  u->c10 = conj(u->c21 * u->c02 - u->c22 * u->c01);
+  u->c11 = conj(u->c22 * u->c00 - u->c20 * u->c02);
+  u->c12 = conj(u->c20 * u->c01 - u->c21 * u->c00);
+
 }
                                 
 /* Exponentiates a hermitian 3x3 matrix Q */

From 867b24afd15a3e199b1f21d84f035c1d7d3d1b39 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Wed, 25 Apr 2018 14:18:56 +0300
Subject: [PATCH 59/85] Adding reweighting option for testing the restore of
 su3

---
 invert.c                             |  33 +++-
 meas/correlators.c                   | 229 +++++++++++++++++----------
 monomial/cloverdetratio_rwmonomial.c |  34 +++-
 3 files changed, 213 insertions(+), 83 deletions(-)

diff --git a/invert.c b/invert.c
index 366702761..365a25c5d 100644
--- a/invert.c
+++ b/invert.c
@@ -59,6 +59,7 @@
 #include "boundary.h"
 #include "solver/solver.h"
 #include "init/init.h"
+#include "init/init_gauge_tmp.h"
 #include "smearing/stout.h"
 #include "invert_eo.h"
 #include "monomial/monomial.h"
@@ -94,7 +95,7 @@
 #endif
 #include "meas/measurements.h"
 #include "source_generation.h"
-
+#include "expo.h"
 
 
 extern int nstore;
@@ -179,6 +180,9 @@ int main(int argc, char *argv[])
   j = init_gauge_field(VOLUMEPLUSRAND, 0);
   j += init_gauge_field_32(VOLUMEPLUSRAND, 0);  
 #endif
+  if(restoresu3_flag) {
+    j += init_gauge_tmp(VOLUMEPLUSRAND);
+  }
  
   if (j != 0) {
     fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
@@ -286,7 +290,19 @@ int main(int argc, char *argv[])
       fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
       exit(-2);
     }
-
+    if (restoresu3_flag) {
+      if (g_cart_id == 0) 
+        printf("# Restoring SU(3) matrices.\n");
+      for(int ix=0;ix<VOLUME;ix++) {
+        for(int mu=0;mu<4;mu++){
+          su3 *v, *w;
+          v=&(g_gauge_field[ix][mu]);
+          w=&(gauge_tmp[ix][mu]);
+          _su3_assign(*w,*v);
+          restoresu3_in_place(v);
+        }
+      }
+    }
 
     if (g_cart_id == 0) {
       printf("# Finished reading gauge field.\n");
@@ -294,6 +310,9 @@ int main(int argc, char *argv[])
     }
 #ifdef TM_USE_MPI
     xchange_gauge(g_gauge_field);
+    if (restoresu3_flag) {
+      xchange_gauge(gauge_tmp);
+    }
 #endif
     /*Convert to a 32 bit gauge field, after xchange*/
     convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND);
@@ -305,6 +324,15 @@ int main(int argc, char *argv[])
       fflush(stdout);
     }
 
+    if (restoresu3_flag) {
+      double plaquette_old = measure_plaquette( (const su3**) gauge_tmp);
+      if (g_cart_id == 0) {
+        printf("# The computed plaquette value before restoring SU(3) is %e\n which differ from the new one of %e.\n",
+               plaquette_old / (6.*VOLUME*g_nproc), (plaquette_energy-plaquette_old) / (6.*VOLUME*g_nproc));
+        fflush(stdout);
+      }
+    }
+
     if (use_stout_flag == 1){
       params_smear.rho = stout_rho;
       params_smear.iterations = stout_no_iter;
@@ -436,6 +464,7 @@ int main(int argc, char *argv[])
 #endif
   free_blocks();
   free_dfl_subspace();
+  free_gauge_tmp();
   free_gauge_field();
   free_gauge_field_32();
   free_geometry_indices();
diff --git a/meas/correlators.c b/meas/correlators.c
index 9d6089acc..950557569 100644
--- a/meas/correlators.c
+++ b/meas/correlators.c
@@ -38,7 +38,9 @@
 #include "measurements.h"
 #include "correlators.h"
 #include "gettime.h"
-
+#include "DDalphaAMG_interface.h"
+#include "read_input.h"
+#include "init/init_gauge_tmp.h"
 
 /******************************************************
  *
@@ -67,9 +69,11 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
 #endif
   FILE *ofs;
   char *filename;
-  char buf[100];
+  char *filename_tmp;
+  char buf[100], buf2[100];
   spinor phi;
   filename=buf;
+  filename_tmp = buf2;
 
   init_operators();
   if(no_operators < 1) {
@@ -106,7 +110,7 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
     for(int ts = 0; ts < max_time_slices; ts++){
 
       if( max_samples == 1 && max_time_slices == 1 ){
-        sprintf(filename,"%s%06d", "onlinemeas." ,traj);
+        sprintf(filename,"%s%06d", "onlinemeas.", traj);
       } else if ( max_samples == 1 && max_time_slices > 1){
         sprintf(filename,"%s%06d.t%03d", "onlinemeas.", traj, ts );
       } else {
@@ -127,19 +131,22 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
       }
       atime = gettime();
 
+      int runs = 1;
+      if (restoresu3_flag) runs = 2;
+
 #ifdef TM_USE_MPI
       sCpp = (double*) calloc(T, sizeof(double));
       sCpa = (double*) calloc(T, sizeof(double));
       sCp4 = (double*) calloc(T, sizeof(double));
       if(g_mpi_time_rank == 0) {
-        Cpp = (double*) calloc(g_nproc_t*T, sizeof(double));
-        Cpa = (double*) calloc(g_nproc_t*T, sizeof(double));
-        Cp4 = (double*) calloc(g_nproc_t*T, sizeof(double));
+        Cpp = (double*) calloc(g_nproc_t*T*runs, sizeof(double));
+        Cpa = (double*) calloc(g_nproc_t*T*runs, sizeof(double));
+        Cp4 = (double*) calloc(g_nproc_t*T*runs, sizeof(double));
       }
 #else
-      Cpp = (double*) calloc(T, sizeof(double));
-      Cpa = (double*) calloc(T, sizeof(double));
-      Cp4 = (double*) calloc(T, sizeof(double));
+      Cpp = (double*) calloc(T*runs, sizeof(double));
+      Cpa = (double*) calloc(T*runs, sizeof(double));
+      Cp4 = (double*) calloc(T*runs, sizeof(double));
 #endif
       source_generation_pion_only(g_spinor_field[0], g_spinor_field[1], 
 	    		      t0, sample, traj, measurement_list[id].seed);
@@ -148,87 +155,149 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
       optr->prop0 = g_spinor_field[2];
       optr->prop1 = g_spinor_field[3];
 
-      // op_id = 0, index_start = 0, write_prop = 0
-      optr->inverter(0, 0, 0);
-
-      /* now we bring it to normal format */
-      /* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
-      convert_eo_to_lexic(g_spinor_field[DUM_MATRIX], g_spinor_field[2], g_spinor_field[3]);
-      
-      /* now we sum only over local space for every t */
-      for(t = 0; t < T; t++) {
-        j = g_ipt[t][0][0][0];
-        res = 0.;
-        respa = 0.;
-        resp4 = 0.;
-        for(i = j; i < j+LX*LY*LZ; i++) {
-          res += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], g_spinor_field[DUM_MATRIX][i]);
-          _gamma0(phi, g_spinor_field[DUM_MATRIX][i]);
-          respa += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], phi);
-          _gamma5(phi, phi);
-          resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][i], phi);
+      for( int r = 0; r<runs; r++) {
+        
+        if (restoresu3_flag) {
+          for(int ix=0;ix<VOLUME;ix++) {
+            for(int mu=0;mu<4;mu++){
+              su3 *v, *w;
+              v=&(g_gauge_field[ix][mu]);
+              w=&(gauge_tmp[ix][mu]);
+              if(r == 0){
+                _su3_assign(*v,*w);
+              } else {
+                restoresu3_in_place(v);
+              }
+            }
+          }
+#ifdef TM_USE_MPI
+          xchange_gauge(g_gauge_field);
+#endif
+          mg_update_gauge = 1;
         }
 
+        // op_id = 0, index_start = 0, write_prop = 0
+        optr->inverter(0, 0, 0);
+
+        /* now we bring it to normal format */
+        /* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
+        convert_eo_to_lexic(g_spinor_field[DUM_MATRIX], g_spinor_field[2], g_spinor_field[3]);
+      
+        /* now we sum only over local space for every t */
+        for(t = 0; t < T; t++) {
+          j = g_ipt[t][0][0][0];
+          res = 0.;
+          respa = 0.;
+          resp4 = 0.;
+          for(i = j; i < j+LX*LY*LZ; i++) {
+            res += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], g_spinor_field[DUM_MATRIX][i]);
+            _gamma0(phi, g_spinor_field[DUM_MATRIX][i]);
+            respa += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], phi);
+            _gamma5(phi, phi);
+            resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][i], phi);
+          }
+          
 #if defined TM_USE_MPI
-        MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
-        res = mpi_res;
-        MPI_Reduce(&respa, &mpi_respa, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
-        respa = mpi_respa;
-        MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
-        resp4 = mpi_resp4;
-        sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-        sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-        sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+          MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
+          res = mpi_res;
+          MPI_Reduce(&respa, &mpi_respa, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
+          respa = mpi_respa;
+          MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
+          resp4 = mpi_resp4;
+          sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+          sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+          sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
 #else
-        Cpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-        Cpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-        Cp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+          Cpp[t+g_nproc_t*T*r] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+          Cpa[t+g_nproc_t*T*r] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+          Cp4[t+g_nproc_t*T*r] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
 #endif
-      }
-
+        }
+        
 #ifdef TM_USE_MPI
-      /* some gymnastics needed in case of parallelisation */
-      if(g_mpi_time_rank == 0) {
-        MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-        MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-        MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-      }
+        /* some gymnastics needed in case of parallelisation */
+        if(g_mpi_time_rank == 0) {
+          MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp+g_nproc_t*T*r, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+          MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa+g_nproc_t*T*r, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+          MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4+g_nproc_t*T*r, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+        }
 #endif
-
-      /* and write everything into a file */
-      if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
-        ofs = fopen(filename, "w");
-        fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0], 0.);
-        for(t = 1; t < g_nproc_t*T/2; t++) {
-          tt = (t0+t)%(g_nproc_t*T);
-          fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt]);
-          tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-          fprintf( ofs, "%e\n", Cpp[tt]);
+        
+        /* and write everything into a file */
+        if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
+          if(runs > 1) {
+            sprintf(filename_tmp,"%s.r%02d", filename, r);
+            ofs = fopen(filename_tmp, "w");
+          } else {
+            ofs = fopen(filename, "w");
+          }
+          fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0+g_nproc_t*T*r], 0.);
+          for(t = 1; t < g_nproc_t*T/2; t++) {
+            tt = (t0+t)%(g_nproc_t*T);
+            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt+g_nproc_t*T*r]);
+            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+            fprintf( ofs, "%e\n", Cpp[tt+g_nproc_t*T*r]);
+          }
+          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+          fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt+g_nproc_t*T*r], 0.);
+          
+          fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0+g_nproc_t*T*r], 0.);
+          for(t = 1; t < g_nproc_t*T/2; t++) {
+            tt = (t0+t)%(g_nproc_t*T);
+            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt+g_nproc_t*T*r]);
+            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+            fprintf( ofs, "%e\n", Cpa[tt+g_nproc_t*T*r]);
+          }
+          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+          fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt+g_nproc_t*T*r], 0.);
+          
+          fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0+g_nproc_t*T*r], 0.);
+          for(t = 1; t < g_nproc_t*T/2; t++) {
+            tt = (t0+t)%(g_nproc_t*T);
+            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt+g_nproc_t*T*r]);
+            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+            fprintf( ofs, "%e\n", Cp4[tt+g_nproc_t*T*r]);
+          }
+          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+          fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt+g_nproc_t*T*r], 0.);
+          fclose(ofs);
         }
-        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-        fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt], 0.);
-
-        fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0], 0.);
-        for(t = 1; t < g_nproc_t*T/2; t++) {
-          tt = (t0+t)%(g_nproc_t*T);
-          fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt]);
-          tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-          fprintf( ofs, "%e\n", Cpa[tt]);
+      }
+      if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0 && runs == 2) {
+        sprintf(filename_tmp,"%s.diff", filename);
+        ofs = fopen(filename_tmp, "w");
+        fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0] - Cpp[t0+g_nproc_t*T], 0.);
+          for(t = 1; t < g_nproc_t*T/2; t++) {
+            tt = (t0+t)%(g_nproc_t*T);
+            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt] - Cpp[tt+g_nproc_t*T]);
+            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+            fprintf( ofs, "%e\n", Cpp[tt] - Cpp[tt+g_nproc_t*T]);
+          }
+          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+          fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt] - Cpp[tt+g_nproc_t*T], 0.);
+          
+          fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0] - Cpa[t0+g_nproc_t*T], 0.);
+          for(t = 1; t < g_nproc_t*T/2; t++) {
+            tt = (t0+t)%(g_nproc_t*T);
+            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt] - Cpa[tt+g_nproc_t*T]);
+            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+            fprintf( ofs, "%e\n", Cpa[tt] - Cpa[tt+g_nproc_t*T]);
+          }
+          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+          fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt] - Cpa[tt+g_nproc_t*T], 0.);
+          
+          fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0] - Cp4[t0+g_nproc_t*T], 0.);
+          for(t = 1; t < g_nproc_t*T/2; t++) {
+            tt = (t0+t)%(g_nproc_t*T);
+            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt] - Cp4[tt+g_nproc_t*T]);
+            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+            fprintf( ofs, "%e\n", Cp4[tt] - Cp4[tt+g_nproc_t*T]);
+          }
+          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+          fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt] - Cp4[tt+g_nproc_t*T], 0.);
+          fclose(ofs);
         }
-        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-        fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt], 0.);
 
-        fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0], 0.);
-        for(t = 1; t < g_nproc_t*T/2; t++) {
-          tt = (t0+t)%(g_nproc_t*T);
-          fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt]);
-          tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-          fprintf( ofs, "%e\n", Cp4[tt]);
-        }
-        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-        fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt], 0.);
-        fclose(ofs);
-      }
 #ifdef TM_USE_MPI
       if(g_mpi_time_rank == 0) {
         free(Cpp); free(Cpa); free(Cp4);
diff --git a/monomial/cloverdetratio_rwmonomial.c b/monomial/cloverdetratio_rwmonomial.c
index 6653a047b..11e1cc7b8 100644
--- a/monomial/cloverdetratio_rwmonomial.c
+++ b/monomial/cloverdetratio_rwmonomial.c
@@ -45,7 +45,10 @@
 #include "monomial/monomial.h"
 #include "boundary.h"
 #include "cloverdetratio_rwmonomial.h"
-
+#include "expo.h"
+#include "xchange/xchange.h"
+#include "init/init_gauge_tmp.h"
+#include "DDalphaAMG_interface.h"
 
 double cloverdetratio_rwacc(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
@@ -53,6 +56,20 @@ double cloverdetratio_rwacc(const int id, hamiltonian_field_t * const hf) {
   double atime, etime;
   atime = gettime();
 
+  if (restoresu3_flag) {
+    for(int ix=0;ix<VOLUME;ix++) {
+      for(int mu=0;mu<4;mu++){
+        su3 *v, *w;
+        v=&(hf->gaugefield[ix][mu]);
+        w=&(gauge_tmp[ix][mu]);
+        _su3_assign(*v,*w);
+      }
+    }
+#ifdef TM_USE_MPI
+    xchange_gauge(hf->gaugefield);
+#endif
+  }
+
   g_mu = mnl->mu2;
   boundary(mnl->kappa2);
 
@@ -62,6 +79,21 @@ double cloverdetratio_rwacc(const int id, hamiltonian_field_t * const hf) {
   g_mu3 = 0.;
   mnl->Qp(mnl->w_fields[1], mnl->pf);
 
+
+  if (restoresu3_flag) {
+    for(int ix=0;ix<VOLUME;ix++) {
+      for(int mu=0;mu<4;mu++){
+        su3 *v;
+        v=&(hf->gaugefield[ix][mu]);
+        restoresu3_in_place(v);
+      }
+    }
+#ifdef TM_USE_MPI
+    xchange_gauge(hf->gaugefield);
+#endif
+    mg_update_gauge = 1;
+  }
+
   g_mu3 = 0.;
   g_mu = mnl->mu;
   boundary(mnl->kappa);

From ed55735437802ba55ebd58a135d4f9b5201873a3 Mon Sep 17 00:00:00 2001
From: Finkenrath <j.finkenrath@cyi.ac.cy>
Date: Fri, 27 Apr 2018 16:37:26 +0300
Subject: [PATCH 60/85] Adding extra restoresu3

---
 update_momenta_fg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/update_momenta_fg.c b/update_momenta_fg.c
index df89f5de1..b3a54194a 100644
--- a/update_momenta_fg.c
+++ b/update_momenta_fg.c
@@ -128,6 +128,7 @@ void update_momenta_fg(int * mnllist, double step, const int no,
       exposu3(&w,&deriv);
       restoresu3(&v,&w);
       _su3_times_su3(w, v, *z);
+      restoresu3_in_place(&w); /* project the product itself: the next line stores w, so restoring into v was a no-op */
       _su3_assign(*z, w);
     }
   }

From da53162f630dcdb2b0b217e2f57dd8de1ac8c072 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 27 Apr 2018 16:38:00 +0300
Subject: [PATCH 61/85] Solving undefined reference in case of compilation
 without DDalphaAMG

---
 DDalphaAMG_interface.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 277342379..4f37ceb0b 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -82,6 +82,14 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
     exit(1);
 }
 
+int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
+		 spinor * const up_old, spinor * const dn_old,
+		 const double precision, const int max_iter, const int rel_prec,
+		 const int N, su3 **gf, matrix_mult_nd f) {
+    printf("ERROR: MG_solver_nd called but DDalphaAMG library not included.\n");
+    exit(1);
+}
+
 #else
 #include <stdio.h>
 #include <stdlib.h>

From 94b4494e13ee35e6dbf85c273299c5265b7e2540 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 18 May 2018 19:49:10 +0300
Subject: [PATCH 62/85] Making exposu3 thread safe

---
 expo.c                            | 51 +++++++++++++++----------------
 expo.h                            | 11 ++++---
 global.h                          |  4 +++
 smearing/hex_stout_exclude_none.c |  1 +
 smearing/hex_stout_exclude_one.c  |  4 ++-
 smearing/hex_stout_exclude_two.c  |  1 +
 smearing/stout_stout_smear.c      |  1 +
 update_gauge.c                    |  2 ++
 update_momenta_fg.c               |  1 +
 9 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/expo.c b/expo.c
index f8581b63d..6cc72b734 100644
--- a/expo.c
+++ b/expo.c
@@ -53,6 +53,7 @@
 #include "su3adj.h"
 #include "expo.h"
 #include "float.h"
+#include "global.h"
 
 static double imag_det(const su3adj* p) {
   double d,tos3,o3,os3;
@@ -76,6 +77,25 @@ static void mul_su3alg(su3adj* p,double d) {
   (*p).d8*=d;
 }
 
+void init_exposu3() {
+  int k;
+  double fctr = 1.0;
+  g_exposu3_no_c = 0;
+  /* count Taylor terms until 1/n! is no longer above DBL_EPSILON */
+  while (fctr>DBL_EPSILON) {
+    g_exposu3_no_c++;
+    fctr/=(double)(g_exposu3_no_c);
+  }
+  g_exposu3_no_c += 7;
+  g_exposu3_no_c += (g_exposu3_no_c%2);
+  /* table of g_exposu3_no_c+1 coefficients, filled below as c[k] = 1/k! */
+  g_exposu3_c=malloc((g_exposu3_no_c+1)*sizeof(*g_exposu3_c));
+  if (g_exposu3_c == NULL) { fprintf(stderr, "Not enough memory for exposu3 coefficients! Aborting...\n"); exit(1); }
+  g_exposu3_c[0]=1.0;
+  for (k=0; k < g_exposu3_no_c; k++)
+    g_exposu3_c[k+1]=g_exposu3_c[k]/(double)(k+1);
+}
+
 void exposu3(su3* const vr, const su3adj* const p) {
   int n,m,mm;
   su3 ALIGN v,v2,vt;
@@ -84,29 +104,6 @@ void exposu3(su3* const vr, const su3adj* const p) {
   _Complex double t;
   _Complex double ALIGN p0,p1,p2;
   _Complex double ALIGN q0,q1,q2;
-  static int init_flag=0, no_c;
-  static double *c;
-
-  if (init_flag==0) {
-    int k;
-    double fctr = 1.0;
-    no_c = 0;
-
-    while (fctr>DBL_EPSILON) {
-      no_c++;
-      fctr/=(double)(no_c);
-    }
-    no_c += 7;
-    no_c += (no_c%2);
- 
-    c=malloc((no_c+1)*sizeof(*c));
-   
-   c[0]=1.0;
-   for (k=0; k < no_c; k++)
-     c[k+1]=c[k]/(double)(k+1);
-
-    init_flag=1;
-  }
   
   _make_su3(v,*p);
   _su3_times_su3(v2,v,v);
@@ -139,19 +136,19 @@ void exposu3(su3* const vr, const su3adj* const p) {
  /*  printf(" d= %.16f and t=%.16f + 1i %.16f \n",d,creal(t),cimag(t));*/
   
   if(fabs(d)>(1.000001*(1.000002-fabs(t))))
-    printf("The norm of X is larger than 1 and N = %d \n", no_c);
+    printf("The norm of X is larger than 1 and N = %d \n", g_exposu3_no_c);
   
   
-  p0=c[no_c];
+  p0=g_exposu3_c[g_exposu3_no_c];
   p1=0.0;
   p2=0.0;
   
-  for (n=(no_c-1);n>=0;n--) {
+  for (n=(g_exposu3_no_c-1);n>=0;n--) {
     q0=p0;
     q1=p1;
     q2=p2;
     
-    p0=c[n]-I*d*q2;
+    p0=g_exposu3_c[n]-I*d*q2;
     p1=q0-t*q2;
     p2=q1;
   }
diff --git a/expo.h b/expo.h
index dd0c3657f..8e5c1eef3 100644
--- a/expo.h
+++ b/expo.h
@@ -19,10 +19,11 @@
 #ifndef _EXPO_H
 #define _EXPO_H
 
-extern void exposu3(su3* const vr, const su3adj* const p);
-extern void exposu3_check(su3* const vr, const su3adj* const p, int im);
-extern void restoresu3(su3* const vr, const su3* const u);
-extern void restoresu3_in_place(su3* const u);
-extern void exposu3_in_place(su3* const u);
+void init_exposu3();
+void exposu3(su3* const vr, const su3adj* const p);
+void exposu3_check(su3* const vr, const su3adj* const p, int im);
+void restoresu3(su3* const vr, const su3* const u);
+void restoresu3_in_place(su3* const u);
+void exposu3_in_place(su3* const u);
 
 #endif
diff --git a/global.h b/global.h
index e57db79c6..1d13387ce 100644
--- a/global.h
+++ b/global.h
@@ -212,6 +212,10 @@ EXTERN int g_mpi_z_rank;
 EXTERN int g_mpi_ST_rank;
 EXTERN int g_nb_list[8];
 
+/* Variables for exposu3 */
+EXTERN int g_exposu3_no_c;
+EXTERN double * g_exposu3_c;
+
 /* OpenMP Kahan accumulation arrays */
 EXTERN _Complex double *g_omp_acc_cp;
 EXTERN double* g_omp_acc_re;
diff --git a/smearing/hex_stout_exclude_none.c b/smearing/hex_stout_exclude_none.c
index 0d696ac70..b94007d4d 100644
--- a/smearing/hex_stout_exclude_none.c
+++ b/smearing/hex_stout_exclude_none.c
@@ -3,6 +3,7 @@
 void stout_exclude_none(su3_tuple *buff_out, double const coeff, su3_tuple **staples, su3_tuple *buff_in)
 {
   static su3 tmp;
+  if (g_exposu3_no_c == 0) init_exposu3();
 
 #define _MULTIPLY_AND_EXPONENTIATE(x, principal) \
   { \
diff --git a/smearing/hex_stout_exclude_one.c b/smearing/hex_stout_exclude_one.c
index 4071e29a8..88b30dbfc 100644
--- a/smearing/hex_stout_exclude_one.c
+++ b/smearing/hex_stout_exclude_one.c
@@ -1,9 +1,11 @@
 #include "hex.ih"
+#include "global.h"
 
 void stout_exclude_one(su3_tuple **buff_out, double const coeff, su3_tuple **staples, su3_tuple *buff_in)
 {
   static su3 tmp;
-  
+  if (g_exposu3_no_c == 0) init_exposu3();
+
 #define _MULTIPLY_AND_EXPONENTIATE(x, principal, component) \
   { \
     _su3_times_su3d(tmp, staples[component / 4][x][component % 4], buff_in[x][principal]); \
diff --git a/smearing/hex_stout_exclude_two.c b/smearing/hex_stout_exclude_two.c
index 921457bbe..a2052daae 100644
--- a/smearing/hex_stout_exclude_two.c
+++ b/smearing/hex_stout_exclude_two.c
@@ -3,6 +3,7 @@
 void stout_exclude_two(su3_tuple **buff_out, double const coeff, su3_tuple **staples, su3_tuple *buff_in)
 {
   static su3 tmp;
+  if (g_exposu3_no_c == 0) init_exposu3();
 
 #define _MULTIPLY_AND_EXPONENTIATE(x, principal, component) \
   { \
diff --git a/smearing/stout_stout_smear.c b/smearing/stout_stout_smear.c
index e0a752913..cd92c7e95 100644
--- a/smearing/stout_stout_smear.c
+++ b/smearing/stout_stout_smear.c
@@ -14,6 +14,7 @@ int stout_smear(su3_tuple *m_field_out, struct stout_parameters const *params, s
     buffer = (su3_tuple*)(((unsigned long int)(buffer) + ALIGN_BASE) & ~ALIGN_BASE);
 #endif
     
+    if (g_exposu3_no_c == 0) init_exposu3();
     if (buffer == (su3_tuple*)NULL)
       return -1;
     initialized = 1;
diff --git a/update_gauge.c b/update_gauge.c
index b8d91d99e..5b45eaa2e 100644
--- a/update_gauge.c
+++ b/update_gauge.c
@@ -73,6 +73,8 @@ void update_gauge(const double step, hamiltonian_field_t * const hf) {
 #undef static
 #endif
 
+  if (g_exposu3_no_c == 0) init_exposu3();
+
 #ifdef TM_USE_OMP
 #pragma omp for
 #endif
diff --git a/update_momenta_fg.c b/update_momenta_fg.c
index b3a54194a..0b79f7c36 100644
--- a/update_momenta_fg.c
+++ b/update_momenta_fg.c
@@ -107,6 +107,7 @@ void update_momenta_fg(int * mnllist, double step, const int no,
   xchange_deri(hf->derivative);
 #endif
 
+  if (g_exposu3_no_c == 0) init_exposu3();
 
   /* #ifdef TM_USE_OMP
      #pragma omp parallel for

From 551b949746f6e9e615ffcb33142d29be58123920 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Wed, 11 Jul 2018 14:29:30 +0200
Subject: [PATCH 63/85] in update_gauge, init_exposu3 must be called OUTSIDE of
 the parallel section

---
 update_gauge.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/update_gauge.c b/update_gauge.c
index 5b45eaa2e..9957c1a84 100644
--- a/update_gauge.c
+++ b/update_gauge.c
@@ -54,6 +54,7 @@ void update_gauge(const double step, hamiltonian_field_t * const hf) {
 #ifdef DDalphaAMG
   MG_update_gauge(step);
 #endif
+  if (g_exposu3_no_c == 0) init_exposu3();
 
 #ifdef TM_USE_OMP
 #define static
@@ -73,8 +74,6 @@ void update_gauge(const double step, hamiltonian_field_t * const hf) {
 #undef static
 #endif
 
-  if (g_exposu3_no_c == 0) init_exposu3();
-
 #ifdef TM_USE_OMP
 #pragma omp for
 #endif

From b13408215b9db92b103df033aa54aef80f8ee543 Mon Sep 17 00:00:00 2001
From: Finkenrath <j.finkenrath@cyi.ac.cy>
Date: Wed, 25 Jul 2018 09:23:45 +0300
Subject: [PATCH 64/85] Bug Fix, fix in expo.c for 2tr(X)>1, fix in
 ndratcor_monomial.c the ev-call in case of clovernd

---
 expo.c                       | 2 +-
 monomial/ndratcor_monomial.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/expo.c b/expo.c
index 6cc72b734..93f99cc9d 100644
--- a/expo.c
+++ b/expo.c
@@ -132,7 +132,7 @@ void exposu3(su3* const vr, const su3adj* const p) {
   /* t= -tr(X^2)/2*/
   t = -0.5*(v2.c00 +v2.c11+v2.c22);
   /* d= -1i * det(X)*/
-  d=-imag_det(p);
+  d=-imag_det(&pa);
  /*  printf(" d= %.16f and t=%.16f + 1i %.16f \n",d,creal(t),cimag(t));*/
   
   if(fabs(d)>(1.000001*(1.000002-fabs(t))))
diff --git a/monomial/ndratcor_monomial.c b/monomial/ndratcor_monomial.c
index 01f9b69c5..e0f7aaf55 100644
--- a/monomial/ndratcor_monomial.c
+++ b/monomial/ndratcor_monomial.c
@@ -81,7 +81,7 @@ void ndratcor_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
   // we measure before the trajectory!
   if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
-    if(mnl->type != NDCLOVERRAT) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
+    if(mnl->type != NDCLOVERRATCOR) phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
     else phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
   }
 

From d206e97aee79d2515f78330695c54fe4e4e84ad7 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 4 Oct 2018 13:38:33 +0300
Subject: [PATCH 65/85] Hack for allowing the multishift inversion using QphiX
 AND DDalphaAMG. Only interpolated initial guess is used and no CGMMS initial
 guess is given from QphiX.

---
 solver/monomial_solve.c | 64 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index e7d7c8f9f..6ca441812 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -221,10 +221,35 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       }
       // Number of initial guesses provided by gcmms
       // README: tunable value. 1 it's fine for now.
-      int no_cgmms_init_guess = 1;
+      int  no_cgmms_init_guess = 1;
       if(no_cgmms_init_guess > mg_nshifts) {
         no_cgmms_init_guess = mg_nshifts;
       }
+#ifdef TM_USE_QPHIX
+      if( solver_params->external_inverter == QPHIX_INVERTER && mg_nshifts < nshifts ) {
+        // TODO: no initial guess option with QphiX; CGMMS solves only the tail shifts and writes them to P_cg
+        no_cgmms_init_guess = 0;
+        spinor ** P_cg = P+(mg_nshifts - no_cgmms_init_guess);
+        double * shifts_start = solver_params->shifts;
+        solver_params->no_shifts = nshifts - (mg_nshifts - no_cgmms_init_guess);
+        solver_params->shifts += (mg_nshifts - no_cgmms_init_guess);
+        solver_params->type = CGMMS;
+        gamma5(temp[0], Q, VOLUME/2);
+        iteration_count = invert_eo_qphix_oneflavour_mshift(P_cg, temp[0],
+                                                            solver_params->max_iter, solver_params->squared_solver_prec,
+                                                            solver_params->type, solver_params->rel_prec,
+                                                            *solver_params,
+                                                            solver_params->sloppy_precision,
+                                                            solver_params->compression_type);
+        for( int shift = 0; shift < solver_params->no_shifts; shift++) {
+          mul_gamma5(P_cg[shift], VOLUME/2);
+        }
+        // Restoring solver_params
+        solver_params->no_shifts = nshifts;
+        solver_params->shifts = shifts_start;
+        solver_params->type = MG;
+      } else
+#endif // TM_USE_QPHIX
       if (mg_nshifts < nshifts) {
         spinor ** P_cg = P+(mg_nshifts - no_cgmms_init_guess);
         double * shifts_start = solver_params->shifts;
@@ -380,11 +405,46 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
         while (mg_mms_mass < solver_params->shifts[mg_nshifts-1]) { mg_nshifts--; }
       }
       // Number of initial guesses provided by gcmms
-      // README: tunable value. 1 it's fine for now.
+      // README: tunable value. 2 it's fine for now.
       int no_cgmms_init_guess = 2;
       if(no_cgmms_init_guess > mg_nshifts) {
         no_cgmms_init_guess = mg_nshifts;
       }
+#ifdef TM_USE_QPHIX
+      if(solver_params->external_inverter == QPHIX_INVERTER && mg_nshifts < nshifts){
+        // TODO: no initial guess option with QphiX
+        no_cgmms_init_guess = 0;
+        spinor ** Pup_cg = Pup+(mg_nshifts - no_cgmms_init_guess);
+        spinor ** Pdn_cg = Pdn+(mg_nshifts - no_cgmms_init_guess);
+        double * shifts_start = solver_params->shifts;
+        solver_params->no_shifts = nshifts - (mg_nshifts - no_cgmms_init_guess);
+        solver_params->shifts += (mg_nshifts - no_cgmms_init_guess);
+        solver_params-> type = CGMMSND;
+        //  gamma5 (M.M^dagger)^{-1} gamma5 = [ Q(+mu,eps) Q(-mu,eps) ]^{-1}
+        gamma5(temp[0], Qup, VOLUME/2);
+        gamma5(temp[1], Qdn, VOLUME/2);
+        iteration_count = invert_eo_qphix_twoflavour_mshift(Pup_cg, Pdn_cg, temp[0], temp[1],
+                                                            solver_params->max_iter, solver_params->squared_solver_prec,
+                                                            solver_params->type, solver_params->rel_prec,
+                                                            *solver_params,
+                                                            solver_params->sloppy_precision,
+                                                            solver_params->compression_type);
+    
+        // the tmLQCD ND operator used for HMC is normalised by the inverse of the maximum eigenvalue
+        // so the inverse of Q^2 is normalised by the square of the maximum eigenvalue
+        // or, equivalently, by the square of the inverse of phmc_invmaxev (the inverse maximum eigenvalue)
+        // note that in the QPhiX interface, we also correctly normalise the shifts
+        const double maxev_sq = (1.0/phmc_invmaxev)*(1.0/phmc_invmaxev);
+        for( int shift = 0; shift < solver_params->no_shifts; shift++){
+          mul_r_gamma5(Pup[shift], maxev_sq, VOLUME/2);
+          mul_r_gamma5(Pdn[shift], maxev_sq, VOLUME/2);
+        }
+        // Restoring solver_params
+        solver_params->no_shifts = nshifts;
+        solver_params->shifts = shifts_start;
+        solver_params-> type = MG;
+      } else
+#endif //TM_USE_QPHIX
       if (mg_nshifts < nshifts) {
         spinor ** Pup_cg = Pup+(mg_nshifts - no_cgmms_init_guess);
         spinor ** Pdn_cg = Pdn+(mg_nshifts - no_cgmms_init_guess);

From aa2de0a5f458d5e4d7e1949b7c7f2c5928268b50 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Thu, 4 Oct 2018 14:58:20 +0300
Subject: [PATCH 66/85] Removing space after new line in macro

---
 global.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/global.h b/global.h
index 1d13387ce..e814818f1 100644
--- a/global.h
+++ b/global.h
@@ -291,7 +291,7 @@ void fatal_error(char const *error, char const *function);
  * We use memcpy because is optimal when the amount to copy is known at compilation time. 
  * "sizeof(x) == sizeof(y) ? (signed)sizeof(x) : -1" is a compile time check that the types are compatible.
  */
-#define SWAP(x,y) do \ 
+#define SWAP(x,y) do \
 { unsigned char swap_temp[sizeof(x) == sizeof(y) ? (signed)sizeof(x) : -1]; \
   memcpy(swap_temp,&y,sizeof(x)); \
   memcpy(&y,&x,       sizeof(x)); \

From 35fb270fac5ca535de261c5fa3734413e9381d58 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Thu, 4 Oct 2018 14:50:00 +0200
Subject: [PATCH 67/85] in parsing the DDalphaAMG input parameters, clarify the
 intent when parsing mg_no_shifts and mg_mms_mass

---
 read_input.l | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/read_input.l b/read_input.l
index c6c81148f..523fb1f05 100644
--- a/read_input.l
+++ b/read_input.l
@@ -807,12 +807,15 @@ static inline double fltlist_next_token(int * const list_end){
   {SPC}*MGNumberOfShifts{EQL}{DIGIT}+ {
     sscanf(yytext, " %[a-zA-Z] = %d", name, &a);
     mg_no_shifts=a;
+    // when the number of shifts is specified, mg_mms_mass must be set to zero!
     mg_mms_mass=0;
     if(myverbose) printf("  MG_Num_of_shifts set to %d line %d operator %d\n", mg_no_shifts, line_of_file, current_operator);
   }
   {SPC}*MGMMSMass{EQL}{FLT}+ {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
     mg_mms_mass=c;
+    // when mg_mms_mass is specified, mg_no_shifts should be set to zero!
+    mg_no_shifts=0;
     if(myverbose) printf("  MG_MMS_Mass set to %f line %d operator %d\n", mg_mms_mass, line_of_file, current_operator);
   }
   EndDDalphaAMG{SPC}* {

From 825f3bb8431d54669768349b685a5404fda5bbcf Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 11:10:21 +0300
Subject: [PATCH 68/85] Fixed typo

---
 update_momenta_fg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/update_momenta_fg.c b/update_momenta_fg.c
index 0b79f7c36..d9cda48de 100644
--- a/update_momenta_fg.c
+++ b/update_momenta_fg.c
@@ -130,7 +130,7 @@ void update_momenta_fg(int * mnllist, double step, const int no,
       restoresu3(&v,&w);
       _su3_times_su3(w, v, *z);
       restoresu3(&v,&w);
-      _su3_assign(*z, w);
+      _su3_assign(*z, v);
     }
   }
 

From 7a52cb9b651c24415eec7f9cdf08d3e20029d173 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 11:18:16 +0300
Subject: [PATCH 69/85] Updated documentation of DDalphaAMG with new
 parameters.

---
 doc/DDalphaAMG.tex | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/DDalphaAMG.tex b/doc/DDalphaAMG.tex
index 7fac1eda2..0a5f2cd64 100644
--- a/doc/DDalphaAMG.tex
+++ b/doc/DDalphaAMG.tex
@@ -88,6 +88,8 @@ \subsubsection{More advanced settings}
 	\item[\texttt{MGdtauUpdate:}] for HMC, $d\tau$ interval after that the setup is updated. If 0, it will be updated every time the configuration is changed.
 	\item[\texttt{MGrhoUpdate:}] for HMC, rho value of the monomial at which the setup have to be updated. It can be combined with \texttt{MGdtauUpdate} or used standalone.
 	\item[\texttt{MGUpdateSetupIter:}] for HMC, number of setup iterations to do on the fine level when the setup has to be updated.
+	\item[\texttt{MGNumberOfShifts:}] for MG in multi-shift systems, the number $N$ of shifted linear systems to be solved by DDalphaAMG. MG will solve the $N$ systems with the smallest shifts.
+	\item[\texttt{MGMMSMass:}] for MG in multi-shift systems, alternative to \texttt{MGNumberOfShifts}. MG will solve all the systems whose mass shift is not larger than the given value. The two options are mutually exclusive: setting one resets the other to zero.
 \end{description}
 \subsubsection{Output analysis\label{sec:DDalphaAMG_output}}
 Running tmLQCD programs with the option \texttt{-v}, the full output of DDalphaAMG is shown. Here some hints on the informations given. Just before the setup, the full set of parameters is printed, with an output similar to the following:

From 27445a94ccd39fa5598efbdcb81843972937e30e Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 11:54:11 +0300
Subject: [PATCH 70/85] Fixing read_input

---
 default_input_values.h |  1 +
 read_input.l           | 31 ++++++++++++++++++++++++++++---
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/default_input_values.h b/default_input_values.h
index 60832e11f..22d3ea79c 100644
--- a/default_input_values.h
+++ b/default_input_values.h
@@ -155,6 +155,7 @@
 #define _default_timescale 1
 #define _default_reweighting_flag 0
 #define _default_reweighting_samples 10
+#define _default_restoresu3_flag 0
 #define _default_source_type_flag 0
 #define _default_no_samples 1
 #define _default_online_measurement_flag 1
diff --git a/read_input.l b/read_input.l
index 523fb1f05..4762f2362 100644
--- a/read_input.l
+++ b/read_input.l
@@ -395,6 +395,7 @@ static inline double fltlist_next_token(int * const list_end){
 %x MNAME
 %x MCSTR
 %x MSOLVER
+%x RATMSOLVER
 %x NDMSOLVER
 %x GTYPE
 
@@ -1924,7 +1925,7 @@ static inline double fltlist_next_token(int * const list_end){
   }
 }
 
-<NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL>{
+<NDRATMONOMIAL,NDRATCORMONOMIAL,NDCLRATMONOMIAL,NDCLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
   {SPC}*Solver{EQL} {
    solver_caller=YY_START;
    BEGIN(NDMSOLVER);
@@ -1934,11 +1935,11 @@ static inline double fltlist_next_token(int * const list_end){
 <RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
   {SPC}*Solver{EQL} {
    solver_caller=YY_START;
-   BEGIN(MSOLVER);
+   BEGIN(RATMSOLVER);
   }
 }
 
-<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLDETRATRWMONOMIAL,RATMONOMIAL,RATCORMONOMIAL,CLRATMONOMIAL,CLRATCORMONOMIAL>{
   {SPC}*2KappaMu{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mu = c;
@@ -2176,6 +2177,29 @@ static inline double fltlist_next_token(int * const list_end){
   }
 }
 
+<RATMSOLVER>{
+  rgmixedCG {
+    if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
+    mnl->solver = RGMIXEDCG;
+    BEGIN(solver_caller);
+  }
+  cgmms {
+    if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
+    mnl->solver = CGMMS;
+    BEGIN(solver_caller);
+  }
+  DDalphaAMG {
+#ifdef DDalphaAMG
+    if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
+    mnl->solver = MG;
+    BEGIN(solver_caller);
+#else
+    printf("ERROR line %d operator %d: DDalphaAMG library not included\n", line_of_file, current_operator);
+    exit(1);
+#endif
+  }
+}
+
 <NDMSOLVER>{
   cgmmsnd {
     if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
@@ -3164,6 +3188,7 @@ int read_input(char * conf_file){
   use_preconditioning = _default_use_preconditioning;
   stout_rho = _default_stout_rho;
   stout_no_iter = _default_stout_no_iter;
+  restoresu3_flag = _default_restoresu3_flag;
 
   /* check for reread ! */ 
   phmc_compute_evs = _default_phmc_compute_evs;

From 92beb930e687974b1bc0cc6c9dc42de1c746a2bd Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 12:34:27 +0300
Subject: [PATCH 71/85] Giving init_exposu3 a better place in the world.

---
 init/init_gauge_field.c           | 3 +++
 init/init_stout_smear_vars.c      | 3 +++
 monomial/monomial.c               | 3 +++
 smearing/hex_stout_exclude_none.c | 1 -
 smearing/hex_stout_exclude_one.c  | 1 -
 smearing/hex_stout_exclude_two.c  | 1 -
 smearing/stout_stout_smear.c      | 1 -
 update_gauge.c                    | 1 -
 update_momenta_fg.c               | 2 --
 9 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/init/init_gauge_field.c b/init/init_gauge_field.c
index a4cdade3c..98eae6f2d 100644
--- a/init/init_gauge_field.c
+++ b/init/init_gauge_field.c
@@ -27,6 +27,7 @@
 #include "su3.h"
 #include "sse.h"
 #include "init_gauge_field.h"
+#include "expo.h"
 
 su3 * gauge_field = NULL;
 su3_32 * gauge_field_32 = NULL;
@@ -48,6 +49,8 @@ int init_gauge_field(const int V, const int back) {
   g_gauge_field_copy = NULL;
 #endif
 
+  if (g_exposu3_no_c == 0) init_exposu3();
+
   if((void*)(g_gauge_field = (su3**)calloc(V, sizeof(su3*))) == NULL) {
     printf ("malloc errno : %d\n",errno); 
     errno = 0;
diff --git a/init/init_stout_smear_vars.c b/init/init_stout_smear_vars.c
index 3bbb986a9..23ae299f8 100644
--- a/init/init_stout_smear_vars.c
+++ b/init/init_stout_smear_vars.c
@@ -28,6 +28,7 @@
 #include "global.h"
 #include "su3.h"
 #include "sse.h"
+#include "expo.h"
 #include "init_stout_smear_vars.h"
 
 su3 * gauge_field_saved;
@@ -91,6 +92,8 @@ int init_stout_smear_vars(const int V, const int stout_no_iter)
   k = 0;
   mu = 0;
 
+  if (g_exposu3_no_c == 0) init_exposu3();
+
   /*
    *  this is the field where we store the smeared force matrices \Sigma^{(k)}_\mu(x)
    *  eqtn (44) hep-lat/0311018
diff --git a/monomial/monomial.c b/monomial/monomial.c
index 3ab062210..ced50be24 100644
--- a/monomial/monomial.c
+++ b/monomial/monomial.c
@@ -159,6 +159,9 @@ int init_monomials(const int V, const int even_odd_flag) {
   spinor * __pf = NULL;
   double sw_mu=0., sw_k=0., sw_c=0.;
   double swn_mubar=0., swn_epsbar = 0., swn_k=0., swn_c=0.;
+
+  if (g_exposu3_no_c == 0) init_exposu3();
+  
   for(int i = 0; i < no_monomials; i++) {
     if((monomial_list[i].type != GAUGE) && (monomial_list[i].type != SFGAUGE)) no++;
     /* non-degenerate monomials need two pseudo fermion fields */
diff --git a/smearing/hex_stout_exclude_none.c b/smearing/hex_stout_exclude_none.c
index b94007d4d..0d696ac70 100644
--- a/smearing/hex_stout_exclude_none.c
+++ b/smearing/hex_stout_exclude_none.c
@@ -3,7 +3,6 @@
 void stout_exclude_none(su3_tuple *buff_out, double const coeff, su3_tuple **staples, su3_tuple *buff_in)
 {
   static su3 tmp;
-  if (g_exposu3_no_c == 0) init_exposu3();
 
 #define _MULTIPLY_AND_EXPONENTIATE(x, principal) \
   { \
diff --git a/smearing/hex_stout_exclude_one.c b/smearing/hex_stout_exclude_one.c
index 88b30dbfc..5327fde2e 100644
--- a/smearing/hex_stout_exclude_one.c
+++ b/smearing/hex_stout_exclude_one.c
@@ -4,7 +4,6 @@
 void stout_exclude_one(su3_tuple **buff_out, double const coeff, su3_tuple **staples, su3_tuple *buff_in)
 {
   static su3 tmp;
-  if (g_exposu3_no_c == 0) init_exposu3();
 
 #define _MULTIPLY_AND_EXPONENTIATE(x, principal, component) \
   { \
diff --git a/smearing/hex_stout_exclude_two.c b/smearing/hex_stout_exclude_two.c
index a2052daae..921457bbe 100644
--- a/smearing/hex_stout_exclude_two.c
+++ b/smearing/hex_stout_exclude_two.c
@@ -3,7 +3,6 @@
 void stout_exclude_two(su3_tuple **buff_out, double const coeff, su3_tuple **staples, su3_tuple *buff_in)
 {
   static su3 tmp;
-  if (g_exposu3_no_c == 0) init_exposu3();
 
 #define _MULTIPLY_AND_EXPONENTIATE(x, principal, component) \
   { \
diff --git a/smearing/stout_stout_smear.c b/smearing/stout_stout_smear.c
index cd92c7e95..e0a752913 100644
--- a/smearing/stout_stout_smear.c
+++ b/smearing/stout_stout_smear.c
@@ -14,7 +14,6 @@ int stout_smear(su3_tuple *m_field_out, struct stout_parameters const *params, s
     buffer = (su3_tuple*)(((unsigned long int)(buffer) + ALIGN_BASE) & ~ALIGN_BASE);
 #endif
     
-    if (g_exposu3_no_c == 0) init_exposu3();
     if (buffer == (su3_tuple*)NULL)
       return -1;
     initialized = 1;
diff --git a/update_gauge.c b/update_gauge.c
index 9957c1a84..b8d91d99e 100644
--- a/update_gauge.c
+++ b/update_gauge.c
@@ -54,7 +54,6 @@ void update_gauge(const double step, hamiltonian_field_t * const hf) {
 #ifdef DDalphaAMG
   MG_update_gauge(step);
 #endif
-  if (g_exposu3_no_c == 0) init_exposu3();
 
 #ifdef TM_USE_OMP
 #define static
diff --git a/update_momenta_fg.c b/update_momenta_fg.c
index d9cda48de..6819bf8c8 100644
--- a/update_momenta_fg.c
+++ b/update_momenta_fg.c
@@ -107,8 +107,6 @@ void update_momenta_fg(int * mnllist, double step, const int no,
   xchange_deri(hf->derivative);
 #endif
 
-  if (g_exposu3_no_c == 0) init_exposu3();
-
   /* #ifdef TM_USE_OMP
      #pragma omp parallel for
      #endif

From 5d65fec757cccb35f59d7ef55f0e81757fa46687 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 12:50:26 +0300
Subject: [PATCH 72/85] Adding solver parameter for initial guess

---
 solver/monomial_solve.c     | 13 +++++++++++--
 solver/rg_mixed_cg_her_nd.c |  5 +----
 solver/solver_params.h      |  3 ++-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 6ca441812..bc8cbfe5a 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -102,6 +102,8 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
   }
 
+  solver_params.use_initial_guess = 0;
+
 #ifdef TM_USE_QPHIX
   if(solver_params.external_inverter == QPHIX_INVERTER){
     // using CG for the HMC, we always want to have the solution of (Q Q^dagger) x = b, which is equivalent to
@@ -184,6 +186,8 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
                  solver_params_t * solver_params){ 
   int iteration_count = 0; 
 
+  solver_params->use_initial_guess = 0;
+
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
   if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
@@ -302,14 +306,16 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
     double iter_local = 0;
     for(int i = solver_params->no_shifts-1; i>=0; i--){
       // preparing initial guess
-      init_guess_mms(P, Q, i, solver_params);
-      
+      init_guess_mms(P, Q, i, solver_params); 
+      solver_params.use_initial_guess = 1;
+     
       // inverting
       g_mu3 = solver_params->shifts[i]; 
       iter_local = rg_mixed_cg_her( P[i], Q, temp_params, solver_params->max_iter,
                                     solver_params->squared_solver_prec, solver_params->rel_prec, solver_params->sdim,
                                     solver_params->M_psi, f32);
       g_mu3 = _default_g_mu3;
+      solver_params.use_initial_guess = 0;
       if(iter_local == -1){
         return(-1);
       } else {
@@ -344,6 +350,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
                  spinor * const Qup, spinor * const Qdn, 
                  solver_params_t * solver_params){ 
   int iteration_count = 0; 
+  solver_params->use_initial_guess = 0;
 
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
@@ -506,12 +513,14 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     for(int i = solver_params->no_shifts-1; i>=0; i--){
       // preparing initial guess
       init_guess_mms_nd(Pup, Pdn, Qup, Qdn, i, solver_params);
+      solver_params->use_initial_guess = 1;
       
       // inverting
       g_shift = solver_params->shifts[i]*solver_params->shifts[i]; 
       iter_local = rg_mixed_cg_her_nd( Pup[i], Pdn[i], Qup, Qdn, temp_params, solver_params->max_iter,
                                        solver_params->squared_solver_prec, solver_params->rel_prec, solver_params->sdim, f, f32);
       g_shift = _default_g_shift;
+      solver_params->use_initial_guess = 0;
       if(iter_local == -1){
         return(-1);
       } else {
diff --git a/solver/rg_mixed_cg_her_nd.c b/solver/rg_mixed_cg_her_nd.c
index e2541d163..cf30c1eb2 100644
--- a/solver/rg_mixed_cg_her_nd.c
+++ b/solver/rg_mixed_cg_her_nd.c
@@ -250,10 +250,7 @@ int rg_mixed_cg_her_nd(spinor * const P_up, spinor * const P_dn, spinor * const
   
   zero_spinor_field_32(x_up,N); zero_spinor_field_32(x_dn,N);
 
-  guesssquarenorm = square_norm(P_up, N, 1);
-  guesssquarenorm += square_norm(P_dn, N, 1);
-
-  if(guesssquarenorm == 0) {
+  if(solver_params.use_initial_guess == 0) {
     assign(phigh_up,Q_up,N); assign(phigh_dn,Q_dn,N);
     assign(rhigh_up,Q_up,N); assign(rhigh_dn,Q_dn,N);
     rho_dp = sourcesquarenorm;
diff --git a/solver/solver_params.h b/solver/solver_params.h
index fac350515..8c301e46d 100644
--- a/solver/solver_params.h
+++ b/solver/solver_params.h
@@ -95,7 +95,8 @@ typedef struct {
   CompressionType compression_type;
   SloppyPrecision sloppy_precision;
   ExternalInverter external_inverter;
-  
+
+  int use_initial_guess;  
 } solver_params_t;
 
 #endif

From 011bf6b794e3ff0851546d8bc088cefc88d4a2fe Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 12:50:56 +0300
Subject: [PATCH 73/85] Adding return of -1 in case of non-convergence

---
 solver/cg_her_nd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/solver/cg_her_nd.c b/solver/cg_her_nd.c
index fc46f6ce6..ab3bfe608 100644
--- a/solver/cg_her_nd.c
+++ b/solver/cg_her_nd.c
@@ -162,7 +162,8 @@ int cg_her_nd(spinor * const P_up,spinor * P_dn, spinor * const Q_up, spinor * c
   
   finalize_solver(up_field, nr_sf);
   finalize_solver(dn_field, nr_sf);
-  return(iteration+1);
+  if(iteration > max_iter) return(-1);
+  return(iteration);
 }
 
 

From 8b0377999eb48645acfef0a69f2ea5f416a85501 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 12:55:58 +0300
Subject: [PATCH 74/85] Adding kappa2 for NDDETRATMONOMIAL,NDCLDETRATMONOMIAL

---
 read_input.l | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/read_input.l b/read_input.l
index 4762f2362..3a217357d 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1802,7 +1802,7 @@ static inline double fltlist_next_token(int * const list_end){
   }
 }
 
-<DETMONOMIAL,POLYMONOMIAL,CLDETRATRWMONOMIAL,NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,CLDETRATRWMONOMIAL>{
   {SPC}*2KappaMu2{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mu2 = c;
@@ -1815,6 +1815,14 @@ static inline double fltlist_next_token(int * const list_end){
   }
 }
 
+<NDDETRATMONOMIAL,NDCLDETRATMONOMIAL>{
+  {SPC}*Kappa2{EQL}{FLT} {
+    sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
+    mnl->kappa2 = c;
+    if(myverbose) printf("  kappa2 set to %f line %d monomial %d\n", c, line_of_file, current_monomial);
+  }
+}
+
 <NDCLRATMONOMIAL,CLRATMONOMIAL>{
   {SPC}*AddTrLog{EQL}yes {
     mnl->trlog = 1;

From 527f13cdf10ce4e858c79e739f35a0d43321c54e Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 13:04:02 +0300
Subject: [PATCH 75/85] just typos

---
 DDalphaAMG_interface.c  | 4 ++--
 solver/monomial_solve.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/DDalphaAMG_interface.c b/DDalphaAMG_interface.c
index 4f37ceb0b..bc655806e 100644
--- a/DDalphaAMG_interface.c
+++ b/DDalphaAMG_interface.c
@@ -917,7 +917,7 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
   }
 
   // Checking if the operator is in the list and compatible with N
-  if (            f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+  if (      f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qtm_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and minus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qsw_tau1_ndpsi_sub_Ishift || // Gamma5 Dh tau1 - Schur complement with minus shift
@@ -933,7 +933,7 @@ static int MG_mms_solve_nd( spinor **const up_new, spinor **const dn_new,
            N==VOLUME?"":"Qsw_pm_ndpsi_shift");
 
   // Setting mubar, epsbar and shifts
-  if (            f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
+  if (      f == Qtm_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with csw = 0 and plus shift
             f == Qsw_tau1_ndpsi_add_Ishift || // Gamma5 Dh tau1 - Schur complement with plus shift
             f == Qtm_pm_ndpsi_shift ||  // (Gamma5 Dh tau1)^2 - Schur complement squared with csw = 0 and shift
             f == Qsw_pm_ndpsi_shift ) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift
diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index bc8cbfe5a..4c43e93db 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -307,7 +307,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
     for(int i = solver_params->no_shifts-1; i>=0; i--){
       // preparing initial guess
       init_guess_mms(P, Q, i, solver_params); 
-      solver_params.use_initial_guess = 1;
+      solver_params->use_initial_guess = 1;
      
       // inverting
       g_mu3 = solver_params->shifts[i]; 
@@ -315,7 +315,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
                                     solver_params->squared_solver_prec, solver_params->rel_prec, solver_params->sdim,
                                     solver_params->M_psi, f32);
       g_mu3 = _default_g_mu3;
-      solver_params.use_initial_guess = 0;
+      solver_params->use_initial_guess = 0;
       if(iter_local == -1){
         return(-1);
       } else {

From 770abe18a5d778c89b76ff27467c82e785a830b9 Mon Sep 17 00:00:00 2001
From: sbacchio <s.bacchio@gmail.com>
Date: Fri, 12 Oct 2018 13:16:52 +0300
Subject: [PATCH 76/85] removing CGMMS from MSOLVER

---
 read_input.l | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/read_input.l b/read_input.l
index 3a217357d..20581321b 100644
--- a/read_input.l
+++ b/read_input.l
@@ -2163,11 +2163,6 @@ static inline double fltlist_next_token(int * const list_end){
     mnl->solver = BICGSTAB;
     BEGIN(solver_caller);
   }
-  cgmms {
-    if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
-    mnl->solver = CGMMS;
-    BEGIN(solver_caller);
-  }
   mixedbicgstab {
     if(myverbose) printf("  Solver set to \"%s\" line %d monomial %d\n", yytext, line_of_file, current_monomial);
     mnl->solver=MIXEDBICGSTAB;

From fe4bef4432804ea4b9c2a1d0a1e5cba590bd995c Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Wed, 11 Jul 2018 14:30:28 +0200
Subject: [PATCH 77/85] introduce multi-threaded update_momenta_fg

---
 update_momenta_fg.c | 176 ++++++++++++++++++++++----------------------
 1 file changed, 88 insertions(+), 88 deletions(-)

diff --git a/update_momenta_fg.c b/update_momenta_fg.c
index 6819bf8c8..8905c7c2f 100644
--- a/update_momenta_fg.c
+++ b/update_momenta_fg.c
@@ -1,8 +1,7 @@
 /***********************************************************************
  *
- * Copyright (C) 2001 Martin Hasebusch
- *
- * some changes by C. Urbach 2002-2008,2012
+ * Copyright (C) 2017 Jacob Finkenrath
+ *               2018 Bartosz Kostrzewa
  *
  * This file is part of tmLQCD.
  *
@@ -49,6 +48,77 @@
 #ifdef DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
+
+/* Back up each link into gauge_fg, then overwrite it with the force-gradient update built from step_fg * derivative. 'static' is required: a plain C99 'inline' definition emits no external symbol. */
+static inline void calculate_fg(const double step_fg, hamiltonian_field_t * const hf){
+#ifdef TM_USE_OMP
+#define static
+#pragma omp parallel
+  {
+#endif
+
+  static su3 v,w;
+  su3 *z;
+  su3 *ztmp;
+  static su3adj deriv;
+  su3adj *Fm;
+
+#ifdef TM_USE_OMP
+#pragma omp for
+#endif
+  for(int i = 0; i < VOLUME; i++) { 
+    for(int mu = 0; mu < 4; mu++){
+      /* Copy gauge field to be temporarily updated */
+      z = &hf->gaugefield[i][mu];
+      ztmp = &gauge_fg[i][mu];
+      _su3_assign(*ztmp,*z);  
+ 
+      /* Calculate approximated force gradient term and update temporary gauge field */
+      Fm = &hf->derivative[i][mu];
+      _zero_su3adj(deriv);
+      _su3adj_assign_const_times_su3adj(deriv, step_fg, *Fm);
+      /*_su3adj_assign_const_times_su3adj(deriv, 0.0, *Fm);*/
+      exposu3(&w,&deriv);
+      restoresu3(&v,&w);
+      _su3_times_su3(w, v, *z);
+      restoresu3(&v,&w);
+      _su3_assign(*z, v);
+    }
+  }
+#ifdef TM_USE_OMP
+  } // OpenMP parallel section closing brace
+#undef static
+#endif
+}
+
+/* Update the momenta from the derivative (scaled by step) and restore the gauge field saved in gauge_fg. 'static' is required: a plain C99 'inline' definition emits no external symbol. */
+static inline void fg_update_momenta_reset_gaugefield(const double step, hamiltonian_field_t * const hf){
+#ifdef TM_USE_OMP
+#pragma omp parallel
+  {
+#endif
+  su3 *z;
+  su3 *ztmp;
+#ifdef TM_USE_OMP
+#pragma omp for
+#endif
+  for(int i = 0; i < VOLUME; i++) { 
+    for(int mu = 0; mu < 4; mu++){
+      /* Update momenta (the minus comes from an extra minus in trace_lambda)
+       and restore initial gauge field */
+      _su3adj_minus_const_times_su3adj(hf->momenta[i][mu], step, hf->derivative[i][mu]);
+  
+      z = &hf->gaugefield[i][mu];
+      ztmp = &gauge_fg[i][mu];
+      _su3_assign(*z,*ztmp);
+  
+    }
+  }
+#ifdef TM_USE_OMP
+  } // OpenMP parallel section closing brace
+#endif
+}
+
 /*******************************************************
  *
  * Temporarily updates the gauge field corresponding to 
@@ -56,7 +126,6 @@
  * the momenta
  *
  *******************************************************/
-
 void update_momenta_fg(int * mnllist, double step, const int no,
 		       hamiltonian_field_t * const hf, double step0) {
   double atime, etime;
@@ -64,39 +133,20 @@ void update_momenta_fg(int * mnllist, double step, const int no,
 #ifdef DDalphaAMG
   MG_update_gauge(0.0);
 #endif
+  if (g_exposu3_no_c == 0) init_exposu3();
 
-  /* #ifdef TM_USE_OMP
-     #define static
-     #pragma omp parallel
-     {
-     #endif
-  */
-
-  int i,mu;
-  double step_fg;
-  static su3 v,w;
-  su3 *z;
-  su3 *ztmp;
-  static su3adj deriv;
-  su3adj *Fm;
-
-  step_fg=-step0*step0/24;
-  /*
-     #ifdef _KOJAK_INST
-     #pragma pomp inst begin(updategauge)
-     #endif
-
-     #ifdef TM_USE_OMP
-     #pragma omp parallel for
-     #endif
-  */
+  double step_fg=-step0*step0/24;
 
+#ifdef TM_USE_OMP
+#pragma omp parallel for
+#endif
   for(int i = 0; i < (VOLUMEPLUSRAND + g_dbw2rand);i++) {
     for(int mu=0;mu<4;mu++) {
       _zero_su3adj(hf->derivative[i][mu]);
     }
   }
 
+  // calculate derivatives to estimate force gradient
   for(int k = 0; k < no; k++) {
     if(monomial_list[ mnllist[k] ].derivativefunction != NULL) {
       monomial_list[ mnllist[k] ].derivativefunction(mnllist[k], hf);
@@ -106,31 +156,8 @@ void update_momenta_fg(int * mnllist, double step, const int no,
 #ifdef TM_USE_MPI
   xchange_deri(hf->derivative);
 #endif
-
-  /* #ifdef TM_USE_OMP
-     #pragma omp parallel for
-     #endif
-  */
-
-  for(i = 0; i < VOLUME; i++) { 
-    for(mu = 0; mu < 4; mu++){
-      /* Cope gauge field to be temporarily updated */
-      z = &hf->gaugefield[i][mu];
-      ztmp = &gauge_fg[i][mu];
-      _su3_assign(*ztmp,*z);  
- 
-      /* Calculate approximated force gradient term and update temporary gauge field */
-      Fm = &hf->derivative[i][mu];
-      _zero_su3adj(deriv);
-      _su3adj_assign_const_times_su3adj(deriv, step_fg, *Fm);
-      /*_su3adj_assign_const_times_su3adj(deriv, 0.0, *Fm);*/
-      exposu3(&w,&deriv);
-      restoresu3(&v,&w);
-      _su3_times_su3(w, v, *z);
-      restoresu3(&v,&w);
-      _su3_assign(*z, v);
-    }
-  }
+  // estimate force gradient and propagate to gauge field
+  calculate_fg(step_fg, hf);
 
 #ifdef TM_USE_MPI
      /* for parallelization */
@@ -148,17 +175,10 @@ void update_momenta_fg(int * mnllist, double step, const int no,
    g_update_gauge_copy = 1;
    g_update_gauge_copy_32 = 1;
 
-
-   /* #ifdef TM_USE_OMP
-      #pragma omp parallel for
-      #endif
-   */
-   /* Calculate derivate based on the temporary updated
-      gauge field U'=ztmp:
-      1) Set derivative to zero
-      2) Recalcuate derivate
-   */
-    
+  // calculate forces with force-gradient updated gauge field
+#ifdef TM_USE_OMP
+#pragma omp parallel for
+#endif
   for(int i = 0; i < (VOLUMEPLUSRAND + g_dbw2rand);i++) {
     for(int mu=0;mu<4;mu++) {
       _zero_su3adj(hf->derivative[i][mu]);
@@ -174,25 +194,10 @@ void update_momenta_fg(int * mnllist, double step, const int no,
 #ifdef TM_USE_MPI
   xchange_deri(hf->derivative);
 #endif
-
-  for(i = 0; i < VOLUME; i++) { 
-    for(mu = 0; mu < 4; mu++){
-      /* Update momenta (the minus comes from an extra minus in trace_lambda)
-	 and restore initial gauge field */
-      _su3adj_minus_const_times_su3adj(hf->momenta[i][mu], step, hf->derivative[i][mu]);
-
-      z = &hf->gaugefield[i][mu];
-      ztmp = &gauge_fg[i][mu];
-      _su3_assign(*z,*ztmp);
-
-    }
-  }
-
-  /* #ifdef TM_USE_OMP
-     } /* OpenMP parallel closing brace /
-     #endif
-  */
   
+  // and finally update the momenta and reset the gauge field 
+  fg_update_momenta_reset_gaugefield(step, hf);
+
 #ifdef TM_USE_MPI
   /* for parallelization */
   xchange_gauge(hf->gaugefield);
@@ -218,9 +223,4 @@ void update_momenta_fg(int * mnllist, double step, const int no,
     printf("# Time gauge update: %e s\n", etime-atime); 
   } 
   return;
-
-  /* #ifdef _KOJAK_INST
-     #pragma pomp inst end(updategauge)
-     #endif
-  */
 }

From fe296d6e6f4ddf33e90f954c38bee83c5bc3245b Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Mon, 5 Nov 2018 11:27:48 +0100
Subject: [PATCH 78/85] remove a commented out line in update_momenta_fg

---
 update_momenta_fg.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/update_momenta_fg.c b/update_momenta_fg.c
index 8905c7c2f..15bb92fa5 100644
--- a/update_momenta_fg.c
+++ b/update_momenta_fg.c
@@ -77,7 +77,6 @@ inline void calculate_fg(const double step_fg,
       Fm = &hf->derivative[i][mu];
       _zero_su3adj(deriv);
       _su3adj_assign_const_times_su3adj(deriv, step_fg, *Fm);
-      /*_su3adj_assign_const_times_su3adj(deriv, 0.0, *Fm);*/
       exposu3(&w,&deriv);
       restoresu3(&v,&w);
       _su3_times_su3(w, v, *z);

From e9972663412a52ebceb8b2185a04927ba7ed9159 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Wed, 7 Nov 2018 13:38:17 +0100
Subject: [PATCH 79/85] use fatal_error in monomial_solve.c

---
 solver/monomial_solve.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 4c43e93db..8b060e36e 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -74,6 +74,7 @@
 #ifdef TM_USE_QPHIX
 #include "qphix_interface.h"
 #endif
+#include "fatal_error.h"
 
 #include <io/params.h>
 #include <io/spinor.h>
@@ -163,8 +164,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
     iteration_count =  MG_solver(P, Q, eps_sq, max_iter,rel_prec, N , g_gauge_field, f);
 #endif     
   else{
-    if(g_proc_id==0) printf("Error: solver not allowed for degenerate solve. Aborting...\n");
-    exit(2);
+    fatal_error("Error: solver not allowed for degenerate solve. Aborting...\n", "solve_degenerate");
   }
 
   if(g_debug_level > 2){
@@ -323,8 +323,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       }
     }
   } else {
-    if(g_proc_id==0) printf("Error: solver not allowed for TM mms solve. Aborting...\n");
-    exit(2);      
+    fatal_error("Error: solver not allowed for TM mms solve. Aborting...\n", "solve_mms_tm");
   }
 
   if(g_debug_level > 2){
@@ -528,8 +527,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       }
     }
   } else {
-    if(g_proc_id==0) printf("Error: solver not allowed for ND mms solve. Aborting...\n");
-    exit(2);      
+    fatal_error("Error: solver not allowed for ND mms solve. Aborting...\n", "solve_mss_nd");
   }
 
   if( g_debug_level > 2 ){

From 0ebb357c2e52a8e54b2d892cff50be52393f3a9a Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Wed, 7 Nov 2018 15:11:02 +0100
Subject: [PATCH 80/85] fix linkage of addup_ddummy: make it static inline

---
 xchange/xchange_deri.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xchange/xchange_deri.c b/xchange/xchange_deri.c
index e11441c1a..7edab924c 100644
--- a/xchange/xchange_deri.c
+++ b/xchange/xchange_deri.c
@@ -38,7 +38,7 @@
 #include "su3adj.h"
 #include "xchange_deri.h"
 
-inline void addup_ddummy(su3adj** const df, const int ix, const int iy) {
+static inline void addup_ddummy(su3adj** const df, const int ix, const int iy) {
   for(int mu = 0; mu < 4; mu++) {
     df[ix][mu].d1 += ddummy[iy][mu].d1;
     df[ix][mu].d2 += ddummy[iy][mu].d2;

From 9a5f4ad9003f4faf8281f7811928802b50117a93 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Wed, 7 Nov 2018 16:35:08 +0100
Subject: [PATCH 81/85] monomial_solve: perform residual check from debug level
 2 onwards

---
 solver/monomial_solve.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 8b060e36e..86e83a8e1 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -99,7 +99,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
 
   // temporary field required by the QPhiX solve or by residual check
   spinor** temp;
-  if(g_debug_level > 2 || solver_params.external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 1 || solver_params.external_inverter == QPHIX_INVERTER){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
   }
 
@@ -167,7 +167,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
     fatal_error("Error: solver not allowed for degenerate solve. Aborting...\n", "solve_degenerate");
   }
 
-  if(g_debug_level > 2){
+  if(g_debug_level > 1){
     f(temp[0], P);
     diff(temp[0], temp[0], Q, VOLUME/2);
     double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
@@ -175,7 +175,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
       printf("# solve_degenerate residual check: %e\n", diffnorm);
     }
   }
-  if(g_debug_level > 2 || solver_params.external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 1 || solver_params.external_inverter == QPHIX_INVERTER){
     finalize_solver(temp, 1);
   }
 
@@ -190,7 +190,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
 
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
-  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
+  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
   }
 
@@ -326,7 +326,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
     fatal_error("Error: solver not allowed for TM mms solve. Aborting...\n", "solve_mms_tm");
   }
 
-  if(g_debug_level > 2){
+  if(g_debug_level > 1){
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       g_mu3 = solver_params->shifts[shift]; 
       solver_params->M_psi(temp[0], P[shift]);
@@ -338,7 +338,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       }
     }
   }
-  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
+  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
     finalize_solver(temp, 1);
   }
 
@@ -353,7 +353,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
 
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
-  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
+  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
   }
 
@@ -530,7 +530,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     fatal_error("Error: solver not allowed for ND mms solve. Aborting...\n", "solve_mss_nd");
   }
 
-  if( g_debug_level > 2 ){
+  if( g_debug_level > 1 ){
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
       if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
@@ -546,7 +546,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       }
     }
   }
-  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
+  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
     finalize_solver(temp, 2);
   }
 

From 03290892c178dae59308123348e3427fdb16cdcd Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Tue, 1 Jan 2019 09:59:18 +0100
Subject: [PATCH 82/85] Revert "monomial_solve: perform residual check from
 debug level 2 onwards"

This reverts commit 9a5f4ad9003f4faf8281f7811928802b50117a93.
---
 solver/monomial_solve.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/solver/monomial_solve.c b/solver/monomial_solve.c
index 86e83a8e1..8b060e36e 100644
--- a/solver/monomial_solve.c
+++ b/solver/monomial_solve.c
@@ -99,7 +99,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
 
   // temporary field required by the QPhiX solve or by residual check
   spinor** temp;
-  if(g_debug_level > 1 || solver_params.external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 2 || solver_params.external_inverter == QPHIX_INVERTER){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
   }
 
@@ -167,7 +167,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
     fatal_error("Error: solver not allowed for degenerate solve. Aborting...\n", "solve_degenerate");
   }
 
-  if(g_debug_level > 1){
+  if(g_debug_level > 2){
     f(temp[0], P);
     diff(temp[0], temp[0], Q, VOLUME/2);
     double diffnorm = square_norm(temp[0], VOLUME/2, 1); 
@@ -175,7 +175,7 @@ int solve_degenerate(spinor * const P, spinor * const Q, solver_params_t solver_
       printf("# solve_degenerate residual check: %e\n", diffnorm);
     }
   }
-  if(g_debug_level > 1 || solver_params.external_inverter == QPHIX_INVERTER){
+  if(g_debug_level > 2 || solver_params.external_inverter == QPHIX_INVERTER){
     finalize_solver(temp, 1);
   }
 
@@ -190,7 +190,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
 
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
-  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 1);
   }
 
@@ -326,7 +326,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
     fatal_error("Error: solver not allowed for TM mms solve. Aborting...\n", "solve_mms_tm");
   }
 
-  if(g_debug_level > 1){
+  if(g_debug_level > 2){
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       g_mu3 = solver_params->shifts[shift]; 
       solver_params->M_psi(temp[0], P[shift]);
@@ -338,7 +338,7 @@ int solve_mms_tm(spinor ** const P, spinor * const Q,
       }
     }
   }
-  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
     finalize_solver(temp, 1);
   }
 
@@ -353,7 +353,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
 
   // temporary field required by the QPhiX solve or by residual check
   spinor ** temp;
-  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER && solver_params->type != MG)){
     init_solver_field(&temp, VOLUMEPLUSRAND/2, 2);
   }
 
@@ -530,7 +530,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
     fatal_error("Error: solver not allowed for ND mms solve. Aborting...\n", "solve_mss_nd");
   }
 
-  if( g_debug_level > 1 ){
+  if( g_debug_level > 2 ){
     for( int shift = 0; shift < solver_params->no_shifts; shift++){
       matrix_mult_nd f = Qtm_pm_ndpsi_shift;
       if( solver_params->M_ndpsi == Qsw_pm_ndpsi ) 
@@ -546,7 +546,7 @@ int solve_mms_nd(spinor ** const Pup, spinor ** const Pdn,
       }
     }
   }
-  if(g_debug_level > 1 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
+  if(g_debug_level > 2 || (solver_params->external_inverter == QPHIX_INVERTER  && solver_params->type != MG)){
     finalize_solver(temp, 2);
   }
 

From 876cc61542d1b28cd97afcd9bcf7362daeaa34df Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Thu, 2 May 2019 10:31:50 +0200
Subject: [PATCH 83/85] remove 'restoresu3_flag' from meas/correlators.c

---
 meas/correlators.c | 181 ++++++++++++++-------------------------------
 1 file changed, 56 insertions(+), 125 deletions(-)

diff --git a/meas/correlators.c b/meas/correlators.c
index 6bc2b1110..3f9cae151 100644
--- a/meas/correlators.c
+++ b/meas/correlators.c
@@ -68,12 +68,8 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
   double *sCpp = NULL, *sCpa = NULL, *sCp4 = NULL;
 #endif
   FILE *ofs;
-  char *filename;
-  char *filename_tmp;
-  char buf[100], buf2[100];
+  char filename[100];
   spinor phi;
-  filename=buf;
-  filename_tmp = buf2;
 
   init_operators();
   if(no_operators < 1) {
@@ -131,22 +127,19 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
       }
       atime = gettime();
 
-      int runs = 1;
-      if (restoresu3_flag) runs = 2;
-
 #ifdef TM_USE_MPI
       sCpp = (double*) calloc(T, sizeof(double));
       sCpa = (double*) calloc(T, sizeof(double));
       sCp4 = (double*) calloc(T, sizeof(double));
       if(g_mpi_time_rank == 0) {
-        Cpp = (double*) calloc(g_nproc_t*T*runs, sizeof(double));
-        Cpa = (double*) calloc(g_nproc_t*T*runs, sizeof(double));
-        Cp4 = (double*) calloc(g_nproc_t*T*runs, sizeof(double));
+        Cpp = (double*) calloc(g_nproc_t*T, sizeof(double));
+        Cpa = (double*) calloc(g_nproc_t*T, sizeof(double));
+        Cp4 = (double*) calloc(g_nproc_t*T, sizeof(double));
       }
 #else
-      Cpp = (double*) calloc(T*runs, sizeof(double));
-      Cpa = (double*) calloc(T*runs, sizeof(double));
-      Cp4 = (double*) calloc(T*runs, sizeof(double));
+      Cpp = (double*) calloc(T, sizeof(double));
+      Cpa = (double*) calloc(T, sizeof(double));
+      Cp4 = (double*) calloc(T, sizeof(double));
 #endif
       source_generation_pion_only(g_spinor_field[0], g_spinor_field[1], 
 	    		      t0, sample, traj, measurement_list[id].seed);
@@ -155,149 +148,87 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
       optr->prop0 = g_spinor_field[2];
       optr->prop1 = g_spinor_field[3];
 
-      for( int r = 0; r<runs; r++) {
-        
-        if (restoresu3_flag) {
-          for(int ix=0;ix<VOLUME;ix++) {
-            for(int mu=0;mu<4;mu++){
-              su3 *v, *w;
-              v=&(g_gauge_field[ix][mu]);
-              w=&(gauge_tmp[ix][mu]);
-              if(r == 0){
-                _su3_assign(*v,*w);
-              } else {
-                restoresu3_in_place(v);
-              }
-            }
-          }
-#ifdef TM_USE_MPI
-          xchange_gauge(g_gauge_field);
-#endif
-          mg_update_gauge = 1;
-        }
-
-        // op_id = 0, index_start = 0, write_prop = 0
-        optr->inverter(0, 0, 0);
+      // op_id = 0, index_start = 0, write_prop = 0
+      optr->inverter(0, 0, 0);
 
-        /* now we bring it to normal format */
-        /* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
-        convert_eo_to_lexic(g_spinor_field[DUM_MATRIX], g_spinor_field[2], g_spinor_field[3]);
+      /* now we bring it to normal format */
+      /* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
+      convert_eo_to_lexic(g_spinor_field[DUM_MATRIX], g_spinor_field[2], g_spinor_field[3]);
       
-        /* now we sum only over local space for every t */
-        for(t = 0; t < T; t++) {
-          j = g_ipt[t][0][0][0];
-          res = 0.;
-          respa = 0.;
-          resp4 = 0.;
-          for(i = j; i < j+LX*LY*LZ; i++) {
-            res += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], g_spinor_field[DUM_MATRIX][i]);
-            _gamma0(phi, g_spinor_field[DUM_MATRIX][i]);
-            respa += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], phi);
-            _gamma5(phi, phi);
-            resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][i], phi);
-          }
-          
+      /* now we sum only over local space for every t */
+      for(t = 0; t < T; t++) {
+        j = g_ipt[t][0][0][0];
+        res = 0.;
+        respa = 0.;
+        resp4 = 0.;
+        for(i = j; i < j+LX*LY*LZ; i++) {
+          res += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], g_spinor_field[DUM_MATRIX][i]);
+          _gamma0(phi, g_spinor_field[DUM_MATRIX][i]);
+          respa += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], phi);
+          _gamma5(phi, phi);
+          resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][i], phi);
+        }
+        
 #if defined TM_USE_MPI
-          MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
-          res = mpi_res;
-          MPI_Reduce(&respa, &mpi_respa, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
-          respa = mpi_respa;
-          MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
-          resp4 = mpi_resp4;
-          sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-          sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-          sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
+        res = mpi_res;
+        MPI_Reduce(&respa, &mpi_respa, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
+        respa = mpi_respa;
+        MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
+        resp4 = mpi_resp4;
+        sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
 #else
-          Cpp[t+g_nproc_t*T*r] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-          Cpa[t+g_nproc_t*T*r] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-          Cp4[t+g_nproc_t*T*r] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        Cpp[t+g_nproc_t*T] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        Cpa[t+g_nproc_t*T] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        Cp4[t+g_nproc_t*T] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
 #endif
         }
         
 #ifdef TM_USE_MPI
         /* some gymnastics needed in case of parallelisation */
         if(g_mpi_time_rank == 0) {
-          MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp+g_nproc_t*T*r, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-          MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa+g_nproc_t*T*r, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-          MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4+g_nproc_t*T*r, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+          MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp+g_nproc_t*T, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+          MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa+g_nproc_t*T, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+          MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4+g_nproc_t*T, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
         }
 #endif
         
         /* and write everything into a file */
         if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
-          if(runs > 1) {
-            sprintf(filename_tmp,"%s.r%02d", filename, r);
-            ofs = fopen(filename_tmp, "w");
-          } else {
-            ofs = fopen(filename, "w");
-          }
-          fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0+g_nproc_t*T*r], 0.);
-          for(t = 1; t < g_nproc_t*T/2; t++) {
-            tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt+g_nproc_t*T*r]);
-            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cpp[tt+g_nproc_t*T*r]);
-          }
-          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt+g_nproc_t*T*r], 0.);
-          
-          fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0+g_nproc_t*T*r], 0.);
+          ofs = fopen(filename, "w");
+          fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0+g_nproc_t*T], 0.);
           for(t = 1; t < g_nproc_t*T/2; t++) {
             tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt+g_nproc_t*T*r]);
+            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt+g_nproc_t*T]);
             tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cpa[tt+g_nproc_t*T*r]);
+            fprintf( ofs, "%e\n", Cpp[tt+g_nproc_t*T]);
           }
           tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt+g_nproc_t*T*r], 0.);
+          fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt+g_nproc_t*T], 0.);
           
-          fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0+g_nproc_t*T*r], 0.);
-          for(t = 1; t < g_nproc_t*T/2; t++) {
-            tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt+g_nproc_t*T*r]);
-            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cp4[tt+g_nproc_t*T*r]);
-          }
-          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt+g_nproc_t*T*r], 0.);
-          fclose(ofs);
-        }
-      }
-      if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0 && runs == 2) {
-        sprintf(filename_tmp,"%s.diff", filename);
-        ofs = fopen(filename_tmp, "w");
-        fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0] - Cpp[t0+g_nproc_t*T], 0.);
+          fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0+g_nproc_t*T], 0.);
           for(t = 1; t < g_nproc_t*T/2; t++) {
             tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt] - Cpp[tt+g_nproc_t*T]);
+            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt+g_nproc_t*T]);
             tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cpp[tt] - Cpp[tt+g_nproc_t*T]);
+            fprintf( ofs, "%e\n", Cpa[tt+g_nproc_t*T]);
           }
           tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt] - Cpp[tt+g_nproc_t*T], 0.);
+          fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt+g_nproc_t*T], 0.);
           
-          fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0] - Cpa[t0+g_nproc_t*T], 0.);
+          fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0+g_nproc_t*T], 0.);
           for(t = 1; t < g_nproc_t*T/2; t++) {
             tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt] - Cpa[tt+g_nproc_t*T]);
+            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt+g_nproc_t*T]);
             tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cpa[tt] - Cpa[tt+g_nproc_t*T]);
+            fprintf( ofs, "%e\n", Cp4[tt+g_nproc_t*T]);
           }
           tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt] - Cpa[tt+g_nproc_t*T], 0.);
-          
-          fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0] - Cp4[t0+g_nproc_t*T], 0.);
-          for(t = 1; t < g_nproc_t*T/2; t++) {
-            tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt] - Cp4[tt+g_nproc_t*T]);
-            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cp4[tt] - Cp4[tt+g_nproc_t*T]);
-          }
-          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt] - Cp4[tt+g_nproc_t*T], 0.);
+          fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt+g_nproc_t*T], 0.);
           fclose(ofs);
         }
-
 #ifdef TM_USE_MPI
       if(g_mpi_time_rank == 0) {
         free(Cpp); free(Cpa); free(Cp4);
@@ -306,8 +237,8 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
 #else
       free(Cpp); free(Cpa); free(Cp4);
 #endif
-    }
-  } 
+    } // for(max_time_slices)
+  } // for(max_samples)
   etime = gettime();
   if(g_proc_id == 0 && g_debug_level > 0) {
     printf("ONLINE: measurement done int t/s = %1.4e\n", etime - atime);

From 1b65403625ffbebd3a8c24380082d5bc52d89d97 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Thu, 2 May 2019 10:53:53 +0200
Subject: [PATCH 84/85] fix some illegal memory accesses in meas/correlators

---
 meas/correlators.c | 109 +++++++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 53 deletions(-)

diff --git a/meas/correlators.c b/meas/correlators.c
index 3f9cae151..1185594ba 100644
--- a/meas/correlators.c
+++ b/meas/correlators.c
@@ -38,9 +38,7 @@
 #include "measurements.h"
 #include "correlators.h"
 #include "gettime.h"
-#include "DDalphaAMG_interface.h"
-#include "read_input.h"
-#include "init/init_gauge_tmp.h"
+
 
 /******************************************************
  *
@@ -55,6 +53,8 @@
  *
  ******************************************************/
 
+#define TM_OMEAS_FILENAME_LENGTH 100
+
 void correlators_measurement(const int traj, const int id, const int ieo) {
   int i, j, t, tt, t0;
   double *Cpp = NULL, *Cpa = NULL, *Cp4 = NULL;
@@ -68,7 +68,7 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
   double *sCpp = NULL, *sCpa = NULL, *sCp4 = NULL;
 #endif
   FILE *ofs;
-  char filename[100];
+  char filename[TM_OMEAS_FILENAME_LENGTH];
   spinor phi;
 
   init_operators();
@@ -106,11 +106,14 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
     for(int ts = 0; ts < max_time_slices; ts++){
 
       if( max_samples == 1 && max_time_slices == 1 ){
-        sprintf(filename,"%s%06d", "onlinemeas.", traj);
+        snprintf(filename, TM_OMEAS_FILENAME_LENGTH, 
+                 "%s%06d", "onlinemeas." ,traj);
       } else if ( max_samples == 1 && max_time_slices > 1){
-        sprintf(filename,"%s%06d.t%03d", "onlinemeas.", traj, ts );
+        snprintf(filename, TM_OMEAS_FILENAME_LENGTH, 
+                 "%s.t%03d.%06d", "onlinemeas", ts, traj );
       } else {
-        sprintf(filename,"%s%06d.s%03d", "onlinemeas.", traj, sample);
+        snprintf(filename, TM_OMEAS_FILENAME_LENGTH,
+                 "%s.s%03d.%06d", "onlinemeas", sample, traj);
       }
       /* generate random timeslice */
       t0 = ts;
@@ -168,7 +171,7 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
           _gamma5(phi, phi);
           resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][i], phi);
         }
-        
+
 #if defined TM_USE_MPI
         MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
         res = mpi_res;
@@ -180,55 +183,55 @@ void correlators_measurement(const int traj, const int id, const int ieo) {
         sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
         sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
 #else
-        Cpp[t+g_nproc_t*T] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-        Cpa[t+g_nproc_t*T] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
-        Cp4[t+g_nproc_t*T] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        Cpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        Cpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
+        Cp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)/2./optr->kappa/optr->kappa;
 #endif
-        }
-        
+      }
+
 #ifdef TM_USE_MPI
-        /* some gymnastics needed in case of parallelisation */
-        if(g_mpi_time_rank == 0) {
-          MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp+g_nproc_t*T, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-          MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa+g_nproc_t*T, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-          MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4+g_nproc_t*T, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-        }
+      /* some gymnastics needed in case of parallelisation */
+      if(g_mpi_time_rank == 0) {
+        MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+        MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+        MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+      }
 #endif
-        
-        /* and write everything into a file */
-        if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
-          ofs = fopen(filename, "w");
-          fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0+g_nproc_t*T], 0.);
-          for(t = 1; t < g_nproc_t*T/2; t++) {
-            tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt+g_nproc_t*T]);
-            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cpp[tt+g_nproc_t*T]);
-          }
-          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt+g_nproc_t*T], 0.);
-          
-          fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0+g_nproc_t*T], 0.);
-          for(t = 1; t < g_nproc_t*T/2; t++) {
-            tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt+g_nproc_t*T]);
-            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cpa[tt+g_nproc_t*T]);
-          }
-          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt+g_nproc_t*T], 0.);
-          
-          fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0+g_nproc_t*T], 0.);
-          for(t = 1; t < g_nproc_t*T/2; t++) {
-            tt = (t0+t)%(g_nproc_t*T);
-            fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt+g_nproc_t*T]);
-            tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
-            fprintf( ofs, "%e\n", Cp4[tt+g_nproc_t*T]);
-          }
-          tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
-          fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt+g_nproc_t*T], 0.);
-          fclose(ofs);
+
+      /* and write everything into a file */
+      if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
+        ofs = fopen(filename, "w");
+        fprintf( ofs, "1  1  0  %e  %e\n", Cpp[t0], 0.);
+        for(t = 1; t < g_nproc_t*T/2; t++) {
+          tt = (t0+t)%(g_nproc_t*T);
+          fprintf( ofs, "1  1  %d  %e  ", t, Cpp[tt]);
+          tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+          fprintf( ofs, "%e\n", Cpp[tt]);
+        }
+        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+        fprintf( ofs, "1  1  %d  %e  %e\n", t, Cpp[tt], 0.);
+
+        fprintf( ofs, "2  1  0  %e  %e\n", Cpa[t0], 0.);
+        for(t = 1; t < g_nproc_t*T/2; t++) {
+          tt = (t0+t)%(g_nproc_t*T);
+          fprintf( ofs, "2  1  %d  %e  ", t, Cpa[tt]);
+          tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+          fprintf( ofs, "%e\n", Cpa[tt]);
         }
+        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+        fprintf( ofs, "2  1  %d  %e  %e\n", t, Cpa[tt], 0.);
+
+        fprintf( ofs, "6  1  0  %e  %e\n", Cp4[t0], 0.);
+        for(t = 1; t < g_nproc_t*T/2; t++) {
+          tt = (t0+t)%(g_nproc_t*T);
+          fprintf( ofs, "6  1  %d  %e  ", t, Cp4[tt]);
+          tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
+          fprintf( ofs, "%e\n", Cp4[tt]);
+        }
+        tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
+        fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt], 0.);
+        fclose(ofs);
+      }
 #ifdef TM_USE_MPI
       if(g_mpi_time_rank == 0) {
         free(Cpp); free(Cpa); free(Cp4);

From 42356fcedf3fea156294fab6eec06c30501034c2 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz_kostrzewa@fastmail.com>
Date: Sat, 4 May 2019 15:11:54 +0200
Subject: [PATCH 85/85] remove restoresu3_flag

---
 default_input_values.h               |  1 -
 invert.c                             | 31 ----------------------------
 monomial/cloverdetratio_rwmonomial.c | 29 --------------------------
 read_input.l                         | 12 -----------
 4 files changed, 73 deletions(-)

diff --git a/default_input_values.h b/default_input_values.h
index 851a222b0..f9903da8f 100644
--- a/default_input_values.h
+++ b/default_input_values.h
@@ -155,7 +155,6 @@
 #define _default_timescale 1
 #define _default_reweighting_flag 0
 #define _default_reweighting_samples 10
-#define _default_restoresu3_flag 0
 #define _default_source_type_flag 0
 #define _default_no_samples 1
 #define _default_online_measurement_flag 1
diff --git a/invert.c b/invert.c
index 63d9c6d6b..9980e290f 100644
--- a/invert.c
+++ b/invert.c
@@ -59,7 +59,6 @@
 #include "boundary.h"
 #include "solver/solver.h"
 #include "init/init.h"
-#include "init/init_gauge_tmp.h"
 #include "smearing/stout.h"
 #include "invert_eo.h"
 #include "monomial/monomial.h"
@@ -181,10 +180,6 @@ int main(int argc, char *argv[])
   j = init_gauge_field(VOLUMEPLUSRAND, 0);
   j += init_gauge_field_32(VOLUMEPLUSRAND, 0);  
 #endif
-  if(restoresu3_flag) {
-    j += init_gauge_tmp(VOLUMEPLUSRAND);
-  }
- 
   if (j != 0) {
     fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
     exit(-1);
@@ -300,19 +295,6 @@ int main(int argc, char *argv[])
       fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename);
       exit(-2);
     }
-    if (restoresu3_flag) {
-      if (g_cart_id == 0) 
-        printf("# Restoring SU(3) matrices.\n");
-      for(int ix=0;ix<VOLUME;ix++) {
-        for(int mu=0;mu<4;mu++){
-          su3 *v, *w;
-          v=&(g_gauge_field[ix][mu]);
-          w=&(gauge_tmp[ix][mu]);
-          _su3_assign(*w,*v);
-          restoresu3_in_place(v);
-        }
-      }
-    }
 
     if (g_cart_id == 0) {
       printf("# Finished reading gauge field.\n");
@@ -320,9 +302,6 @@ int main(int argc, char *argv[])
     }
 #ifdef TM_USE_MPI
     xchange_gauge(g_gauge_field);
-    if (restoresu3_flag) {
-      xchange_gauge(gauge_tmp);
-    }
 #endif
     /*Convert to a 32 bit gauge field, after xchange*/
     convert_32_gauge_field(g_gauge_field_32, g_gauge_field, VOLUMEPLUSRAND);
@@ -334,15 +313,6 @@ int main(int argc, char *argv[])
       fflush(stdout);
     }
 
-    if (restoresu3_flag) {
-      double plaquette_old = measure_plaquette( (const su3**) gauge_tmp);
-      if (g_cart_id == 0) {
-        printf("# The computed plaquette value before restoring SU(3) is %e\n which differ from the new one of %e.\n",
-               plaquette_old / (6.*VOLUME*g_nproc), (plaquette_energy-plaquette_old) / (6.*VOLUME*g_nproc));
-        fflush(stdout);
-      }
-    }
-
     if (use_stout_flag == 1){
       params_smear.rho = stout_rho;
       params_smear.iterations = stout_no_iter;
@@ -474,7 +444,6 @@ int main(int argc, char *argv[])
 #endif
   free_blocks();
   free_dfl_subspace();
-  free_gauge_tmp();
   free_gauge_field();
   free_gauge_field_32();
   free_geometry_indices();
diff --git a/monomial/cloverdetratio_rwmonomial.c b/monomial/cloverdetratio_rwmonomial.c
index 11e1cc7b8..fc95575ef 100644
--- a/monomial/cloverdetratio_rwmonomial.c
+++ b/monomial/cloverdetratio_rwmonomial.c
@@ -56,20 +56,6 @@ double cloverdetratio_rwacc(const int id, hamiltonian_field_t * const hf) {
   double atime, etime;
   atime = gettime();
 
-  if (restoresu3_flag) {
-    for(int ix=0;ix<VOLUME;ix++) {
-      for(int mu=0;mu<4;mu++){
-        su3 *v, *w;
-        v=&(hf->gaugefield[ix][mu]);
-        w=&(gauge_tmp[ix][mu]);
-        _su3_assign(*v,*w);
-      }
-    }
-#ifdef TM_USE_MPI
-    xchange_gauge(hf->gaugefield);
-#endif
-  }
-
   g_mu = mnl->mu2;
   boundary(mnl->kappa2);
 
@@ -79,21 +65,6 @@ double cloverdetratio_rwacc(const int id, hamiltonian_field_t * const hf) {
   g_mu3 = 0.;
   mnl->Qp(mnl->w_fields[1], mnl->pf);
 
-
-  if (restoresu3_flag) {
-    for(int ix=0;ix<VOLUME;ix++) {
-      for(int mu=0;mu<4;mu++){
-        su3 *v;
-        v=&(hf->gaugefield[ix][mu]);
-        restoresu3_in_place(v);
-      }
-    }
-#ifdef TM_USE_MPI
-    xchange_gauge(hf->gaugefield);
-#endif
-    mg_update_gauge = 1;
-  }
-
   g_mu3 = 0.;
   g_mu = mnl->mu;
   boundary(mnl->kappa);
diff --git a/read_input.l b/read_input.l
index 20581321b..36e3b5ff3 100644
--- a/read_input.l
+++ b/read_input.l
@@ -177,7 +177,6 @@ static inline double fltlist_next_token(int * const list_end){
   int bc_flag;
   int online_measurement_flag;
   int online_measurement_freq;
-  int restoresu3_flag;
   int reweighting_flag;
   int reweighting_samples;
   int no_samples;
@@ -345,7 +344,6 @@ static inline double fltlist_next_token(int * const list_end){
 %x ORIENTEDPLAQUETTESMEAS
 %x GRADIENTFLOWMEAS
 
-%x RESTORESU3
 %x REWEIGH
 %x REWSAMPLES
 
@@ -492,7 +490,6 @@ static inline double fltlist_next_token(int * const list_end){
 ^PropagatorType{EQL}               BEGIN(WRPROPFLAG);
 ^RanluxdLevel{EQL}                 BEGIN(RLXDLEVEL);
 ^GCRPreconditioner{EQL}            BEGIN(PRECON);
-^RestoreSU3{EQL}                   BEGIN(RESTORESU3);
 ^ComputeReweightingFactor{EQL}     BEGIN(REWEIGH);
 ^NoReweightingSamples{EQL}         BEGIN(REWSAMPLES);
 ^SourceTimeSlice{EQL}              BEGIN(SOURCETS);
@@ -3006,14 +3003,6 @@ static inline double fltlist_next_token(int * const list_end){
   reweighting_flag = 0;
   if(myverbose!=0) printf("Do not compute reweighting factor\n");
 }
-<RESTORESU3>yes {
-  restoresu3_flag = 1;
-  if(myverbose!=0) printf("Restore SU(3) gauge-field\n");
-}
-<RESTORESU3>no {
-  restoresu3_flag = 0;
-  if(myverbose!=0) printf("Do not restore SU(3) gauge-field\n");
-}
 <REWSAMPLES>{DIGIT}+ {
   reweighting_samples = atoi(yytext);
   if(myverbose!=0) printf("Number of reweighting samples set to %d\n", reweighting_samples);
@@ -3191,7 +3180,6 @@ int read_input(char * conf_file){
   use_preconditioning = _default_use_preconditioning;
   stout_rho = _default_stout_rho;
   stout_no_iter = _default_stout_no_iter;
-  restoresu3_flag = _default_restoresu3_flag;
 
   /* check for reread ! */ 
   phmc_compute_evs = _default_phmc_compute_evs;