From b1d4ca3aba5243d8aa907f9f7eb28729868c5cb7 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 20 Mar 2012 11:18:17 +0100
Subject: [PATCH 001/110] introduced working spinor fields in monomial.h|c and
 replaced all g_spinor_fields in *monomial.c

reduced the number of g_spinor_fields in hmc_tm.c

g_spinor_field[0], [1], [2] and [3] are still used in some source files
relevant to hmc_tm. Those uses should be removed, too.
---
 cloverdet_monomial.c      |  42 +++++++--------
 cloverdetratio_monomial.c | 100 +++++++++++++++++------------------
 det_monomial.c            |  82 ++++++++++++++---------------
 detratio_monomial.c       | 106 +++++++++++++++++++-------------------
 hmc_tm.c                  |   4 +-
 monomial.c                |  27 +++++++---
 monomial.h                |   7 +--
 nddetratio_monomial.c     |  14 ++---
 ndpoly_monomial.c         |  32 ++++++------
 poly_monomial.c           |  62 +++++++++++-----------
 10 files changed, 242 insertions(+), 234 deletions(-)

diff --git a/cloverdet_monomial.c b/cloverdet_monomial.c
index 53b1366c7..ff1bd9100 100644
--- a/cloverdet_monomial.c
+++ b/cloverdet_monomial.c
@@ -77,39 +77,39 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
   }
   
   // Invert Q_{+} Q_{-}
-  // X_o -> DUM_DERI+1
-  chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+  // X_o -> w_fields[1]
+  chrono_guess(mnl->w_fields[1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 	       mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
-  mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->maxiter, mnl->forceprec, 
+  mnl->iter1 += cg_her(mnl->w_fields[1], mnl->pf, mnl->maxiter, mnl->forceprec, 
 		       g_relative_precision_flag, VOLUME/2, mnl->Qsq);
-  chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
+  chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
   
-  // Y_o -> DUM_DERI
-  mnl->Qm(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  // Y_o -> w_fields[0]
+  mnl->Qm(mnl->w_fields[0], mnl->w_fields[1]);
   
   // apply Hopping Matrix M_{eo}
   // to get the even sites of X_e
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   // \delta Q sandwitched by Y_o^\dagger and X_e
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf); 
   
   // to get the even sites of Y_e
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   // \delta Q sandwitched by Y_e^\dagger and X_o
   // uses the gauge field in hf and changes the derivative fields in hf
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf);
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf);
   
   // here comes the clover term...
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
-  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
+  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
+  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
-  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
+  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
   
   // compute the contribution for the det-part
   // we again compute only the insertion matrices for S_det
@@ -147,10 +147,10 @@ void cloverdet_heatbath(const int id, hamiltonian_field_t * const hf) {
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert(EE, mnl->mu);
 
-  random_spinor_field(g_spinor_field[2], VOLUME/2, mnl->rngrepro);
-  mnl->energy0 = square_norm(g_spinor_field[2], VOLUME/2, 1);
+  random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+  mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   
-  mnl->Qp(mnl->pf, g_spinor_field[2]);
+  mnl->Qp(mnl->pf, mnl->w_fields[0]);
   chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
 		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
 
@@ -176,16 +176,16 @@ double cloverdet_acc(const int id, hamiltonian_field_t * const hf) {
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert(EE, mnl->mu);
 
-  chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+  chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 	       mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
   g_sloppy_precision_flag = 0;
-  mnl->iter0 = cg_her(g_spinor_field[2], mnl->pf, mnl->maxiter, mnl->accprec,  
+  mnl->iter0 = cg_her(mnl->w_fields[0], mnl->pf, mnl->maxiter, mnl->accprec,  
 		      g_relative_precision_flag, VOLUME/2, mnl->Qsq); 
-  mnl->Qm(g_spinor_field[2], g_spinor_field[2]);
+  mnl->Qm(mnl->w_fields[0], mnl->w_fields[0]);
   
   g_sloppy_precision_flag = save_sloppy;
   /* Compute the energy contr. from first field */
-  mnl->energy1 = square_norm(g_spinor_field[2], VOLUME/2, 1);
+  mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
   g_mu = g_mu1;
   g_mu3 = 0.;
diff --git a/cloverdetratio_monomial.c b/cloverdetratio_monomial.c
index 8106d0697..a0f04d6d7 100644
--- a/cloverdetratio_monomial.c
+++ b/cloverdetratio_monomial.c
@@ -82,69 +82,69 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
     fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (detratio_monomial.c)\n");
   }
   
-  mnl->Qp(g_spinor_field[DUM_DERI+2], mnl->pf);
+  mnl->Qp(mnl->w_fields[2], mnl->pf);
   g_mu3 = mnl->rho; // rho1
 
   /* Invert Q_{+} Q_{-} */
-  /* X_W -> DUM_DERI+1 */
-  chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->csg_field, 
+  /* X_W -> w_fields[1] */
+  chrono_guess(mnl->w_fields[1], mnl->w_fields[2], mnl->csg_field, 
 	       mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
-  mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->maxiter, 
+  mnl->iter1 += cg_her(mnl->w_fields[1], mnl->w_fields[2], mnl->maxiter, 
 		       mnl->forceprec, g_relative_precision_flag, VOLUME/2, mnl->Qsq);
-  chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
+  chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
-  /* Y_W -> DUM_DERI  */
-  mnl->Qm(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  /* Y_W -> w_fields[0]  */
+  mnl->Qm(mnl->w_fields[0], mnl->w_fields[1]);
   
   /* apply Hopping Matrix M_{eo} */
   /* to get the even sites of X */
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf); 
   
   /* to get the even sites of Y */
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf); 
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf); 
 
   // here comes the clover term...
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
-  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
-  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
+  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
+  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
-  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
+  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
 
   g_mu3 = mnl->rho2; // rho2
   
   /* Second term coming from the second field */
   /* The sign is opposite!! */
-  mul_r(g_spinor_field[DUM_DERI], -1., mnl->pf, VOLUME/2);
+  mul_r(mnl->w_fields[0], -1., mnl->pf, VOLUME/2);
   
   /* apply Hopping Matrix M_{eo} */
   /* to get the even sites of X */
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf); 
   
   /* to get the even sites of Y */
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf);
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf);
 
   // here comes the clover term...
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
-  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
+  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
+  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
-  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
+  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
 
   sw_all(hf, mnl->kappa, mnl->c_sw);
   
@@ -186,43 +186,43 @@ void cloverdetratio_derivative(const int no, hamiltonian_field_t * const hf) {
   
   // apply W_{+} to phi
   g_mu3 = mnl->rho2; //rho2
-  mnl->Qp(g_spinor_field[DUM_DERI+2], mnl->pf);
+  mnl->Qp(mnl->w_fields[2], mnl->pf);
   g_mu3 = mnl->rho; // rho1
 
   // Invert Q_{+} Q_{-}
-  // X_W -> DUM_DERI+1 
-  chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->csg_field, 
+  // X_W -> w_fields[1] 
+  chrono_guess(mnl->w_fields[1], mnl->w_fields[2], mnl->csg_field, 
 	       mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
-  mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->maxiter, 
+  mnl->iter1 += cg_her(mnl->w_fields[1], mnl->w_fields[2], mnl->maxiter, 
 		       mnl->forceprec, g_relative_precision_flag, VOLUME/2, mnl->Qsq);
-  chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
+  chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 		      mnl->csg_N, &mnl->csg_n, VOLUME/2);
-  // Apply Q_{-} to get Y_W -> DUM_DERI 
-  mnl->Qm(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
-  // Compute phi - Y_W -> DUM_DERI
-  diff(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], mnl->pf, VOLUME/2);
+  // Apply Q_{-} to get Y_W -> w_fields[0] 
+  mnl->Qm(mnl->w_fields[0], mnl->w_fields[1]);
+  // Compute phi - Y_W -> w_fields[0]
+  diff(mnl->w_fields[0], mnl->w_fields[0], mnl->pf, VOLUME/2);
 
   /* apply Hopping Matrix M_{eo} */
   /* to get the even sites of X */
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EE, -mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf); 
   
   /* to get the even sites of Y */
-  H_eo_sw_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EE, mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf); 
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf); 
 
   // here comes the clover term...
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
-  gamma5(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+2], VOLUME/2);
-  sw_spinor(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3]);
+  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
+  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
-  sw_spinor(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
+  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
 
   sw_all(hf, mnl->kappa, mnl->c_sw);
   
@@ -249,15 +249,15 @@ void cloverdetratio_heatbath(const int id, hamiltonian_field_t * const hf) {
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert(EE, mnl->mu);
 
-  random_spinor_field(g_spinor_field[4], VOLUME/2, mnl->rngrepro);
-  mnl->energy0  = square_norm(g_spinor_field[4], VOLUME/2, 1);
+  random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+  mnl->energy0  = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   
   g_mu3 = mnl->rho;
-  mnl->Qp(g_spinor_field[3], g_spinor_field[4]);
+  mnl->Qp(mnl->w_fields[1], mnl->w_fields[0]);
   g_mu3 = mnl->rho2;
   zero_spinor_field(mnl->pf,VOLUME/2);
 
-  mnl->iter0 = cg_her(mnl->pf, g_spinor_field[3], mnl->maxiter, mnl->accprec,  
+  mnl->iter0 = cg_her(mnl->pf, mnl->w_fields[1], mnl->maxiter, mnl->accprec,  
 		      g_relative_precision_flag, VOLUME/2, mnl->Qsq); 
 
   chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
@@ -281,20 +281,20 @@ double cloverdetratio_acc(const int id, hamiltonian_field_t * const hf) {
   boundary(mnl->kappa);
   
   g_mu3 = mnl->rho2;
-  mnl->Qp(g_spinor_field[DUM_DERI+5], mnl->pf);
+  mnl->Qp(mnl->w_fields[1], mnl->pf);
   g_mu3 = mnl->rho;
 
-  chrono_guess(g_spinor_field[3], g_spinor_field[DUM_DERI+5], mnl->csg_field, mnl->csg_index_array, 
+  chrono_guess(mnl->w_fields[0], mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array, 
 	       mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
   g_sloppy_precision_flag = 0;    
-  mnl->iter0 += cg_her(g_spinor_field[3], g_spinor_field[DUM_DERI+5], mnl->maxiter, mnl->accprec,  
+  mnl->iter0 += cg_her(mnl->w_fields[0], mnl->w_fields[1], mnl->maxiter, mnl->accprec,  
 		      g_relative_precision_flag, VOLUME/2, mnl->Qsq);
-  mnl->Qm(g_spinor_field[3], g_spinor_field[3]);
+  mnl->Qm(mnl->w_fields[0], mnl->w_fields[0]);
 
   g_sloppy_precision_flag = save_sloppy;
 
   /* Compute the energy contr. from second field */
-  mnl->energy1 = square_norm(g_spinor_field[3], VOLUME/2, 1);
+  mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
   g_mu = g_mu1;
   g_mu3 = 0.;
diff --git a/det_monomial.c b/det_monomial.c
index 53a7dc0ad..64416e27c 100644
--- a/det_monomial.c
+++ b/det_monomial.c
@@ -76,27 +76,27 @@ void det_derivative(const int id, hamiltonian_field_t * const hf) {
     }
     
     /* Invert Q_{+} Q_{-} */
-    /* X_o -> DUM_DERI+1 */
-    chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+    /* X_o -> w_fields[1] */
+    chrono_guess(mnl->w_fields[1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_pm_psi);
-    mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->maxiter, mnl->forceprec, 
+    mnl->iter1 += cg_her(mnl->w_fields[1], mnl->pf, mnl->maxiter, mnl->forceprec, 
 			 g_relative_precision_flag, VOLUME/2, &Qtm_pm_psi);
-    chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
+    chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
     
-    /* Y_o -> DUM_DERI  */
-    Qtm_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+    /* Y_o -> w_fields[0]  */
+    Qtm_minus_psi(mnl->w_fields[0], mnl->w_fields[1]);
     
     /* apply Hopping Matrix M_{eo} */
     /* to get the even sites of X_e */
-    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EO, -1.);
+    H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -1.);
     /* \delta Q sandwitched by Y_o^\dagger and X_e */
-    deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
+    deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf); 
     
     /* to get the even sites of Y_e */
-    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EO, +1);
+    H_eo_tm_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, +1);
     /* \delta Q sandwitched by Y_e^\dagger and X_o */
-    deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf);
+    deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf);
 
   } 
   else {
@@ -110,45 +110,45 @@ void det_derivative(const int id, hamiltonian_field_t * const hf) {
     boundary(mnl->kappa);
     if(mnl->solver == CG) {
       /* Invert Q_{+} Q_{-} */
-      /* X -> DUM_DERI+1 */
-      chrono_guess(g_spinor_field[DUM_DERI+1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+      /* X -> w_fields[1] */
+      chrono_guess(mnl->w_fields[1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi);
-      mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], mnl->pf, 
+      mnl->iter1 += cg_her(mnl->w_fields[1], mnl->pf, 
 			mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
 			VOLUME, &Q_pm_psi);
-      chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
+      chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
 
-      /* Y -> DUM_DERI  */
-      Q_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+      /* Y -> w_fields[0]  */
+      Q_minus_psi(mnl->w_fields[0], mnl->w_fields[1]);
       
     }
     else {
       /* Invert first Q_+ */
-      /* Y -> DUM_DERI  */
-      chrono_guess(g_spinor_field[DUM_DERI], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+      /* Y -> w_fields[0]  */
+      chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
-      mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI], mnl->pf, 
+      mnl->iter1 += bicgstab_complex(mnl->w_fields[0], mnl->pf, 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
 				     VOLUME,  Q_plus_psi);
-      chrono_add_solution(g_spinor_field[DUM_DERI], mnl->csg_field, mnl->csg_index_array,
+      chrono_add_solution(mnl->w_fields[0], mnl->csg_field, mnl->csg_index_array,
 			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
       
       /* Now Q_- */
-      /* X -> DUM_DERI+1 */
+      /* X -> w_fields[1] */
       g_mu = -g_mu;
-      chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], mnl->csg_field2, 
+      chrono_guess(mnl->w_fields[1], mnl->w_fields[0], mnl->csg_field2, 
 		   mnl->csg_index_array2, mnl->csg_N2, mnl->csg_n2, VOLUME/2, &Q_minus_psi);
-      mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], 
+      mnl->iter1 += bicgstab_complex(mnl->w_fields[1], mnl->w_fields[0], 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
 				     VOLUME, Q_minus_psi);
-      chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field2, mnl->csg_index_array2,
+      chrono_add_solution(mnl->w_fields[1], mnl->csg_field2, mnl->csg_index_array2,
 			  mnl->csg_N2, &mnl->csg_n2, VOLUME/2);
       g_mu = -g_mu;   
     }
     
     /* \delta Q sandwitched by Y^\dagger and X */
-    deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf);
+    deriv_Sb_D_psi(mnl->w_fields[0], mnl->w_fields[1], hf);
   }
   g_mu = g_mu1;
   boundary(g_kappa);
@@ -168,10 +168,10 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
   mnl->iter1 = 0;
 
   if(mnl->even_odd_flag) {
-    random_spinor_field(g_spinor_field[2], VOLUME/2, mnl->rngrepro);
-    mnl->energy0 = square_norm(g_spinor_field[2], VOLUME/2, 1);
+    random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+    mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
-    Qtm_plus_psi(mnl->pf, g_spinor_field[2]);
+    Qtm_plus_psi(mnl->pf, mnl->w_fields[0]);
     chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
     if(mnl->solver != CG) {
@@ -180,10 +180,10 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
     }
   }
   else {
-    random_spinor_field(g_spinor_field[2], VOLUME, mnl->rngrepro);
-    mnl->energy0 = square_norm(g_spinor_field[2], VOLUME, 1);
+    random_spinor_field(mnl->w_fields[0], VOLUME, mnl->rngrepro);
+    mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME, 1);
 
-    Q_plus_psi(mnl->pf, g_spinor_field[2]);
+    Q_plus_psi(mnl->pf, mnl->w_fields[0]);
     chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
     if(mnl->solver != CG) {
@@ -212,32 +212,32 @@ double det_acc(const int id, hamiltonian_field_t * const hf) {
     if(mnl->solver == CG) {
       ITER_MAX_BCG = 0;
     }
-    chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+    chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
     g_sloppy_precision_flag = 0;
-    mnl->iter0 = bicg(g_spinor_field[2], mnl->pf, mnl->accprec, g_relative_precision_flag);
+    mnl->iter0 = bicg(mnl->w_fields[0], mnl->pf, mnl->accprec, g_relative_precision_flag);
     g_sloppy_precision_flag = save_sloppy;
     /* Compute the energy contr. from first field */
-    mnl->energy1 = square_norm(g_spinor_field[2], VOLUME/2, 1);
+    mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   }
   else {
     if(mnl->solver == CG) {
-      chrono_guess(g_spinor_field[DUM_DERI+5], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+      chrono_guess(mnl->w_fields[1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi);
-      mnl->iter0 = cg_her(g_spinor_field[DUM_DERI+5], mnl->pf, 
+      mnl->iter0 = cg_her(mnl->w_fields[1], mnl->pf, 
 			  mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
 			  VOLUME, Q_pm_psi);
-      Q_minus_psi(g_spinor_field[2], g_spinor_field[DUM_DERI+5]);
+      Q_minus_psi(mnl->w_fields[0], mnl->w_fields[1]);
       /* Compute the energy contr. from first field */
-      mnl->energy1 = square_norm(g_spinor_field[2], VOLUME, 1);
+      mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME, 1);
     }
     else {
-      chrono_guess(g_spinor_field[2], mnl->pf, mnl->csg_field, mnl->csg_index_array,
+      chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
-      mnl->iter0 += bicgstab_complex(g_spinor_field[2], mnl->pf, 
+      mnl->iter0 += bicgstab_complex(mnl->w_fields[0], mnl->pf, 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
 				     VOLUME,  Q_plus_psi);
-      mnl->energy1 = square_norm(g_spinor_field[2], VOLUME, 1);
+      mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME, 1);
     }
   }
   g_mu = g_mu1;
diff --git a/detratio_monomial.c b/detratio_monomial.c
index 627328809..079d94eb0 100644
--- a/detratio_monomial.c
+++ b/detratio_monomial.c
@@ -82,48 +82,48 @@ void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
       fprintf(stderr, "Bicgstab currently not implemented, using CG instead! (detratio_monomial.c)\n");
     }
 
-    Qtm_plus_psi(g_spinor_field[DUM_DERI+2], mnl->pf);
+    Qtm_plus_psi(mnl->w_fields[2], mnl->pf);
     g_mu = mnl->mu;
     boundary(mnl->kappa);
     /* Invert Q_{+} Q_{-} */
-    /* X_W -> DUM_DERI+1 */
-    chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->csg_field, 
+    /* X_W -> w_fields[1] */
+    chrono_guess(mnl->w_fields[1], mnl->w_fields[2], mnl->csg_field, 
 		 mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_pm_psi);
-    mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->maxiter, 
+    mnl->iter1 += cg_her(mnl->w_fields[1], mnl->w_fields[2], mnl->maxiter, 
 			 mnl->forceprec, g_relative_precision_flag, VOLUME/2, &Qtm_pm_psi);
-    chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
+    chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
-    /* Y_W -> DUM_DERI  */
-    Qtm_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+    /* Y_W -> w_fields[0]  */
+    Qtm_minus_psi(mnl->w_fields[0], mnl->w_fields[1]);
     
     /* apply Hopping Matrix M_{eo} */
     /* to get the even sites of X */
-    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EO, -1.);
+    H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -1.);
     /* \delta Q sandwitched by Y_o^\dagger and X_e */
-    deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
+    deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf); 
     
     /* to get the even sites of Y */
-    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EO, +1);
+    H_eo_tm_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, +1);
     /* \delta Q sandwitched by Y_e^\dagger and X_o */
-    deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf); 
+    deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf); 
 
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);
     
     /* Second term coming from the second field */
     /* The sign is opposite!! */
-    mul_r(g_spinor_field[DUM_DERI], -1., mnl->pf, VOLUME/2);
+    mul_r(mnl->w_fields[0], -1., mnl->pf, VOLUME/2);
 
     /* apply Hopping Matrix M_{eo} */
     /* to get the even sites of X */
-    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+1], EO, -1.);
+    H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -1.);
     /* \delta Q sandwitched by Y_o^\dagger and X_e */
-    deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf); 
+    deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf); 
     
     /* to get the even sites of Y */
-    H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI], EO, +1);
+    H_eo_tm_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, +1);
     /* \delta Q sandwitched by Y_e^\dagger and X_o */
-    deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf);
+    deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf);
 
   } 
   else { /* no even/odd preconditioning */
@@ -137,64 +137,64 @@ void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
     /* Multiply with W_+ */
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);	
-    Q_plus_psi(g_spinor_field[DUM_DERI+2], mnl->pf);
+    Q_plus_psi(mnl->w_fields[2], mnl->pf);
     g_mu = mnl->mu;
     boundary(mnl->kappa);
     if(mnl->solver == CG) {
       /* If CG is used anyhow */
-      /*       gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], VOLUME/2); */
+      /*       gamma5(mnl->w_fields[1], mnl->w_fields[2], VOLUME/2); */
       /* Invert Q_{+} Q_{-} */
-      /* X_W -> DUM_DERI+1 */
-      chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], mnl->csg_field, 
+      /* X_W -> w_fields[1] */
+      chrono_guess(mnl->w_fields[1], mnl->w_fields[2], mnl->csg_field, 
 		   mnl->csg_index_array, mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi);
-      mnl->iter1 += cg_her(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], 
+      mnl->iter1 += cg_her(mnl->w_fields[1], mnl->w_fields[2], 
 			   mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
 			   VOLUME, &Q_pm_psi);
-      chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field, mnl->csg_index_array,
+      chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
       
-      /* Y_W -> DUM_DERI  */
-      Q_minus_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+      /* Y_W -> w_fields[0]  */
+      Q_minus_psi(mnl->w_fields[0], mnl->w_fields[1]);
     }
     else {
       /* Invert first Q_+ */
-      /* Y_o -> DUM_DERI  */
+      /* Y_o -> w_fields[0]  */
 
-      chrono_guess(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], mnl->csg_field, mnl->csg_index_array,
+      chrono_guess(mnl->w_fields[0], mnl->w_fields[2], mnl->csg_field, mnl->csg_index_array,
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
-      gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME);
-      mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], 
+      gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME);
+      mnl->iter1 += bicgstab_complex(mnl->w_fields[0], mnl->w_fields[2], 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
 				     VOLUME, Q_plus_psi);
-      chrono_add_solution(g_spinor_field[DUM_DERI], mnl->csg_field, mnl->csg_index_array,
+      chrono_add_solution(mnl->w_fields[0], mnl->csg_field, mnl->csg_index_array,
 			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
 
       /* Now Q_- */
-      /* X_o -> DUM_DERI+1 */
+      /* X_o -> w_fields[1] */
       g_mu = -g_mu;
-      chrono_guess(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI], mnl->csg_field2, 
+      chrono_guess(mnl->w_fields[1], mnl->w_fields[0], mnl->csg_field2, 
 		   mnl->csg_index_array2, mnl->csg_N2, mnl->csg_n2, VOLUME/2, &Q_minus_psi);
-      gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME);
-      mnl->iter1 += bicgstab_complex(g_spinor_field[DUM_DERI+1],g_spinor_field[DUM_DERI], 
+      gamma5(mnl->w_fields[1], mnl->w_fields[1], VOLUME);
+      mnl->iter1 += bicgstab_complex(mnl->w_fields[1],mnl->w_fields[0], 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
 				     VOLUME, Q_minus_psi);
-      chrono_add_solution(g_spinor_field[DUM_DERI+1], mnl->csg_field2, mnl->csg_index_array2,
+      chrono_add_solution(mnl->w_fields[1], mnl->csg_field2, mnl->csg_index_array2,
 			  mnl->csg_N2, &mnl->csg_n2, VOLUME/2);
       g_mu = -g_mu;   
     }
 
     /* \delta Q sandwitched by Y^\dagger and X */
-    deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf); 
+    deriv_Sb_D_psi(mnl->w_fields[0], mnl->w_fields[1], hf); 
     
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);
     
     /* Second term coming from the second field */
     /* The sign is opposite!! */
-    mul_r(g_spinor_field[DUM_DERI], -1., mnl->pf, VOLUME);
+    mul_r(mnl->w_fields[0], -1., mnl->pf, VOLUME);
     
     /* \delta Q sandwitched by Y^\dagger and X */
-    deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf);
+    deriv_Sb_D_psi(mnl->w_fields[0], mnl->w_fields[1], hf);
   }
   g_mu = g_mu1;
   boundary(g_kappa);
@@ -215,16 +215,16 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
   mnl->iter0 = 0;
   mnl->iter1 = 0;
   if(mnl->even_odd_flag) {
-    random_spinor_field(g_spinor_field[4], VOLUME/2, mnl->rngrepro);
-    mnl->energy0  = square_norm(g_spinor_field[4], VOLUME/2, 1);
+    random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+    mnl->energy0  = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
-    Qtm_plus_psi(g_spinor_field[3], g_spinor_field[4]);
+    Qtm_plus_psi(mnl->w_fields[1], mnl->w_fields[0]);
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);
     zero_spinor_field(mnl->pf,VOLUME/2);
     if(mnl->solver == CG) ITER_MAX_BCG = 0;
     ITER_MAX_CG = mnl->maxiter;
-    mnl->iter0 += bicg(mnl->pf, g_spinor_field[3], mnl->accprec, g_relative_precision_flag);
+    mnl->iter0 += bicg(mnl->pf, mnl->w_fields[1], mnl->accprec, g_relative_precision_flag);
 
     chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
@@ -234,14 +234,14 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
     }
   }
   else {
-    random_spinor_field(g_spinor_field[4], VOLUME, mnl->rngrepro);
-    mnl->energy0 = square_norm(g_spinor_field[4], VOLUME, 1);
+    random_spinor_field(mnl->w_fields[0], VOLUME, mnl->rngrepro);
+    mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME, 1);
 
-    Q_plus_psi(g_spinor_field[3], g_spinor_field[4]);
+    Q_plus_psi(mnl->w_fields[1], mnl->w_fields[0]);
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);
     zero_spinor_field(mnl->pf,VOLUME);
-    mnl->iter0 += bicgstab_complex(mnl->pf, g_spinor_field[3], mnl->maxiter, mnl->accprec, 
+    mnl->iter0 += bicgstab_complex(mnl->pf, mnl->w_fields[1], mnl->maxiter, mnl->accprec, 
 				   g_relative_precision_flag, VOLUME, Q_plus_psi);
     chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
@@ -267,32 +267,32 @@ double detratio_acc(const int id, hamiltonian_field_t * const hf) {
   g_mu = mnl->mu2;
   boundary(mnl->kappa2);
   if(even_odd_flag) {
-    Qtm_plus_psi(g_spinor_field[DUM_DERI+5], mnl->pf);
+    Qtm_plus_psi(mnl->w_fields[1], mnl->pf);
     g_mu = mnl->mu;
     boundary(mnl->kappa);
     if(mnl->solver == CG) ITER_MAX_BCG = 0;
     ITER_MAX_CG = mnl->maxiter;
-    chrono_guess(g_spinor_field[3], g_spinor_field[DUM_DERI+5], mnl->csg_field, mnl->csg_index_array, 
+    chrono_guess(mnl->w_fields[0], mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array, 
 		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
     g_sloppy_precision_flag = 0;    
-    mnl->iter0 += bicg(g_spinor_field[3], g_spinor_field[DUM_DERI+5], mnl->accprec, g_relative_precision_flag); 
+    mnl->iter0 += bicg(mnl->w_fields[0], mnl->w_fields[1], mnl->accprec, g_relative_precision_flag); 
     g_sloppy_precision_flag = save_sloppy;
     /*     ITER_MAX_BCG = *saveiter_max; */
     /* Compute the energy contr. from second field */
-    mnl->energy1 = square_norm(g_spinor_field[3], VOLUME/2, 1);
+    mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   }
   else {
-    Q_plus_psi(g_spinor_field[DUM_DERI+5], mnl->pf);
+    Q_plus_psi(mnl->w_fields[1], mnl->pf);
     g_mu = mnl->mu;
     boundary(mnl->kappa);
-    chrono_guess(g_spinor_field[3], g_spinor_field[DUM_DERI+5], mnl->csg_field, mnl->csg_index_array, 
+    chrono_guess(mnl->w_fields[0], mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array, 
 		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
-    mnl->iter0 += bicgstab_complex(g_spinor_field[3], g_spinor_field[DUM_DERI+5], 
+    mnl->iter0 += bicgstab_complex(mnl->w_fields[0], mnl->w_fields[1], 
 				   mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
 				   VOLUME, Q_plus_psi); 
     /*     ITER_MAX_BCG = *saveiter_max; */
     /* Compute the energy contr. from second field */
-    mnl->energy1 = square_norm(g_spinor_field[3], VOLUME, 1);
+    mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME, 1);
   }
   g_mu = g_mu1;
   boundary(g_kappa);
diff --git a/hmc_tm.c b/hmc_tm.c
index 3399b96ea..7802ce8a7 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -184,8 +184,8 @@ int main(int argc,char *argv[]) {
     exit(-1);
   }
 
-  DUM_DERI = 6;
-  DUM_SOLVER = DUM_DERI+8;
+  DUM_DERI = 4;
+  DUM_SOLVER = DUM_DERI+1;
   DUM_MATRIX = DUM_SOLVER+6;
   if(g_running_phmc) {
     NO_OF_SPINORFIELDS = DUM_MATRIX+8;
diff --git a/monomial.c b/monomial.c
index 6498be4f7..88e6756c0 100644
--- a/monomial.c
+++ b/monomial.c
@@ -1,8 +1,7 @@
 /***********************************************************************
  *
- * Copyright (C) 2008 Carsten Urbach
- *
- * Modified by Jenifer Gonzalez Lopez 2009/03/31
+ * Copyright (C) 2008,2011,2012 Carsten Urbach
+ *               2009 Jenifer Gonzalez Lopez
  *
  * This file is part of tmLQCD.
  *
@@ -50,6 +49,8 @@ int no_gauge_monomials = 0;
 int no_ndpoly_monomials = 0;
 int clover_trlog_monomial = 0;
 static spinor * _pf;
+spinor ** w_fields;
+const int no_wfields = 4;
 
 int add_monomial(const int type) {
   
@@ -63,10 +64,12 @@ int add_monomial(const int type) {
 
   monomial_list[no_monomials].pf = NULL;
   monomial_list[no_monomials].pf2 = NULL;
+  monomial_list[no_monomials].w_fields = NULL;
   monomial_list[no_monomials].csg_field = NULL;
   monomial_list[no_monomials].csg_field2 = NULL;
   monomial_list[no_monomials].csg_index_array = NULL;
   monomial_list[no_monomials].csg_index_array2 = NULL;
+  monomial_list[no_monomials].no_wfields = no_wfields;
   monomial_list[no_monomials].csg_N = 0;
   monomial_list[no_monomials].csg_N2 = 0;
   monomial_list[no_monomials].csg_n = 1;
@@ -123,17 +126,17 @@ int add_monomial(const int type) {
 
 
 int init_monomials(const int V, const int even_odd_flag) {
-  int i, no=0;
+  int no=0;
   int retval;
   spinor * __pf = NULL;
   double sw_mu=0., sw_k=0., sw_c=0.;
-  for(i = 0; i < no_monomials; i++) {
+  for(int i = 0; i < no_monomials; i++) {
     if((monomial_list[i].type != GAUGE) && (monomial_list[i].type != SFGAUGE)) no++;
     /* non-degenerate monomials need two pseudo fermion fields */
     if((monomial_list[i].type == NDPOLY) || (monomial_list[i].type == NDDETRATIO)) no++;
   }
   if(no_monomials > 0) {
-    if((void*)(_pf = (spinor*)calloc(no*V+1, sizeof(spinor))) == NULL) {
+    if((void*)(_pf = (spinor*)calloc((no+4)*V+1, sizeof(spinor))) == NULL) {
       printf ("malloc errno in monomial pf fields: %d\n",errno); 
       errno = 0;
       return(1);
@@ -145,12 +148,20 @@ int init_monomials(const int V, const int even_odd_flag) {
       __pf = _pf;
 #endif
     }
+    if((void*)(w_fields = (spinor**)calloc(no_wfields, sizeof(spinor*))) == NULL) {
+      printf ("malloc errno in monomial  w_fields: %d\n",errno); 
+      errno = 0;
+      return(1);
+    }
+    for(int i = 0; i < no_wfields; i++) {
+      w_fields[i] = __pf+(no+i)*V;
+    }
   }
 
   no = 0;
-  for(i = 0; i < no_monomials; i++) {
+  for(int i = 0; i < no_monomials; i++) {
     if((monomial_list[i].type != GAUGE) && (monomial_list[i].type != SFGAUGE)) {
-          
+      monomial_list[i].w_fields = w_fields;
       monomial_list[i].pf = __pf+no*V;
       no++;
       monomial_list[i].rngrepro = reproduce_randomnumber_flag;
diff --git a/monomial.h b/monomial.h
index 3d4e0f78b..5ed6eb57e 100644
--- a/monomial.h
+++ b/monomial.h
@@ -1,8 +1,7 @@
 /***********************************************************************
  *
- * Copyright (C) 2008 Carsten Urbach
- *
- * Modified by Jenifer Gonzalez Lopez 2009/03/27
+ * Copyright (C) 2008,2011,2012 Carsten Urbach
+ *               2009 Jenifer Gonzalez Lopez
  *
  * This file is part of tmLQCD.
  *
@@ -92,12 +91,14 @@ typedef struct {
   double MDPolyLocNormConst;
   int MDPolyDetRatio;
   int MaxPtildeDegree;
+  int no_wfields;
   double PrecisionPtilde;
   double PrecisionHfinal;
   double StildeMin, StildeMax;
   /* chronological solver fields */
   spinor ** csg_field;
   spinor ** csg_field2;
+  spinor ** w_fields;
   /* functions for the HMC update */
   void (*hbfunction) (const int no, hamiltonian_field_t * const hf);
   double (*accfunction) (const int no, hamiltonian_field_t * const hf);
diff --git a/nddetratio_monomial.c b/nddetratio_monomial.c
index 34cae8ec0..17ca71d3e 100644
--- a/nddetratio_monomial.c
+++ b/nddetratio_monomial.c
@@ -59,21 +59,21 @@ double nddetratio_acc(const int id, hamiltonian_field_t * const hf) {
   g_epsbar = mnl->epsbar;
   boundary(mnl->kappa);
 
-  iter = cg_her_nd(g_spinor_field[2], g_spinor_field[3], mnl->pf, mnl->pf2,
+  iter = cg_her_nd(mnl->w_fields[0], mnl->w_fields[1], mnl->pf, mnl->pf2,
 		   mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
 		   VOLUME/2, &Q_Qdagger_ND);
-  QdaggerNon_degenerate(g_spinor_field[0], g_spinor_field[1],
-			g_spinor_field[2], g_spinor_field[3]);
+  QdaggerNon_degenerate(mnl->w_fields[2], mnl->w_fields[3],
+			mnl->w_fields[0], mnl->w_fields[1]);
 
   g_mubar = mnl->mubar2;
   g_epsbar = mnl->epsbar2;
   boundary(mnl->kappa2);
 
-  QNon_degenerate(g_spinor_field[2], g_spinor_field[3],
-		  g_spinor_field[0], g_spinor_field[1]);
+  QNon_degenerate(mnl->w_fields[0], mnl->w_fields[1],
+		  mnl->w_fields[2], mnl->w_fields[3]);
   
-  mnl->energy1  = scalar_prod_r(mnl->pf , g_spinor_field[2], VOLUME/2, 1);
-  mnl->energy1 += scalar_prod_r(mnl->pf2, g_spinor_field[3], VOLUME/2, 1);
+  mnl->energy1  = scalar_prod_r(mnl->pf , mnl->w_fields[0], VOLUME/2, 1);
+  mnl->energy1 += scalar_prod_r(mnl->pf2, mnl->w_fields[1], VOLUME/2, 1);
 
   return(mnl->energy1 - mnl->energy0);
 }
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 0c67d3932..eb960b9db 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -94,20 +94,20 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 			   phmc_root[2*phmc_dop_n_cheby-3-j]);
       
       /* Get the even parts of the  (j-1)th  chi_spinors */
-      H_eo_ND(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], 
+      H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
 	      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
       
       /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
-      deriv_Sb(EO, g_spinor_field[DUM_DERI], g_chi_up_spinor_field[phmc_dop_n_cheby], hf);      /* UP */
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+1], g_chi_dn_spinor_field[phmc_dop_n_cheby], hf);    /* DN */
+      deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[phmc_dop_n_cheby], hf);      /* UP */
+      deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[phmc_dop_n_cheby], hf);    /* DN */
       
       /* Get the even parts of the  (2N-j)-th  chi_spinors */
-      H_eo_ND(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], 
+      H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
 	      g_chi_up_spinor_field[phmc_dop_n_cheby], g_chi_dn_spinor_field[phmc_dop_n_cheby], EO);
       
       /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
-      deriv_Sb(OE, g_chi_up_spinor_field[j-1], g_spinor_field[DUM_DERI], hf);
-      deriv_Sb(OE, g_chi_dn_spinor_field[j-1], g_spinor_field[DUM_DERI+1], hf);
+      deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf);
+      deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[1], hf);
     }
   } 
   else if(g_epsbar == 0.0) {
@@ -130,21 +130,21 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 			   g_chi_up_spinor_field[phmc_dop_n_cheby-1],
 			   phmc_root[2*phmc_dop_n_cheby-3-j]);
 
-      Qtm_minus_psi(g_spinor_field[DUM_DERI+3],g_chi_up_spinor_field[j-1]); 
+      Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[j-1]); 
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_chi_up_spinor_field[phmc_dop_n_cheby], EO, -1.);
-      deriv_Sb(OE, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+2], hf); 
+      H_eo_tm_inv_psi(mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], EO, -1.);
+      deriv_Sb(OE, mnl->w_fields[3], mnl->w_fields[2], hf); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], EO, 1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], g_chi_up_spinor_field[phmc_dop_n_cheby], hf);
+      H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[3], EO, 1.); 
+      deriv_Sb(EO, mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], hf);
 
-      Qtm_minus_psi(g_spinor_field[DUM_DERI+3],g_chi_up_spinor_field[phmc_dop_n_cheby]); 
+      Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[phmc_dop_n_cheby]); 
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2],g_spinor_field[DUM_DERI+3], EO, +1.);
-      deriv_Sb(OE, g_chi_up_spinor_field[j-1] , g_spinor_field[DUM_DERI+2], hf); 
+      H_eo_tm_inv_psi(mnl->w_fields[2],mnl->w_fields[3], EO, +1.);
+      deriv_Sb(OE, g_chi_up_spinor_field[j-1] , mnl->w_fields[2], hf); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_chi_up_spinor_field[j-1], EO, -1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], hf);
+      H_eo_tm_inv_psi(mnl->w_fields[2], g_chi_up_spinor_field[j-1], EO, -1.); 
+      deriv_Sb(EO, mnl->w_fields[2], mnl->w_fields[3], hf);
     }
   }
   /*
diff --git a/poly_monomial.c b/poly_monomial.c
index 9a20fd664..bbf1c2c43 100644
--- a/poly_monomial.c
+++ b/poly_monomial.c
@@ -119,22 +119,22 @@ void poly_derivative(const int id, hamiltonian_field_t * const hf){
 			    mnl->MDPolyRoots[mnl->MDPolyDegree-(j+1)]);
       
 
-      Qtm_minus_psi(g_spinor_field[DUM_DERI+3],chi_spinor_field[j-1]); 
+      Qtm_minus_psi(mnl->w_fields[1],chi_spinor_field[j-1]); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], chi_spinor_field[degreehalf+1], EO, -1.);
-      deriv_Sb(OE, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+2], hf); 
+      H_eo_tm_inv_psi(mnl->w_fields[0], chi_spinor_field[degreehalf+1], EO, -1.);
+      deriv_Sb(OE, mnl->w_fields[1], mnl->w_fields[0], hf); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], EO, 1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], chi_spinor_field[degreehalf+1], hf);
+      H_eo_tm_inv_psi(mnl->w_fields[0], mnl->w_fields[1], EO, 1.); 
+      deriv_Sb(EO, mnl->w_fields[0], chi_spinor_field[degreehalf+1], hf);
       
     
-      Qtm_minus_psi(g_spinor_field[DUM_DERI+3],chi_spinor_field[degreehalf+1]); 
+      Qtm_minus_psi(mnl->w_fields[1],chi_spinor_field[degreehalf+1]); 
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2],g_spinor_field[DUM_DERI+3], EO, +1.);
-      deriv_Sb(OE, chi_spinor_field[j-1] , g_spinor_field[DUM_DERI+2], hf); 
+      H_eo_tm_inv_psi(mnl->w_fields[0],mnl->w_fields[1], EO, +1.);
+      deriv_Sb(OE, chi_spinor_field[j-1] , mnl->w_fields[0], hf); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], chi_spinor_field[j-1], EO, -1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], hf);
+      H_eo_tm_inv_psi(mnl->w_fields[0], chi_spinor_field[j-1], EO, -1.); 
+      deriv_Sb(EO, mnl->w_fields[0], mnl->w_fields[1], hf);
       
     }
 
@@ -158,11 +158,11 @@ void poly_derivative(const int id, hamiltonian_field_t * const hf){
       g_mu=mnl->mu2;
       boundary(mnl->kappa2);
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2],chi_spinor_field[degreehalf], EO, -1.);
-      deriv_Sb(OE, mnl->pf , g_spinor_field[DUM_DERI+2], hf);
+      H_eo_tm_inv_psi(mnl->w_fields[0],chi_spinor_field[degreehalf], EO, -1.);
+      deriv_Sb(OE, mnl->pf , mnl->w_fields[0], hf);
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], mnl->pf, EO, +1.);
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], chi_spinor_field[degreehalf], hf);
+      H_eo_tm_inv_psi(mnl->w_fields[0], mnl->pf, EO, +1.);
+      deriv_Sb(EO, mnl->w_fields[0], chi_spinor_field[degreehalf], hf);
 
 
 
@@ -195,8 +195,6 @@ double poly_acc(const int id, hamiltonian_field_t * const hf){
 
   monomial * mnl = &monomial_list[id];
   int j;
-  spinor* spinor1=g_spinor_field[2];
-  spinor* spinor2=g_spinor_field[3];
   double diff;
   int no_eigenvalues=-1;
 
@@ -209,10 +207,10 @@ double poly_acc(const int id, hamiltonian_field_t * const hf){
       g_mu = mnl->mu2;
       boundary(mnl->kappa2);
 
-      Qtm_plus_psi(spinor2,mnl->pf);
+      Qtm_plus_psi(mnl->w_fields[1],mnl->pf);
 
     } else {
-      assign(spinor2,mnl->pf,VOLUME/2);
+      assign(mnl->w_fields[1],mnl->pf,VOLUME/2);
     }
 
     g_mu = mnl->mu;
@@ -224,13 +222,13 @@ double poly_acc(const int id, hamiltonian_field_t * const hf){
 
     /* apply B */
     for(j = 0; j < mnl->MDPolyDegree/2; j++){
-      assign(spinor1, spinor2, VOLUME/2);
-      Qtm_pm_min_cconst_nrm(spinor2,
-			    spinor1,
+      assign(mnl->w_fields[0], mnl->w_fields[1], VOLUME/2);
+      Qtm_pm_min_cconst_nrm(mnl->w_fields[1],
+			    mnl->w_fields[0],
 			    mnl->MDPolyRoots[j]);
     }
 
-    mnl->energy1 =  square_norm(spinor2, VOLUME/2,1);
+    mnl->energy1 =  square_norm(mnl->w_fields[1], VOLUME/2,1);
 
     /* calculate evs */
     if (compute_evs != 0) {
@@ -279,8 +277,6 @@ double poly_acc(const int id, hamiltonian_field_t * const hf){
 void poly_heatbath(const int id, hamiltonian_field_t * const hf){
   monomial * mnl = &monomial_list[id];
   int j;
-  spinor* spinor1=g_spinor_field[2];
-  spinor* spinor2=g_spinor_field[3];
 
   mnl->csg_n = 0;
   mnl->csg_n2 = 0;
@@ -297,25 +293,25 @@ void poly_heatbath(const int id, hamiltonian_field_t * const hf){
   if(mnl->even_odd_flag) {
 
 
-    random_spinor_field(spinor1, VOLUME/2, mnl->rngrepro);
-    mnl->energy0 = square_norm(spinor1, VOLUME/2, 1);
+    random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+    mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
     if(g_proc_id == 0 && g_debug_level > 3) {
       fprintf(stderr," Poly energy0     = %e \n" , mnl->energy0);
     }
 
     /* calculate the phmc hamiltonian */
-    Qtm_pm_psi(spinor2, spinor1);
+    Qtm_pm_psi(mnl->w_fields[1], mnl->w_fields[0]);
 
     /* solve (Q+)*(Q-)*P((Q+)*(Q-)) *x=y */
-    cg_her(spinor1, spinor2,
+    cg_her(mnl->w_fields[0], mnl->w_fields[1],
 	   1000,mnl->accprec,g_relative_precision_flag,VOLUME/2, Qtm_pm_Ptm_pm_psi);
     
     /*  phi= Bdagger phi  */
     for(j = 0; j < (mnl->MDPolyDegree/2); j++){
-      assign(spinor2, spinor1, VOLUME/2);
-      Qtm_pm_min_cconst_nrm(spinor1,
-				 spinor2,
+      assign(mnl->w_fields[1], mnl->w_fields[0], VOLUME/2);
+      Qtm_pm_min_cconst_nrm(mnl->w_fields[0],
+				 mnl->w_fields[1],
 				 mnl->MDPolyRoots[mnl->MDPolyDegree/2+j]);
     }
 
@@ -326,7 +322,7 @@ void poly_heatbath(const int id, hamiltonian_field_t * const hf){
       zero_spinor_field(mnl->pf,VOLUME/2);
       if(mnl->solver == CG) ITER_MAX_BCG = 0;
       ITER_MAX_CG = mnl->maxiter;
-      mnl->iter0 += bicg(mnl->pf, spinor1, mnl->accprec, g_relative_precision_flag);
+      mnl->iter0 += bicg(mnl->pf, mnl->w_fields[0], mnl->accprec, g_relative_precision_flag);
       
       chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
 			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
@@ -337,7 +333,7 @@ void poly_heatbath(const int id, hamiltonian_field_t * const hf){
       }
     } else {
       /* store constructed phi field */
-      assign(mnl->pf, spinor1, VOLUME/2);
+      assign(mnl->pf, mnl->w_fields[0], VOLUME/2);
     }
     
   }

From 62fe278f815dba5f9a700faf399125e18dfa40fb Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 20 Mar 2012 14:10:26 +0100
Subject: [PATCH 002/110] the functionality of init_phmc is moved to a new
 function init_nd_poly_monomial in ndpoly_monomial.c.

---
 default_input_values.h |   2 +-
 hmc_tm.c               |  69 ++++++++---------
 monomial.c             |   5 +-
 monomial.h             |   1 +
 ndpoly_monomial.c      | 172 ++++++++++++++++++++++++++++++++++++-----
 ndpoly_monomial.h      |   1 +
 read_input.l           |   7 +-
 7 files changed, 197 insertions(+), 60 deletions(-)

diff --git a/default_input_values.h b/default_input_values.h
index 323cf0f29..acab2844a 100644
--- a/default_input_values.h
+++ b/default_input_values.h
@@ -154,7 +154,7 @@
 #define _default_MDPolyDegree 123
 #define _default_MDPolyLmin 0.1
 #define _default_MDPolyLmax 3.0
-#define _default_MDPolyRootsFile ""
+#define _default_MDPolyRootsFile "Square_root_BR_roots.dat"
 #define _default_MDPolyLocNormConst -1.0
 #define _default_MDPolyDetRatio 0
 
diff --git a/hmc_tm.c b/hmc_tm.c
index 7802ce8a7..a2b10f5e8 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -122,7 +122,7 @@ int main(int argc,char *argv[]) {
 /* For online measurements */
   measurement * meas;
   int imeas;
-
+  
 #ifdef _KOJAK_INST
 #pragma pomp inst init
 #pragma pomp inst begin(main)
@@ -201,25 +201,6 @@ int main(int argc,char *argv[]) {
 
   tmlqcd_mpi_init(argc, argv);
 
-  if(even_odd_flag) {
-    j = init_monomials(VOLUMEPLUSRAND/2, even_odd_flag);
-  }
-  else {
-    j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
-  }
-  if (j != 0) {
-    fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
-    exit(0);
-  }
-
-  init_integrator();
-
-  if(g_proc_id == 0) {
-    for(j = 0; j < no_monomials; j++) {
-      printf("# monomial id %d type = %d timescale %d\n", j, monomial_list[j].type, monomial_list[j].timescale);
-    }
-  }
-
   if(nstore == -1) {
     countfile = fopen(nstore_filename, "r");
     if(countfile != NULL) {
@@ -233,14 +214,14 @@ int main(int argc,char *argv[]) {
       trajectory_counter = 0;
     }
   }
-
+  
 #ifndef MPI
   g_dbw2rand = 0;
 #endif
-
-
+  
+  
   g_mu = g_mu1;
-
+  
 #ifdef _GAUGE_COPY
   status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
@@ -289,22 +270,20 @@ int main(int argc,char *argv[]) {
     }
   }
 
-   /* list and initialize measurements*/
-   if(g_proc_id == 0) {
+  /* list and initialize measurements*/
+  if(g_proc_id == 0) {
     printf("\n");
     for(j = 0; j < no_measurements; j++) {
       printf("# measurement id %d, type = %d: Frequency %d\n", j, measurement_list[j].type, measurement_list[j].freq);
     }
-   }
-   init_measurements();
-
-  zero_spinor_field(g_spinor_field[DUM_DERI+4],VOLUME);
-  zero_spinor_field(g_spinor_field[DUM_DERI+5],VOLUME);
-  zero_spinor_field(g_spinor_field[DUM_DERI+6],VOLUME);
+  }
+  init_measurements();
 
   /*construct the filenames for the observables and the parameters*/
-  strcpy(datafilename,filename);  strcat(datafilename,".data");
-  strcpy(parameterfilename,filename);  strcat(parameterfilename,".para");
+  strcpy(datafilename,filename);  
+  strcat(datafilename,".data");
+  strcpy(parameterfilename,filename);  
+  strcat(parameterfilename,".para");
 
   if(g_proc_id == 0){
     parameterfile = fopen(parameterfilename, "a");
@@ -374,8 +353,23 @@ int main(int argc,char *argv[]) {
   xchange_gauge(g_gauge_field);
 #endif
 
-  if(g_running_phmc) {
-    init_phmc();
+  if(even_odd_flag) {
+    j = init_monomials(VOLUMEPLUSRAND/2, even_odd_flag);
+  }
+  else {
+    j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
+  }
+  if (j != 0) {
+    fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
+    exit(0);
+  }
+
+  init_integrator();
+
+  if(g_proc_id == 0) {
+    for(j = 0; j < no_monomials; j++) {
+      printf("# monomial id %d type = %d timescale %d\n", j, monomial_list[j].type, monomial_list[j].timescale);
+    }
   }
 
   plaquette_energy = measure_gauge_action(g_gauge_field);
@@ -392,10 +386,9 @@ int main(int argc,char *argv[]) {
     printf("# Computed plaquette value: %14.12f.\n", plaquette_energy/(6.*VOLUME*g_nproc));
     fclose(parameterfile);
   }
- 
 
   /* set ddummy to zero */
-  for(ix = 0; ix < VOLUME+RAND; ix++){
+  for(ix = 0; ix < VOLUMEPLUSRAND; ix++){
     for(mu=0; mu<4; mu++){
       ddummy[ix][mu].d1=0.;
       ddummy[ix][mu].d2=0.;
diff --git a/monomial.c b/monomial.c
index 88e6756c0..723e6ffde 100644
--- a/monomial.c
+++ b/monomial.c
@@ -114,6 +114,8 @@ int add_monomial(const int type) {
   monomial_list[no_monomials].MDPolyLocNormConst = _default_MDPolyLocNormConst;
   monomial_list[no_monomials].MDPolyDetRatio = _default_MDPolyDetRatio;
   monomial_list[no_monomials].MaxPtildeDegree = NTILDE_CHEBYMAX;
+  monomial_list[no_monomials].StildeMin = _default_stilde_min;
+  monomial_list[no_monomials].StildeMax = _default_stilde_max;
 
   monomial_list[no_monomials].initialised = 1;
   if(monomial_list[no_monomials].type == NDDETRATIO) {
@@ -227,6 +229,7 @@ int init_monomials(const int V, const int even_odd_flag) {
 	no_ndpoly_monomials++;
 	monomial_list[i].pf2 = __pf+no*V;
 	no++;
+	retval = init_nd_poly_monomial(i);
       }
       else if(monomial_list[i].type == NDDETRATIO) {
 	monomial_list[i].hbfunction = &dummy_heatbath;
@@ -291,7 +294,7 @@ void free_monomials() {
 }
 
 
-int init_poly_monomial(const int V,const int id){
+int init_poly_monomial(const int V, const int id){
 
   monomial * mnl = &monomial_list[id];
   int i,j,k;
diff --git a/monomial.h b/monomial.h
index 5ed6eb57e..62d3f925a 100644
--- a/monomial.h
+++ b/monomial.h
@@ -95,6 +95,7 @@ typedef struct {
   double PrecisionPtilde;
   double PrecisionHfinal;
   double StildeMin, StildeMax;
+  double EVMin, EVMax, EVMaxInv;
   /* chronological solver fields */
   spinor ** csg_field;
   spinor ** csg_field2;
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index eb960b9db..a82d23816 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -44,9 +44,13 @@
 #include "reweighting_factor_nd.h"
 #include "monomial.h"
 #include "hamiltonian_field.h"
+#include "boundary.h"
+#include "phmc.h"
+#include "init_chi_spinor_field.h"
 #include "ndpoly_monomial.h"
 
 extern int phmc_exact_poly;
+void ndpoly_set_global_parameter(monomial * const mnl);
 
 /********************************************
  *
@@ -58,8 +62,16 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   int j, k;
   monomial * mnl = &monomial_list[id];
 
+
   /* This factor 2 a missing factor 2 in trace_lambda */
-  (*mnl).forcefactor = -2.*phmc_Cpol*phmc_invmaxev;
+  ndpoly_set_global_parameter(mnl);
+  if (g_epsbar!=0.0 || phmc_exact_poly==0){
+    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
+  }
+  else {
+    phmc_Cpol = mnl->MDPolyLocNormConst;
+  }
+  mnl->forcefactor = -2.*phmc_Cpol*phmc_invmaxev;
 
   /* Recall:  The GAMMA_5 left of  delta M_eo  is done in  deriv_Sb !!! */
 
@@ -76,7 +88,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     for(k = 1; k < (phmc_dop_n_cheby-1); k++) {
       Q_tau1_min_cconst_ND(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
 			   g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
-			   phmc_root[k-1]);
+			   mnl->MDPolyRoots[k-1]);
     }
     
     /* Here comes the remaining fields  chi_k ; k=n,...,2n-1  */
@@ -91,7 +103,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
       
       Q_tau1_min_cconst_ND(g_chi_up_spinor_field[phmc_dop_n_cheby], g_chi_dn_spinor_field[phmc_dop_n_cheby], 
 			   g_chi_up_spinor_field[phmc_dop_n_cheby-1], g_chi_dn_spinor_field[phmc_dop_n_cheby-1], 
-			   phmc_root[2*phmc_dop_n_cheby-3-j]);
+			   mnl->MDPolyRoots[2*phmc_dop_n_cheby-3-j]);
       
       /* Get the even parts of the  (j-1)th  chi_spinors */
       H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
@@ -117,7 +129,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     for(k = 1; k < (phmc_dop_n_cheby-1); k++) {
       Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[k],
 			    g_chi_up_spinor_field[k-1], 
-			    phmc_root[k-1]);
+			    mnl->MDPolyRoots[k-1]);
     }
     assign(g_chi_up_spinor_field[phmc_dop_n_cheby],
 	   g_chi_up_spinor_field[phmc_dop_n_cheby-2], VOLUME/2);
@@ -128,7 +140,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
       Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[phmc_dop_n_cheby], 
 			   g_chi_up_spinor_field[phmc_dop_n_cheby-1],
-			   phmc_root[2*phmc_dop_n_cheby-3-j]);
+			   mnl->MDPolyRoots[2*phmc_dop_n_cheby-3-j]);
 
       Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[j-1]); 
 
@@ -159,23 +171,26 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   double temp;
   monomial * mnl = &monomial_list[id];
 
-  (*mnl).energy0 = 0.;
-  random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, (*mnl).rngrepro);
-  (*mnl).energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
+  ndpoly_set_global_parameter(mnl);
+  mnl->energy0 = 0.;
+  random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
+  mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
 
-  if(g_epsbar!=0.0 || phmc_exact_poly == 0){
-    random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, (*mnl).rngrepro);
-     (*mnl).energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
+  if(g_epsbar!=0.0 || phmc_exact_poly == 0) {
+    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
+    random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, mnl->rngrepro);
+     mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
   } 
   else {
+    phmc_Cpol = mnl->MDPolyLocNormConst;
     zero_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2);
   }
 
   if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
     printf("PHMC: Here comes the computation of H_old with \n \n");
     printf("PHMC: First: random spinors and their norm  \n ");
-    printf("PHMC: OLD Ennergy UP %e \n", (*mnl).energy0);
-    printf("PHMC: OLD Energy  DN + UP %e \n\n", (*mnl).energy0);
+    printf("PHMC: OLD Ennergy UP %e \n", mnl->energy0);
+    printf("PHMC: OLD Energy  DN + UP %e \n\n", mnl->energy0);
   }
 
   if(phmc_exact_poly==0){
@@ -188,7 +203,7 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 
       Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
-			phmc_root[phmc_dop_n_cheby-2+j]);
+			mnl->MDPolyRoots[phmc_dop_n_cheby-2+j]);
     }
     Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], phmc_ptilde_cheby_coef, 
 		  phmc_ptilde_n_cheby, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
@@ -213,7 +228,7 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
       assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
       Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1],
 			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0],
-			phmc_root[phmc_dop_n_cheby-2+j]);
+			mnl->MDPolyRoots[phmc_dop_n_cheby-2+j]);
     }
 
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
@@ -233,7 +248,7 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1],
 			    g_chi_up_spinor_field[0],
-			    phmc_root[phmc_dop_n_cheby-2+j]);
+			    mnl->MDPolyRoots[phmc_dop_n_cheby-2+j]);
     }
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
   }
@@ -268,6 +283,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
   spinor *up0, *dn0, *up1, *dn1, *dummy;
 
+  ndpoly_set_global_parameter(mnl);
   mnl->energy1 = 0.;
   Ener[0] = 0;
   factor[0] = 1.0;
@@ -285,9 +301,10 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   assign(dn0, mnl->pf2, VOLUME/2);
 
   if(phmc_exact_poly==0) {
+    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
     for(j = 1; j <= (phmc_dop_n_cheby-1); j++) {
       /* Change this name !!*/
-      Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, phmc_root[j-1]);
+      Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
 
       dummy = up1; up1 = up0; up0 = dummy;
       dummy = dn1; dn1 = dn0; dn0 = dummy;
@@ -373,9 +390,10 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     }
   } 
   else if(phmc_exact_poly==1 && g_epsbar!=0.0) {
+    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
     /* B(Q*tau1) */
     for(j = 1; j <= (phmc_dop_n_cheby-1); j++){
-      Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, phmc_root[j-1]);
+      Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
 
       dummy = up1; up1 = up0; up0 = dummy;
       dummy = dn1; dn1 = dn0; dn0 = dummy;
@@ -405,11 +423,12 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     }
   } 
   else if(phmc_exact_poly == 1 && g_epsbar == 0.0) {
+    phmc_Cpol = mnl->MDPolyLocNormConst;
     for(j = 1; j < (phmc_dop_n_cheby); j++) {
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1],
 			    g_chi_up_spinor_field[0],
-			    phmc_root[j-1]);
+			    mnl->MDPolyRoots[j-1]);
     }
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
 
@@ -434,3 +453,118 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   /* END IF PHMC */
   return(mnl->energy1 - mnl->energy0);
 }
+
+
+int init_nd_poly_monomial(const int id) {
+  monomial * mnl = &monomial_list[id];
+  int j, k, errcode;
+  FILE * ifs;
+  char title[100];
+
+  phmc_invmaxev = 1.0;
+  g_mubar = mnl->mubar;
+  g_epsbar = mnl->epsbar;
+  g_kappa = mnl->kappa;
+  boundary(g_kappa);
+  if (g_epsbar!=0.0 || phmc_exact_poly==0){
+    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
+  }
+  else {
+    phmc_Cpol = mnl->MDPolyLocNormConst;
+  }
+
+
+  /* This is the epsilon parameter */
+  mnl->EVMin = mnl->StildeMin / mnl->StildeMax;
+  
+  /* In the following there is the  "sqrt"  since the value refers to 
+     the hermitian Dirac operator (used in EV-computation), namely 
+     S = Q Q^dag         
+     When  "S"  is applied, we call  phmc_invmaxev  twice !!! */
+  if(g_epsbar!=0.0 || phmc_exact_poly==0) mnl->EVMaxInv = 1./(sqrt(mnl->StildeMax));
+  else if(g_epsbar==0.0 && phmc_exact_poly==1) mnl->EVMaxInv = 1./mnl->StildeMax;
+  phmc_cheb_evmin = mnl->EVMin;
+  phmc_invmaxev = mnl->EVMaxInv;
+  phmc_cheb_evmax = 1.0;
+
+  /* Here we prepare the less precise polynomial first   */
+  /* the routine determines a value for phmc_dop_n_cheby */
+  degree_of_polynomial_nd(mnl->MDPolyDegree);
+  if((g_proc_id == 0) && (g_debug_level > 1)) {
+    printf("# monomial %s approximation interval [stilde_min, stilde_max] = [%e, %e]\n", 
+	   mnl->name, mnl->StildeMin, mnl->StildeMax);
+    printf("# monomial %s degree for P = %d, epsilont = %e, normalisation = %e", 
+	   mnl->name, phmc_dop_n_cheby-1, mnl->EVMin, mnl->EVMaxInv);
+  }
+
+  /* Chi`s-spinors  memory allocation */
+  j = init_chi_spinor_field(VOLUMEPLUSRAND/2, (phmc_dop_n_cheby+1));
+  if ( j!= 0) {
+    fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
+    exit(0);
+  }
+
+  /* End memory allocation */
+  /* Here we prepare the precise polynomial */
+  degree_of_Ptilde();
+
+  /* THIS IS THE OVERALL CONSTANT */
+  /* write phmc_Cpol as the result of the simple-program files (BigC^(1/2))^1/2 
+     since  BigC^(1/2)  is the constant appearing in each factor of the 
+     multiplication defining the monomial basis representation of the 
+     polinomial in s,  while its square phmc_root  (BigC^(1/2))^1/2  is the 
+     constant appearing in the multiplication representing the 
+     polinomial in  sqrt(s) .
+  */
+  if(mnl->MDPolyLocNormConst < 0.0){
+    fprintf(stderr, "Error, please specify MDPolyLocNormConst in the input file! Aborting...\n");
+#ifdef MPI
+    MPI_Finalize();
+#endif
+    exit(6);
+  } 
+
+  mnl->MDPolyRoots = calloc((2*phmc_dop_n_cheby-2),sizeof(_Complex double));
+
+  if((ifs = fopen(mnl->MDPolyRootsFile, "r")) != (FILE*)NULL) {
+    if (fgets(title, 100, ifs) == NULL) {
+      fprintf(stderr, "Error in reading %s! Aborting...\n", mnl->MDPolyRootsFile);
+#ifdef MPI
+      MPI_Finalize();
+#endif
+      exit(6);
+    }
+    
+    /* Here we read in the 2n roots needed for the polinomial in sqrt(s) */
+    double *phmc_darray = (double*)mnl->MDPolyRoots;
+    for(j = 0; j< 2 * phmc_dop_n_cheby - 2; ++j) {
+      errcode = fscanf(ifs, " %d %lf %lf \n", &k, &phmc_darray[2 * j], &phmc_darray[2 * j + 1]);
+    }
+    fclose(ifs);
+  }
+  else {
+    fprintf(stderr, "File %s is missing! Aborting...\n", mnl->MDPolyRootsFile);
+#ifdef MPI
+    MPI_Finalize();
+#endif
+    exit(6);
+  }
+  
+  return(0);
+}
+
+void ndpoly_set_global_parameter(monomial * const mnl) {
+
+  g_mubar = mnl->mubar;
+  g_epsbar = mnl->epsbar;
+  g_kappa = mnl->kappa;
+  boundary(g_kappa);
+
+  phmc_root = mnl->MDPolyRoots;
+  phmc_invmaxev = mnl->EVMaxInv;
+  phmc_cheb_evmin = mnl->EVMin;
+  phmc_invmaxev = mnl->EVMaxInv;
+  phmc_cheb_evmax = 1.0;
+ 
+  return;
+}
diff --git a/ndpoly_monomial.h b/ndpoly_monomial.h
index 203eda658..53ae0b634 100644
--- a/ndpoly_monomial.h
+++ b/ndpoly_monomial.h
@@ -25,5 +25,6 @@
 void ndpoly_derivative(const int id, hamiltonian_field_t * const hf);
 double ndpoly_acc(const int id, hamiltonian_field_t * const hf);
 void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf);
+int init_nd_poly_monomial(const int id);
 
 #endif
diff --git a/read_input.l b/read_input.l
index 4e8b662c1..85a63d47d 100644
--- a/read_input.l
+++ b/read_input.l
@@ -834,7 +834,7 @@ inline void rmQuotes(char *str){
 }
 
 
-<DETMONOMIAL,POLYMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,NDPOLYMONOMIAL>{
   {SPC}*2KappaMu2{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mu2 = c;
@@ -865,6 +865,11 @@ inline void rmQuotes(char *str){
     mnl->epsbar2 = c;
     if(myverbose) printf("  2KappaEpsbar2 set to %f line %d monomial %d\n", c, line_of_file, current_monomial);
   }
+  {SPC}*Kappa{EQL}{FLT} {
+    sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
+    mnl->kappa = c;
+    if(myverbose) printf("  Kappa set to %f line %d monomial %d\n", c, line_of_file, current_monomial);
+  }
 }
 
 <DETMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL>{

From 86894d716a77ad6cb5673de734ecb68e104c7559 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 20 Mar 2012 15:17:29 +0100
Subject: [PATCH 003/110] we now have a mapping established between the old
 phmc_* variables and their monomial counterparts:

phmc_Cpol;                         --> MDPolyLocNormConst
phmc_cheb_evmin, phmc_cheb_evmax;  --> EVMin, EVMax
phmc_invmaxev;                     --> EVMaxInv
phmc_root;                         --> MDPolyRoots
phmc_dop_n_cheby;                  --> MDPolyDegree
phmc_dop_cheby_coef;               --> MDPolyCoefs
phmc_ptilde_n_cheby;               --> PtildeDegree
phmc_ptilde_cheby_coef;            --> PtildeCoefs
---
 Ptilde_nd.c               | 41 +++++++++++++++++-----------------
 Ptilde_nd.h               |  2 +-
 chebyshev_polynomial_nd.c | 46 +++++++++++++++++++--------------------
 chebyshev_polynomial_nd.h |  2 +-
 monomial.c                |  2 +-
 monomial.h                |  7 +++---
 ndpoly_monomial.c         | 25 ++++++++++++++-------
 ndpoly_monomial.h         |  2 +-
 phmc.c                    | 21 +++++++++---------
 phmc.h                    |  2 --
 10 files changed, 79 insertions(+), 71 deletions(-)

diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index 00c1dfc4f..facc82df0 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -266,11 +266,11 @@ double chebtilde_eval(int M, double *dd, double s){
 
 
 
-void degree_of_Ptilde() {
+void degree_of_Ptilde(int * _degree, double ** coefs) {
   int i, j;
   double temp, temp2;
   static int ini=0;
-
+  int degree;
   double sum=0.0;
 
   spinor *ss=NULL, *ss_=NULL, *sc=NULL, *sc_=NULL;
@@ -278,9 +278,9 @@ void degree_of_Ptilde() {
   spinor *aux2s=NULL, *aux2s_=NULL, *aux2c=NULL, *aux2c_=NULL;
 
   if(ini==0){
-    phmc_ptilde_cheby_coef = calloc(phmc_max_ptilde_degree, sizeof(double)); 
+    *coefs = calloc(phmc_max_ptilde_degree, sizeof(double)); 
     ini=1;
-  }   
+  }
 
 #if ( defined SSE || defined SSE2 || defined SSE3)
   ss_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
@@ -306,7 +306,7 @@ void degree_of_Ptilde() {
   aux2c=calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
 #endif
 
-  Ptilde_cheb_coefs(phmc_cheb_evmin, phmc_cheb_evmax, phmc_ptilde_cheby_coef, phmc_max_ptilde_degree, -1.0); 
+  Ptilde_cheb_coefs(phmc_cheb_evmin, phmc_cheb_evmax, *coefs, phmc_max_ptilde_degree, -1.0); 
 
   if(g_proc_id == g_stdio_proc && g_debug_level > 0){
     printf("# NDPOLY Acceptance Polynomial: EVmin = %f  EVmax = %f\n", phmc_cheb_evmin, phmc_cheb_evmax);
@@ -314,12 +314,12 @@ void degree_of_Ptilde() {
     fflush(stdout);
   }
 
-  phmc_ptilde_n_cheby = 2*phmc_dop_n_cheby;
+  degree = 2*phmc_dop_n_cheby;
 
   for(i = 0; i < 100 ; i++) {
-    if (phmc_ptilde_n_cheby > phmc_max_ptilde_degree) {
+    if (degree > phmc_max_ptilde_degree) {
       fprintf(stderr, "Error: n_cheby=%d > phmc_max_ptilde_degree=%d in ptilde\n",
-              phmc_ptilde_n_cheby, phmc_max_ptilde_degree);
+              degree, phmc_max_ptilde_degree);
       fprintf(stderr, "Increase n_chebymax\n");
 #ifdef MPI
       MPI_Finalize();
@@ -328,22 +328,22 @@ void degree_of_Ptilde() {
     }
 
     sum=0;
-    for(j=phmc_ptilde_n_cheby; j<phmc_max_ptilde_degree; j++){ 
-      sum += fabs(phmc_ptilde_cheby_coef[j]);
+    for(j=degree; j<phmc_max_ptilde_degree; j++){ 
+      sum += fabs(coefs[0][j]);
     }
 
     if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)) {
-      printf("# NDPOLY Acceptance Polynomial: Sum remaining | d_n | = %e for degree=%d\n", sum, phmc_ptilde_n_cheby);
-      printf("# NDPOLY Acceptance Polynomial: coef[degree] = %e\n", phmc_ptilde_cheby_coef[phmc_ptilde_n_cheby]);
+      printf("# NDPOLY Acceptance Polynomial: Sum remaining | d_n | = %e for degree=%d\n", sum, degree);
+      printf("# NDPOLY Acceptance Polynomial: coef[degree] = %e\n", (*coefs)[degree]);
     }
     if(sum < g_acc_Ptilde) { 
-/*     if(fabs(phmc_ptilde_cheby_coef[phmc_ptilde_n_cheby]) < g_acc_Ptilde) { */
+/*     if(fabs(*coefs[degree]) < g_acc_Ptilde) { */
       if((g_proc_id == g_stdio_proc) && (g_debug_level > 1)) {
-        printf(" sum %e, coef %e\n", sum, phmc_ptilde_cheby_coef[phmc_ptilde_n_cheby]);
+        printf(" sum %e, coef %e\n", sum, (*coefs)[degree]);
       }
       break;
     }
-    phmc_ptilde_n_cheby= (int)(phmc_ptilde_n_cheby*1.2);
+    degree= (int)(degree*1.2);
   }
 
   if(g_debug_level > 0) {
@@ -352,11 +352,11 @@ void degree_of_Ptilde() {
     random_spinor_field(ss,VOLUME/2, 1);
     random_spinor_field(sc,VOLUME/2, 1);
 
-    Poly_tilde_ND(&auxs[0], &auxc[0], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, &ss[0], &sc[0]);
+    Poly_tilde_ND(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0]);
     QdaggerQ_poly(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
     Q_Qdagger_ND(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
     QdaggerQ_poly(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
-    Poly_tilde_ND(&auxs[0], &auxc[0], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, &aux2s[0], &aux2c[0]);
+    Poly_tilde_ND(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0]);
 
     diff(&aux2s[0],&auxs[0], &ss[0], VOLUME/2);
     temp = square_norm(&aux2s[0], VOLUME/2, 1) / square_norm(&ss[0], VOLUME/2, 1) / 4.0;
@@ -372,20 +372,21 @@ void degree_of_Ptilde() {
       printf("# NDPOLY Acceptance Polynomial: relative squared accuracy in components:\n UP=%e  DN=%e \n", temp, temp2);
     }
 
-    temp = chebtilde_eval(phmc_ptilde_n_cheby, phmc_ptilde_cheby_coef, phmc_cheb_evmin);
+    temp = chebtilde_eval(degree, *coefs, phmc_cheb_evmin);
     temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, phmc_cheb_evmin);
     temp *= phmc_cheb_evmin;
     temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, phmc_cheb_evmin);
-    temp *= chebtilde_eval(phmc_ptilde_n_cheby, phmc_ptilde_cheby_coef, phmc_cheb_evmin);
+    temp *= chebtilde_eval(degree, *coefs, phmc_cheb_evmin);
     temp = 0.5*fabs(temp - 1);
     if(g_proc_id == g_stdio_proc) {
       printf("# NDPOLY Acceptance Polynomial: Delta_IR at s=%f: | Ptilde P s_low P Ptilde - 1 |/2 = %e \n", phmc_cheb_evmin, temp);
     }
   }
   if(g_proc_id == g_stdio_proc) {
-    printf("# NDPOLY Acceptance Polynomial degree set to %d\n\n", phmc_ptilde_n_cheby);
+    printf("# NDPOLY Acceptance Polynomial degree set to %d\n\n", degree);
   }
 
+  *_degree = degree;
 #if ( defined SSE || defined SSE2 || defined SSE3)
   free(ss_);
   free(auxs_);
diff --git a/Ptilde_nd.h b/Ptilde_nd.h
index c95b46e11..1567a5123 100644
--- a/Ptilde_nd.h
+++ b/Ptilde_nd.h
@@ -29,6 +29,6 @@ void Poly_tilde_ND(spinor *R_s, spinor *R_c, double *dd, int n, spinor *S_s, spi
 
 double chebtilde_eval(int M, double *dd, double s);
 
-void degree_of_Ptilde();
+void degree_of_Ptilde(int * _degree, double ** coefs);
 
 #endif
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index db2f29896..f72345ab3 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -265,10 +265,11 @@ double cheb_eval(int M, double *c, double s){
  *****************************************************************************/
 
 
-void degree_of_polynomial_nd(const int degree_of_p){
+void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs) { 
   int j;
   double temp, temp2;
   static int ini=0;
+  int degree_of_p = *_degree_of_p + 1;
 
   double sum=0.0;
 
@@ -276,13 +277,11 @@ void degree_of_polynomial_nd(const int degree_of_p){
   spinor *auxs=NULL, *auxs_=NULL, *auxc=NULL, *auxc_=NULL;
   spinor *aux2s=NULL, *aux2s_=NULL, *aux2c=NULL, *aux2c_=NULL;
 
-  phmc_dop_n_cheby=degree_of_p+1;
   if(ini==0){
-    phmc_dop_cheby_coef = calloc(phmc_dop_n_cheby,sizeof(double));
+    *coefs = calloc(degree_of_p, sizeof(double));
     ini=1;
   }
 
-
 #if ( defined SSE || defined SSE2 || defined SSE3)
   ss_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
   auxs_ = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
@@ -308,21 +307,21 @@ void degree_of_polynomial_nd(const int degree_of_p){
 #endif
   
   
-  chebyshev_coefs(phmc_cheb_evmin, phmc_cheb_evmax, phmc_dop_cheby_coef, phmc_dop_n_cheby, -0.5);
+  chebyshev_coefs(phmc_cheb_evmin, phmc_cheb_evmax, *coefs, degree_of_p, -0.5);
 
   random_spinor_field(ss,VOLUME/2, 1);
   random_spinor_field(sc,VOLUME/2, 1);
 
   if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)){
     printf("NDPOLY MD Polynomial: EVmin = %e  EVmax = %e  \n", phmc_cheb_evmin, phmc_cheb_evmax);
-    printf("NDPOLY MD Polynomial: the degree was set to: %d\n", phmc_dop_n_cheby);
+    printf("NDPOLY MD Polynomial: the degree was set to: %d\n", degree_of_p);
     fflush(stdout);
   }
 
   /* Here we check the accuracy */
-  QdaggerQ_poly(&auxs[0], &auxc[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &ss[0], &sc[0]);
+  QdaggerQ_poly(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0]);
   Q_Qdagger_ND(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
-  QdaggerQ_poly(&auxs[0], &auxc[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &aux2s[0], &aux2c[0]);
+  QdaggerQ_poly(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0]);
 
   diff(&aux2s[0],&auxs[0],&ss[0],VOLUME/2);
   temp=square_norm(&aux2s[0],VOLUME/2, 1)/square_norm(&ss[0],VOLUME/2, 1)/4.0;
@@ -343,30 +342,31 @@ void degree_of_polynomial_nd(const int degree_of_p){
   }
 
   if(g_debug_level > 1) {
-    temp = cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, phmc_cheb_evmin);
+    temp = cheb_eval(degree_of_p, *coefs, phmc_cheb_evmin);
     temp *= phmc_cheb_evmin;
-    temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, phmc_cheb_evmin);
+    temp *= cheb_eval(degree_of_p, *coefs, phmc_cheb_evmin);
     temp = 0.5*fabs(temp - 1);
     if(g_proc_id == g_stdio_proc) {
       printf("PHMC: Delta_IR at s=%f:    | P s_low P - 1 |/2 = %e \n", phmc_cheb_evmin, temp);
     }
   }
   /* RECALL THAT WE NEED AN EVEN DEGREE !!!! */
+  *_degree_of_p = degree_of_p;
 
 #if ( defined SSE || defined SSE2 || defined SSE3)
-   free(ss_);   
-   free(auxs_); 
-   free(aux2s_);
-   free(sc_);   
-   free(auxc_); 
-   free(aux2c_);
+  free(ss_);   
+  free(auxs_); 
+  free(aux2s_);
+  free(sc_);   
+  free(auxc_); 
+  free(aux2c_);
 #else
-   free(ss);   
-   free(auxs); 
-   free(aux2s);
-   free(sc);   
-   free(auxc); 
-   free(aux2c);
+  free(ss);   
+  free(auxs); 
+  free(aux2s);
+  free(sc);   
+  free(auxc); 
+  free(aux2c);
 #endif
-
+  return;
 }
diff --git a/chebyshev_polynomial_nd.h b/chebyshev_polynomial_nd.h
index 28b4c37a3..f22c18b12 100644
--- a/chebyshev_polynomial_nd.h
+++ b/chebyshev_polynomial_nd.h
@@ -28,6 +28,6 @@ void QdaggerQ_poly(spinor *R_s, spinor *R_c, double *c, int n, spinor *S_s, spin
 
 double cheb_eval(int M, double *c, double s);
 
-void degree_of_polynomial_nd(const int degree_of_p);
+void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs);
 
 #endif
diff --git a/monomial.c b/monomial.c
index 723e6ffde..dfe7e2d7f 100644
--- a/monomial.c
+++ b/monomial.c
@@ -229,7 +229,7 @@ int init_monomials(const int V, const int even_odd_flag) {
 	no_ndpoly_monomials++;
 	monomial_list[i].pf2 = __pf+no*V;
 	no++;
-	retval = init_nd_poly_monomial(i);
+	retval = init_ndpoly_monomial(i);
       }
       else if(monomial_list[i].type == NDDETRATIO) {
 	monomial_list[i].hbfunction = &dummy_heatbath;
diff --git a/monomial.h b/monomial.h
index 62d3f925a..ab1a12af7 100644
--- a/monomial.h
+++ b/monomial.h
@@ -82,20 +82,19 @@ typedef struct {
   /* second one needed for ND monomials */
   spinor * pf, * pf2;
   /* parameters for the POLY Monomial*/
-  int MDPolyDegree;
-  double MDPolyLmin;
-  double MDPolyLmax;
+  int MDPolyDegree, MaxPtildeDegree, PtildeDegree;
+  double MDPolyLmin, MDPolyLmax;
   char MDPolyRootsFile[256];
   _Complex double *MDPolyRoots;
   spinor **MDPoly_chi_spinor_fields;
   double MDPolyLocNormConst;
   int MDPolyDetRatio;
-  int MaxPtildeDegree;
   int no_wfields;
   double PrecisionPtilde;
   double PrecisionHfinal;
   double StildeMin, StildeMax;
   double EVMin, EVMax, EVMaxInv;
+  double * MDPolyCoefs, * PtildeCoefs;
   /* chronological solver fields */
   spinor ** csg_field;
   spinor ** csg_field2;
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index a82d23816..ba9f2a203 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -455,7 +455,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 }
 
 
-int init_nd_poly_monomial(const int id) {
+int init_ndpoly_monomial(const int id) {
   monomial * mnl = &monomial_list[id];
   int j, k, errcode;
   FILE * ifs;
@@ -487,14 +487,15 @@ int init_nd_poly_monomial(const int id) {
   phmc_invmaxev = mnl->EVMaxInv;
   phmc_cheb_evmax = 1.0;
 
-  /* Here we prepare the less precise polynomial first   */
-  /* the routine determines a value for phmc_dop_n_cheby */
-  degree_of_polynomial_nd(mnl->MDPolyDegree);
+  /* Here we prepare the less precise MD polynomial first   */
+  degree_of_polynomial_nd(&mnl->MDPolyDegree, &mnl->MDPolyCoefs);
+  phmc_dop_n_cheby = mnl->MDPolyDegree;
+  phmc_dop_cheby_coef = mnl->MDPolyCoefs;
   if((g_proc_id == 0) && (g_debug_level > 1)) {
     printf("# monomial %s approximation interval [stilde_min, stilde_max] = [%e, %e]\n", 
 	   mnl->name, mnl->StildeMin, mnl->StildeMax);
     printf("# monomial %s degree for P = %d, epsilont = %e, normalisation = %e", 
-	   mnl->name, phmc_dop_n_cheby-1, mnl->EVMin, mnl->EVMaxInv);
+	   mnl->name, mnl->MDPolyDegree-1, mnl->EVMin, mnl->EVMaxInv);
   }
 
   /* Chi`s-spinors  memory allocation */
@@ -505,8 +506,10 @@ int init_nd_poly_monomial(const int id) {
   }
 
   /* End memory allocation */
-  /* Here we prepare the precise polynomial */
-  degree_of_Ptilde();
+  /* Here we prepare the precise polynomial Ptilde */
+  degree_of_Ptilde(&mnl->PtildeDegree, &mnl->PtildeCoefs);
+  phmc_ptilde_cheby_coef = mnl->PtildeCoefs;
+  phmc_ptilde_n_cheby = mnl->PtildeDegree;
 
   /* THIS IS THE OVERALL CONSTANT */
   /* write phmc_Cpol as the result of the simple-program files (BigC^(1/2))^1/2 
@@ -524,7 +527,7 @@ int init_nd_poly_monomial(const int id) {
     exit(6);
   } 
 
-  mnl->MDPolyRoots = calloc((2*phmc_dop_n_cheby-2),sizeof(_Complex double));
+  mnl->MDPolyRoots = calloc((2*mnl->MDPolyDegree-2),sizeof(_Complex double));
 
   if((ifs = fopen(mnl->MDPolyRootsFile, "r")) != (FILE*)NULL) {
     if (fgets(title, 100, ifs) == NULL) {
@@ -565,6 +568,12 @@ void ndpoly_set_global_parameter(monomial * const mnl) {
   phmc_cheb_evmin = mnl->EVMin;
   phmc_invmaxev = mnl->EVMaxInv;
   phmc_cheb_evmax = 1.0;
+
+  phmc_dop_n_cheby = mnl->MDPolyDegree;
+  phmc_dop_cheby_coef = mnl->MDPolyCoefs;
+
+  phmc_ptilde_cheby_coef = mnl->PtildeCoefs;
+  phmc_ptilde_n_cheby = mnl->PtildeDegree;
  
   return;
 }
diff --git a/ndpoly_monomial.h b/ndpoly_monomial.h
index 53ae0b634..76c457620 100644
--- a/ndpoly_monomial.h
+++ b/ndpoly_monomial.h
@@ -25,6 +25,6 @@
 void ndpoly_derivative(const int id, hamiltonian_field_t * const hf);
 double ndpoly_acc(const int id, hamiltonian_field_t * const hf);
 void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf);
-int init_nd_poly_monomial(const int id);
+int init_ndpoly_monomial(const int id);
 
 #endif
diff --git a/phmc.c b/phmc.c
index a44f87d62..59a565b94 100644
--- a/phmc.c
+++ b/phmc.c
@@ -37,14 +37,15 @@
 #include "phmc.h"
 #include "monomial.h"
 
-double phmc_Cpol;
-double phmc_cheb_evmin, phmc_cheb_evmax;
-double phmc_invmaxev;
-_Complex double * phmc_root;
-int phmc_dop_n_cheby;
-double * phmc_dop_cheby_coef;
-int phmc_ptilde_n_cheby;
-double * phmc_ptilde_cheby_coef;
+                                         // --> in  monomial
+double phmc_Cpol;                        // --> MDPolyLocNormConst
+double phmc_cheb_evmin, phmc_cheb_evmax; // --> EVMin, EVMax
+double phmc_invmaxev;                    // --> EVMaxInv
+_Complex double * phmc_root;             // --> MDPolyRoots
+int phmc_dop_n_cheby;                    // --> MDPolyDegree
+double * phmc_dop_cheby_coef;            // --> MDPolyCoefs
+int phmc_ptilde_n_cheby;                 // --> PtildeDegree
+double * phmc_ptilde_cheby_coef;         // --> PtildeCoefs
 int errcode;
 phmc_vars *phmc_var_stack=NULL;
 int phmc_max_ptilde_degree = NTILDE_CHEBYMAX;
@@ -113,7 +114,7 @@ void init_phmc() {
   phmc_cheb_evmax = 1.0;
 
   /* Here we prepare the less precise polynomial first */
-  degree_of_polynomial_nd(degree_of_p);
+  //degree_of_polynomial_nd(&degree_of_p);
 
   if((g_proc_id == 0) && (g_debug_level > 1)) {
     printf("PHMC: interval of approximation [stilde_min, stilde_max] = [%e, %e]\n", stilde_min, stilde_max);
@@ -130,7 +131,7 @@ void init_phmc() {
 
   /* End memory allocation */
   /* Here we prepare the precise polynomial */
-  degree_of_Ptilde();
+  //degree_of_Ptilde();
 
   /* THIS IS THE OVERALL CONSTANT */
   /* write phmc_Cpol as the result of the simple-program files (BigC^(1/2))^1/2 
diff --git a/phmc.h b/phmc.h
index e6b6899cb..10aa53528 100644
--- a/phmc.h
+++ b/phmc.h
@@ -54,8 +54,6 @@ extern phmc_vars *phmc_var_stack;
 void pushPhmcVars();
 void popPhmcVars();
 
-
-void init_phmc();
 void phmc_compute_ev(const int trajectory_counter,
 		     const double plaquette_energy);
 

From 7e6dbe6d5cfe5ff01d3b235c6464b9cc574cd8c3 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 20 Mar 2012 15:58:02 +0100
Subject: [PATCH 004/110] replaced phmc_* variables with monomial ones where
 possible

---
 ndpoly_monomial.c | 119 ++++++++++++++++++++++------------------------
 1 file changed, 57 insertions(+), 62 deletions(-)

diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index ba9f2a203..8de1dfc54 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -50,7 +50,7 @@
 #include "ndpoly_monomial.h"
 
 extern int phmc_exact_poly;
-void ndpoly_set_global_parameter(monomial * const mnl);
+void ndpoly_set_global_parameter(monomial * const mnl, const int exact);
 
 /********************************************
  *
@@ -64,14 +64,8 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
 
   /* This factor 2 a missing factor 2 in trace_lambda */
-  ndpoly_set_global_parameter(mnl);
-  if (g_epsbar!=0.0 || phmc_exact_poly==0){
-    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
-  }
-  else {
-    phmc_Cpol = mnl->MDPolyLocNormConst;
-  }
-  mnl->forcefactor = -2.*phmc_Cpol*phmc_invmaxev;
+  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+  mnl->forcefactor = -2.*phmc_Cpol*mnl->EVMaxInv;
 
   /* Recall:  The GAMMA_5 left of  delta M_eo  is done in  deriv_Sb !!! */
 
@@ -85,37 +79,37 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     assign(g_chi_up_spinor_field[0], mnl->pf, VOLUME/2);
     assign(g_chi_dn_spinor_field[0], mnl->pf2, VOLUME/2);
 
-    for(k = 1; k < (phmc_dop_n_cheby-1); k++) {
+    for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
       Q_tau1_min_cconst_ND(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
 			   g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
 			   mnl->MDPolyRoots[k-1]);
     }
     
     /* Here comes the remaining fields  chi_k ; k=n,...,2n-1  */
-    /*They are evaluated step-by-step overwriting the same field (phmc_dop_n_cheby)*/
+    /*They are evaluated step-by-step overwriting the same field (mnl->MDPolyDegree)*/
     
-    assign(g_chi_up_spinor_field[phmc_dop_n_cheby], g_chi_up_spinor_field[phmc_dop_n_cheby-2], VOLUME/2);
-    assign(g_chi_dn_spinor_field[phmc_dop_n_cheby], g_chi_dn_spinor_field[phmc_dop_n_cheby-2], VOLUME/2);
+    assign(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_up_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
+    assign(g_chi_dn_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
     
-    for(j=(phmc_dop_n_cheby-1); j>=1; j--) {
-      assign(g_chi_up_spinor_field[phmc_dop_n_cheby-1], g_chi_up_spinor_field[phmc_dop_n_cheby], VOLUME/2);
-      assign(g_chi_dn_spinor_field[phmc_dop_n_cheby-1], g_chi_dn_spinor_field[phmc_dop_n_cheby], VOLUME/2);
+    for(j=(mnl->MDPolyDegree-1); j>=1; j--) {
+      assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
+      assign(g_chi_dn_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], VOLUME/2);
       
-      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[phmc_dop_n_cheby], g_chi_dn_spinor_field[phmc_dop_n_cheby], 
-			   g_chi_up_spinor_field[phmc_dop_n_cheby-1], g_chi_dn_spinor_field[phmc_dop_n_cheby-1], 
-			   mnl->MDPolyRoots[2*phmc_dop_n_cheby-3-j]);
+      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
+			   g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
+			   mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
       
       /* Get the even parts of the  (j-1)th  chi_spinors */
       H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
 	      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
       
       /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
-      deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[phmc_dop_n_cheby], hf);      /* UP */
-      deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[phmc_dop_n_cheby], hf);    /* DN */
+      deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[mnl->MDPolyDegree], hf);      /* UP */
+      deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf);    /* DN */
       
       /* Get the even parts of the  (2N-j)-th  chi_spinors */
       H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
-	      g_chi_up_spinor_field[phmc_dop_n_cheby], g_chi_dn_spinor_field[phmc_dop_n_cheby], EO);
+	      g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
       
       /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
       deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf);
@@ -126,31 +120,31 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     /* Here comes the definitions for the chi_j fields */
     /* from  j=0  (chi_0 = phi)  .....  to j = n-1 */
     assign(g_chi_up_spinor_field[0], mnl->pf, VOLUME/2);
-    for(k = 1; k < (phmc_dop_n_cheby-1); k++) {
+    for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
       Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[k],
 			    g_chi_up_spinor_field[k-1], 
 			    mnl->MDPolyRoots[k-1]);
     }
-    assign(g_chi_up_spinor_field[phmc_dop_n_cheby],
-	   g_chi_up_spinor_field[phmc_dop_n_cheby-2], VOLUME/2);
+    assign(g_chi_up_spinor_field[mnl->MDPolyDegree],
+	   g_chi_up_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
 
-    for(j = (phmc_dop_n_cheby-1); j >= 1; j--) {
-      assign(g_chi_up_spinor_field[phmc_dop_n_cheby-1],
-	     g_chi_up_spinor_field[phmc_dop_n_cheby], VOLUME/2);
+    for(j = (mnl->MDPolyDegree-1); j >= 1; j--) {
+      assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1],
+	     g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
 
-      Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[phmc_dop_n_cheby], 
-			   g_chi_up_spinor_field[phmc_dop_n_cheby-1],
-			   mnl->MDPolyRoots[2*phmc_dop_n_cheby-3-j]);
+      Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[mnl->MDPolyDegree], 
+			   g_chi_up_spinor_field[mnl->MDPolyDegree-1],
+			   mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
 
       Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[j-1]); 
 
-      H_eo_tm_inv_psi(mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], EO, -1.);
+      H_eo_tm_inv_psi(mnl->w_fields[2], g_chi_up_spinor_field[mnl->MDPolyDegree], EO, -1.);
       deriv_Sb(OE, mnl->w_fields[3], mnl->w_fields[2], hf); 
       
       H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[3], EO, 1.); 
-      deriv_Sb(EO, mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], hf);
+      deriv_Sb(EO, mnl->w_fields[2], g_chi_up_spinor_field[mnl->MDPolyDegree], hf);
 
-      Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[phmc_dop_n_cheby]); 
+      Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[mnl->MDPolyDegree]); 
 
       H_eo_tm_inv_psi(mnl->w_fields[2],mnl->w_fields[3], EO, +1.);
       deriv_Sb(OE, g_chi_up_spinor_field[j-1] , mnl->w_fields[2], hf); 
@@ -171,18 +165,16 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   double temp;
   monomial * mnl = &monomial_list[id];
 
-  ndpoly_set_global_parameter(mnl);
+  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
   mnl->energy0 = 0.;
   random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
 
   if(g_epsbar!=0.0 || phmc_exact_poly == 0) {
-    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
     random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, mnl->rngrepro);
-     mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
+    mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
   } 
   else {
-    phmc_Cpol = mnl->MDPolyLocNormConst;
     zero_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2);
   }
 
@@ -197,16 +189,16 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
     QNon_degenerate(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 		    g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
  
-    for(j = 1; j < (phmc_dop_n_cheby); j++){
+    for(j = 1; j < (mnl->MDPolyDegree); j++){
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
 
       Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
-			mnl->MDPolyRoots[phmc_dop_n_cheby-2+j]);
+			mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
     }
-    Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], phmc_ptilde_cheby_coef, 
-		  phmc_ptilde_n_cheby, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
+    Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
+		  mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
   } 
   else if( phmc_exact_poly==1 && g_epsbar!=0.0) {
     /* Attention this is Q * tau1, up/dn are exchanged in the input spinor  */
@@ -223,12 +215,12 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 	      1000,1.e-16,0,VOLUME/2, Qtau1_P_ND);
 
     /*  phi= Bdagger phi  */
-    for(j = 1; j < (phmc_dop_n_cheby); j++){
+    for(j = 1; j < (mnl->MDPolyDegree); j++){
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
       Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1],
 			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0],
-			mnl->MDPolyRoots[phmc_dop_n_cheby-2+j]);
+			mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
     }
 
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
@@ -244,11 +236,11 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
              1000,1.e-16,0,VOLUME/2, Qtm_pm_Ptm_pm_psi);
 
     /*  phi= Bdagger phi  */
-    for(j = 1; j < (phmc_dop_n_cheby); j++){
+    for(j = 1; j < (mnl->MDPolyDegree); j++){
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1],
 			    g_chi_up_spinor_field[0],
-			    mnl->MDPolyRoots[phmc_dop_n_cheby-2+j]);
+			    mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
     }
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
   }
@@ -283,7 +275,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
   spinor *up0, *dn0, *up1, *dn1, *dummy;
 
-  ndpoly_set_global_parameter(mnl);
+  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
   mnl->energy1 = 0.;
   Ener[0] = 0;
   factor[0] = 1.0;
@@ -301,8 +293,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   assign(dn0, mnl->pf2, VOLUME/2);
 
   if(phmc_exact_poly==0) {
-    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
-    for(j = 1; j <= (phmc_dop_n_cheby-1); j++) {
+    for(j = 1; j <= (mnl->MDPolyDegree-1); j++) {
       /* Change this name !!*/
       Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
 
@@ -335,10 +326,10 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
       
       if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
 	Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		      phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, 
+		      mnl->PtildeCoefs, mnl->PtildeDegree, 
 		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
 	QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		      phmc_dop_cheby_coef, phmc_dop_n_cheby, 
+		      mnl->MDPolyCoefs, mnl->MDPolyDegree, 
 		      g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
 	QdaggerNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
@@ -347,10 +338,10 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 	QNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
 	QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		      phmc_dop_cheby_coef, phmc_dop_n_cheby, g_chi_up_spinor_field[j], 
+		      mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
 		      g_chi_dn_spinor_field[j]);
 	Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		      phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, 
+		      mnl->PtildeCoefs, mnl->PtildeDegree, 
 		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
       }
 
@@ -390,9 +381,8 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     }
   } 
   else if(phmc_exact_poly==1 && g_epsbar!=0.0) {
-    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
     /* B(Q*tau1) */
-    for(j = 1; j <= (phmc_dop_n_cheby-1); j++){
+    for(j = 1; j <= (mnl->MDPolyDegree-1); j++){
       Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
 
       dummy = up1; up1 = up0; up0 = dummy;
@@ -423,8 +413,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     }
   } 
   else if(phmc_exact_poly == 1 && g_epsbar == 0.0) {
-    phmc_Cpol = mnl->MDPolyLocNormConst;
-    for(j = 1; j < (phmc_dop_n_cheby); j++) {
+    for(j = 1; j < (mnl->MDPolyDegree); j++) {
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1],
 			    g_chi_up_spinor_field[0],
@@ -473,7 +462,6 @@ int init_ndpoly_monomial(const int id) {
     phmc_Cpol = mnl->MDPolyLocNormConst;
   }
 
-
   /* This is the epsilon parameter */
   mnl->EVMin = mnl->StildeMin / mnl->StildeMax;
   
@@ -499,7 +487,7 @@ int init_ndpoly_monomial(const int id) {
   }
 
   /* Chi`s-spinors  memory allocation */
-  j = init_chi_spinor_field(VOLUMEPLUSRAND/2, (phmc_dop_n_cheby+1));
+  j = init_chi_spinor_field(VOLUMEPLUSRAND/2, (mnl->MDPolyDegree+1));
   if ( j!= 0) {
     fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
     exit(0);
@@ -520,7 +508,7 @@ int init_ndpoly_monomial(const int id) {
      polinomial in  sqrt(s) .
   */
   if(mnl->MDPolyLocNormConst < 0.0){
-    fprintf(stderr, "Error, please specify MDPolyLocNormConst in the input file! Aborting...\n");
+    fprintf(stderr, "Error, please specify LocNormConst in the input file! Aborting...\n");
 #ifdef MPI
     MPI_Finalize();
 #endif
@@ -540,7 +528,7 @@ int init_ndpoly_monomial(const int id) {
     
     /* Here we read in the 2n roots needed for the polinomial in sqrt(s) */
     double *phmc_darray = (double*)mnl->MDPolyRoots;
-    for(j = 0; j< 2 * phmc_dop_n_cheby - 2; ++j) {
+    for(j = 0; j< 2 * mnl->MDPolyDegree - 2; ++j) {
       errcode = fscanf(ifs, " %d %lf %lf \n", &k, &phmc_darray[2 * j], &phmc_darray[2 * j + 1]);
     }
     fclose(ifs);
@@ -556,13 +544,20 @@ int init_ndpoly_monomial(const int id) {
   return(0);
 }
 
-void ndpoly_set_global_parameter(monomial * const mnl) {
+void ndpoly_set_global_parameter(monomial * const mnl, const int exact) {
 
   g_mubar = mnl->mubar;
   g_epsbar = mnl->epsbar;
   g_kappa = mnl->kappa;
   boundary(g_kappa);
 
+  if (g_epsbar!=0.0 || exact == 0){
+    phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
+  }
+  else {
+    phmc_Cpol = mnl->MDPolyLocNormConst;
+  }
+
   phmc_root = mnl->MDPolyRoots;
   phmc_invmaxev = mnl->EVMaxInv;
   phmc_cheb_evmin = mnl->EVMin;

From fd810d71d2160748b2f5b6085a4f59d858a79e61 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 20 Mar 2012 16:54:27 +0100
Subject: [PATCH 005/110] now several ndpoly_monomials are allowed

---
 Ptilde_nd.c               | 6 +-----
 chebyshev_polynomial_nd.c | 6 +-----
 monomial.c                | 6 ------
 3 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index facc82df0..a98b717a6 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -269,7 +269,6 @@ double chebtilde_eval(int M, double *dd, double s){
 void degree_of_Ptilde(int * _degree, double ** coefs) {
   int i, j;
   double temp, temp2;
-  static int ini=0;
   int degree;
   double sum=0.0;
 
@@ -277,10 +276,7 @@ void degree_of_Ptilde(int * _degree, double ** coefs) {
   spinor *auxs=NULL, *auxs_=NULL, *auxc=NULL, *auxc_=NULL;
   spinor *aux2s=NULL, *aux2s_=NULL, *aux2c=NULL, *aux2c_=NULL;
 
-  if(ini==0){
-    *coefs = calloc(phmc_max_ptilde_degree, sizeof(double)); 
-    ini=1;
-  }
+  *coefs = calloc(phmc_max_ptilde_degree, sizeof(double)); 
 
 #if ( defined SSE || defined SSE2 || defined SSE3)
   ss_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index f72345ab3..1392dd653 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -268,7 +268,6 @@ double cheb_eval(int M, double *c, double s){
 void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs) { 
   int j;
   double temp, temp2;
-  static int ini=0;
   int degree_of_p = *_degree_of_p + 1;
 
   double sum=0.0;
@@ -277,10 +276,7 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs) {
   spinor *auxs=NULL, *auxs_=NULL, *auxc=NULL, *auxc_=NULL;
   spinor *aux2s=NULL, *aux2s_=NULL, *aux2c=NULL, *aux2c_=NULL;
 
-  if(ini==0){
-    *coefs = calloc(degree_of_p, sizeof(double));
-    ini=1;
-  }
+  *coefs = calloc(degree_of_p, sizeof(double));
 
 #if ( defined SSE || defined SSE2 || defined SSE3)
   ss_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
diff --git a/monomial.c b/monomial.c
index dfe7e2d7f..53effa8a9 100644
--- a/monomial.c
+++ b/monomial.c
@@ -46,7 +46,6 @@
 monomial monomial_list[max_no_monomials];
 int no_monomials = 0;
 int no_gauge_monomials = 0;
-int no_ndpoly_monomials = 0;
 int clover_trlog_monomial = 0;
 static spinor * _pf;
 spinor ** w_fields;
@@ -219,14 +218,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	if(retval!=0) return retval;
       }
       else if(monomial_list[i].type == NDPOLY) {
-	if(no_ndpoly_monomials > 0) {
-	  fprintf(stderr, "maximal number of ndpoly monomials (1) exceeded!\n");
-	  exit(-1);
-	}
 	monomial_list[i].hbfunction = &ndpoly_heatbath;
 	monomial_list[i].accfunction = &ndpoly_acc;
 	monomial_list[i].derivativefunction = &ndpoly_derivative;
-	no_ndpoly_monomials++;
 	monomial_list[i].pf2 = __pf+no*V;
 	no++;
 	retval = init_ndpoly_monomial(i);

From f34da28cf38e90b3fd4c8c8381276a8e60e68d53 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 20 Mar 2012 17:18:46 +0100
Subject: [PATCH 006/110] removed dependencies in degree_of_polynomial_nd and
 degree_of_Ptilde on phmc_* variables.

removed the global variables g_acc_Hfinal and g_acc_Ptilde which are
now monomial dependent.
---
 Ptilde_nd.c               | 28 +++++++++++++++-------------
 Ptilde_nd.h               |  4 +++-
 chebyshev_polynomial_nd.c | 15 ++++++++-------
 chebyshev_polynomial_nd.h |  3 ++-
 global.h                  |  1 -
 monomial.c                |  2 ++
 ndpoly_monomial.c         | 12 +++++++-----
 ndpoly_monomial.h         |  1 +
 phmc.c                    |  2 +-
 read_input.l              |  4 ----
 10 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index a98b717a6..3e3d61faa 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -266,7 +266,9 @@ double chebtilde_eval(int M, double *dd, double s){
 
 
 
-void degree_of_Ptilde(int * _degree, double ** coefs) {
+void degree_of_Ptilde(int * _degree, double ** coefs,
+		      const double EVMin, const double EVMax,
+		      const int sloppy_degree, const double acc) {
   int i, j;
   double temp, temp2;
   int degree;
@@ -302,15 +304,15 @@ void degree_of_Ptilde(int * _degree, double ** coefs) {
   aux2c=calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
 #endif
 
-  Ptilde_cheb_coefs(phmc_cheb_evmin, phmc_cheb_evmax, *coefs, phmc_max_ptilde_degree, -1.0); 
+  Ptilde_cheb_coefs(EVMin, EVMax, *coefs, phmc_max_ptilde_degree, -1.0); 
 
   if(g_proc_id == g_stdio_proc && g_debug_level > 0){
-    printf("# NDPOLY Acceptance Polynomial: EVmin = %f  EVmax = %f\n", phmc_cheb_evmin, phmc_cheb_evmax);
-    printf("# NDPOLY ACceptance Polynomial: desired accuracy is %e \n", g_acc_Ptilde);
+    printf("# NDPOLY Acceptance Polynomial: EVmin = %f  EVmax = %f\n", EVMin, EVMax);
+    printf("# NDPOLY ACceptance Polynomial: desired accuracy is %e \n", acc);
     fflush(stdout);
   }
 
-  degree = 2*phmc_dop_n_cheby;
+  degree = 2*sloppy_degree;
 
   for(i = 0; i < 100 ; i++) {
     if (degree > phmc_max_ptilde_degree) {
@@ -332,8 +334,8 @@ void degree_of_Ptilde(int * _degree, double ** coefs) {
       printf("# NDPOLY Acceptance Polynomial: Sum remaining | d_n | = %e for degree=%d\n", sum, degree);
       printf("# NDPOLY Acceptance Polynomial: coef[degree] = %e\n", (*coefs)[degree]);
     }
-    if(sum < g_acc_Ptilde) { 
-/*     if(fabs(*coefs[degree]) < g_acc_Ptilde) { */
+    if(sum < acc) { 
+/*     if(fabs(*coefs[degree]) < acc) { */
       if((g_proc_id == g_stdio_proc) && (g_debug_level > 1)) {
         printf(" sum %e, coef %e\n", sum, (*coefs)[degree]);
       }
@@ -368,14 +370,14 @@ void degree_of_Ptilde(int * _degree, double ** coefs) {
       printf("# NDPOLY Acceptance Polynomial: relative squared accuracy in components:\n UP=%e  DN=%e \n", temp, temp2);
     }
 
-    temp = chebtilde_eval(degree, *coefs, phmc_cheb_evmin);
-    temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, phmc_cheb_evmin);
-    temp *= phmc_cheb_evmin;
-    temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, phmc_cheb_evmin);
-    temp *= chebtilde_eval(degree, *coefs, phmc_cheb_evmin);
+    temp = chebtilde_eval(degree, *coefs, EVMin);
+    temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, EVMin);
+    temp *= EVMin;
+    temp *= cheb_eval(phmc_dop_n_cheby, phmc_dop_cheby_coef, EVMin);
+    temp *= chebtilde_eval(degree, *coefs, EVMin);
     temp = 0.5*fabs(temp - 1);
     if(g_proc_id == g_stdio_proc) {
-      printf("# NDPOLY Acceptance Polynomial: Delta_IR at s=%f: | Ptilde P s_low P Ptilde - 1 |/2 = %e \n", phmc_cheb_evmin, temp);
+      printf("# NDPOLY Acceptance Polynomial: Delta_IR at s=%f: | Ptilde P s_low P Ptilde - 1 |/2 = %e \n", EVMin, temp);
     }
   }
   if(g_proc_id == g_stdio_proc) {
diff --git a/Ptilde_nd.h b/Ptilde_nd.h
index 1567a5123..a4201a515 100644
--- a/Ptilde_nd.h
+++ b/Ptilde_nd.h
@@ -29,6 +29,8 @@ void Poly_tilde_ND(spinor *R_s, spinor *R_c, double *dd, int n, spinor *S_s, spi
 
 double chebtilde_eval(int M, double *dd, double s);
 
-void degree_of_Ptilde(int * _degree, double ** coefs);
+void degree_of_Ptilde(int * _degree, double ** coefs, 
+		      const double EVMin, const double EVMax,
+		      const int sloppy_degree, const double acc);
 
 #endif
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 1392dd653..6db3de2ff 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -265,7 +265,8 @@ double cheb_eval(int M, double *c, double s){
  *****************************************************************************/
 
 
-void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs) { 
+void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
+			     const double EVMin, const double EVMax) { 
   int j;
   double temp, temp2;
   int degree_of_p = *_degree_of_p + 1;
@@ -303,13 +304,13 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs) {
 #endif
   
   
-  chebyshev_coefs(phmc_cheb_evmin, phmc_cheb_evmax, *coefs, degree_of_p, -0.5);
+  chebyshev_coefs(EVMin, EVMax, *coefs, degree_of_p, -0.5);
 
   random_spinor_field(ss,VOLUME/2, 1);
   random_spinor_field(sc,VOLUME/2, 1);
 
   if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)){
-    printf("NDPOLY MD Polynomial: EVmin = %e  EVmax = %e  \n", phmc_cheb_evmin, phmc_cheb_evmax);
+    printf("NDPOLY MD Polynomial: EVmin = %e  EVmax = %e  \n", EVMin, EVMax);
     printf("NDPOLY MD Polynomial: the degree was set to: %d\n", degree_of_p);
     fflush(stdout);
   }
@@ -338,12 +339,12 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs) {
   }
 
   if(g_debug_level > 1) {
-    temp = cheb_eval(degree_of_p, *coefs, phmc_cheb_evmin);
-    temp *= phmc_cheb_evmin;
-    temp *= cheb_eval(degree_of_p, *coefs, phmc_cheb_evmin);
+    temp = cheb_eval(degree_of_p, *coefs, EVMin);
+    temp *= EVMin;
+    temp *= cheb_eval(degree_of_p, *coefs, EVMin);
     temp = 0.5*fabs(temp - 1);
     if(g_proc_id == g_stdio_proc) {
-      printf("PHMC: Delta_IR at s=%f:    | P s_low P - 1 |/2 = %e \n", phmc_cheb_evmin, temp);
+      printf("PHMC: Delta_IR at s=%f:    | P s_low P - 1 |/2 = %e \n", EVMin, temp);
     }
   }
   /* RECALL THAT WE NEED AN EVEN DEGREE !!!! */
diff --git a/chebyshev_polynomial_nd.h b/chebyshev_polynomial_nd.h
index f22c18b12..6edd61fb0 100644
--- a/chebyshev_polynomial_nd.h
+++ b/chebyshev_polynomial_nd.h
@@ -28,6 +28,7 @@ void QdaggerQ_poly(spinor *R_s, spinor *R_c, double *c, int n, spinor *S_s, spin
 
 double cheb_eval(int M, double *c, double s);
 
-void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs);
+void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
+			     const double EVMin, const double EVMax);
 
 #endif
diff --git a/global.h b/global.h
index 2c1a40ae4..9927b3302 100644
--- a/global.h
+++ b/global.h
@@ -208,7 +208,6 @@ EXTERN int g_sf_inc_wrap_sq;
 /*************************/
 
 /* Parameters for non-degenrate case */
-EXTERN double g_acc_Ptilde;
 EXTERN double g_acc_Hfin;
 EXTERN int g_rec_ev;
 EXTERN double g_mubar, g_epsbar;
diff --git a/monomial.c b/monomial.c
index 53effa8a9..45e7b6140 100644
--- a/monomial.c
+++ b/monomial.c
@@ -115,6 +115,8 @@ int add_monomial(const int type) {
   monomial_list[no_monomials].MaxPtildeDegree = NTILDE_CHEBYMAX;
   monomial_list[no_monomials].StildeMin = _default_stilde_min;
   monomial_list[no_monomials].StildeMax = _default_stilde_max;
+  monomial_list[no_monomials].PrecisionHfinal = _default_g_acc_Hfin;
+  monomial_list[no_monomials].PrecisionPtilde = _default_g_acc_Ptilde;
 
   monomial_list[no_monomials].initialised = 1;
   if(monomial_list[no_monomials].type == NDDETRATIO) {
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 8de1dfc54..5d1b735de 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -50,7 +50,6 @@
 #include "ndpoly_monomial.h"
 
 extern int phmc_exact_poly;
-void ndpoly_set_global_parameter(monomial * const mnl, const int exact);
 
 /********************************************
  *
@@ -368,7 +367,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 	printf("PHMC: Correction aftern %d steps: %e \n", j, Diff);
       }
 
-      if(Diff < g_acc_Hfin) {
+      if(Diff < mnl->PrecisionHfinal) {
 	if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
 	  printf("PHMC: At j = %d  PHMC Only Final Energy %e \n", j, Ener[j]);
 	}
@@ -464,7 +463,7 @@ int init_ndpoly_monomial(const int id) {
 
   /* This is the epsilon parameter */
   mnl->EVMin = mnl->StildeMin / mnl->StildeMax;
-  
+  mnl->EVMax = 1.;
   /* In the following there is the  "sqrt"  since the value refers to 
      the hermitian Dirac operator (used in EV-computation), namely 
      S = Q Q^dag         
@@ -476,7 +475,8 @@ int init_ndpoly_monomial(const int id) {
   phmc_cheb_evmax = 1.0;
 
   /* Here we prepare the less precise MD polynomial first   */
-  degree_of_polynomial_nd(&mnl->MDPolyDegree, &mnl->MDPolyCoefs);
+  degree_of_polynomial_nd(&mnl->MDPolyDegree, &mnl->MDPolyCoefs,
+			  mnl->EVMin, mnl->EVMax);
   phmc_dop_n_cheby = mnl->MDPolyDegree;
   phmc_dop_cheby_coef = mnl->MDPolyCoefs;
   if((g_proc_id == 0) && (g_debug_level > 1)) {
@@ -495,7 +495,9 @@ int init_ndpoly_monomial(const int id) {
 
   /* End memory allocation */
   /* Here we prepare the precise polynomial Ptilde */
-  degree_of_Ptilde(&mnl->PtildeDegree, &mnl->PtildeCoefs);
+  degree_of_Ptilde(&mnl->PtildeDegree, &mnl->PtildeCoefs, 
+		   mnl->EVMin, mnl->EVMax, mnl->MDPolyDegree, 
+		   mnl->PrecisionPtilde);
   phmc_ptilde_cheby_coef = mnl->PtildeCoefs;
   phmc_ptilde_n_cheby = mnl->PtildeDegree;
 
diff --git a/ndpoly_monomial.h b/ndpoly_monomial.h
index 76c457620..d7787d462 100644
--- a/ndpoly_monomial.h
+++ b/ndpoly_monomial.h
@@ -26,5 +26,6 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf);
 double ndpoly_acc(const int id, hamiltonian_field_t * const hf);
 void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf);
 int init_ndpoly_monomial(const int id);
+void ndpoly_set_global_parameter(monomial * const mnl, const int exact);
 
 #endif
diff --git a/phmc.c b/phmc.c
index 59a565b94..481b3a4b3 100644
--- a/phmc.c
+++ b/phmc.c
@@ -37,7 +37,7 @@
 #include "phmc.h"
 #include "monomial.h"
 
-                                         // --> in  monomial
+//                                          --> in  monomial
 double phmc_Cpol;                        // --> MDPolyLocNormConst
 double phmc_cheb_evmin, phmc_cheb_evmax; // --> EVMin, EVMax
 double phmc_invmaxev;                    // --> EVMaxInv
diff --git a/read_input.l b/read_input.l
index 85a63d47d..05df88f0b 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1051,14 +1051,12 @@ inline void rmQuotes(char *str){
   }
   {SPC}*PrecisionPtilde{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    g_acc_Ptilde = c;
     mnl->PrecisionPtilde = c;
     if(myverbose!=0) printf("  Precision for Ptilde set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
   }
   {SPC}*PrecisionHfinal{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
     mnl->PrecisionHfinal = c;
-    g_acc_Hfin = c;
     if(myverbose!=0) printf("  Precision for final H set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
   }
   {SPC}*ComputeEVFreq{EQL}{DIGIT}+ {
@@ -1936,8 +1934,6 @@ int read_input(char * conf_file){
   dfl_poly_iter = 20;
 
   g_kappa = _default_g_kappa;
-  g_acc_Ptilde = _default_g_acc_Ptilde;
-  g_acc_Hfin = _default_g_acc_Hfin;
   g_rec_ev = _default_g_rec_ev;
   g_mubar = _default_g_mubar;
   g_epsbar = _default_g_epsbar;

From cf50c6a0943d4eb6a1bff81dfca78103c7c7b0a6 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 20 Mar 2012 17:49:28 +0100
Subject: [PATCH 007/110] added a first version of a cloverndpoly monomial

---
 cloverndpoly_monomial.c | 296 ++++++++++++++++++++++++++++++++++++++++
 cloverndpoly_monomial.h |  29 ++++
 global.h                |   1 -
 ndpoly_monomial.c       |   3 +-
 4 files changed, 327 insertions(+), 2 deletions(-)
 create mode 100644 cloverndpoly_monomial.c
 create mode 100644 cloverndpoly_monomial.h

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
new file mode 100644
index 000000000..318c1de7a
--- /dev/null
+++ b/cloverndpoly_monomial.c
@@ -0,0 +1,296 @@
+/***********************************************************************
+ *
+ * Copyright (C) 2008 Thomas Chiarappa, Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include "global.h"
+#include "su3.h"
+#include "su3adj.h"
+#include "linalg_eo.h"
+#include "start.h"
+#include "linsolve.h"
+#include "solver/solver.h"
+#include "deriv_Sb.h"
+#include "tm_operators.h"
+#include "chebyshev_polynomial.h"
+#include "Nondegenerate_Matrix.h"
+#include "Hopping_Matrix.h"
+#include "phmc.h"
+#include "Nondegenerate_Matrix.h"
+#include "chebyshev_polynomial_nd.h"
+#include "Ptilde_nd.h"
+#include "reweighting_factor_nd.h"
+#include "monomial.h"
+#include "hamiltonian_field.h"
+#include "boundary.h"
+#include "phmc.h"
+#include "init_chi_spinor_field.h"
+#include "cloverndpoly_monomial.h"
+
+/********************************************
+ * cloverndpoly_derivative: builds the chi_j fields from mnl->pf and
+ * mnl->pf2 with Q_tau1_min_cconst_ND and passes them to deriv_Sb;
+ * here \delta S_b is computed for monomial_list[id]
+ ********************************************/
+
+void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
+  int j, k;
+  monomial * mnl = &monomial_list[id];
+
+  /* Load this monomial's parameters into the globals (sets phmc_Cpol, used below) */
+  /* The factor 2 in forcefactor presumably compensates a missing factor 2 in trace_lambda */
+  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+  mnl->forcefactor = -2.*phmc_Cpol*mnl->EVMaxInv;
+
+  /* Recall:  The GAMMA_5 left of  delta M_eo  is done in  deriv_Sb !!! */
+
+  /* Here come the definitions of the chi_j fields */
+  /* from  j=0  (chi_0 = phi)  .....  to j = n-1 */
+  /* in  g_chi_up_spinor_field[0] (g_chi_dn_spinor_field[0]) we expect */
+  /* to find the phi field, the pseudo fermion field                  */
+  /* i.e. must be equal to mnl->pf (mnl->pf2)                         */
+  
+  assign(g_chi_up_spinor_field[0], mnl->pf, VOLUME/2);
+  assign(g_chi_dn_spinor_field[0], mnl->pf2, VOLUME/2);
+  
+  for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
+    Q_tau1_min_cconst_ND(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
+			 g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
+			 mnl->MDPolyRoots[k-1]);
+  }
+  
+  /* Here come the remaining fields  chi_k ; k=n,...,2n-1  */
+  /* They are evaluated step-by-step, overwriting the same field (index mnl->MDPolyDegree) */
+  
+  assign(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_up_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
+  assign(g_chi_dn_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
+  
+  for(j=(mnl->MDPolyDegree-1); j>=1; j--) {
+    assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
+    assign(g_chi_dn_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], VOLUME/2);
+    
+    Q_tau1_min_cconst_ND(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
+			 g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
+			 mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
+    
+    /* Get the even parts of the  (j-1)th  chi_spinors */
+    H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
+	    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
+    
+    /* \delta M_eo sandwiched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
+    deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[mnl->MDPolyDegree], hf);      /* UP */
+    deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf);    /* DN */
+    
+    /* Get the even parts of the  (2N-j)-th  chi_spinors */
+    H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
+	    g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
+    
+    /* \delta M_oe sandwiched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
+    deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf);
+    deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[1], hf);
+  }
+
+  /*
+    Normalisation by the largest  EW  is done in update_momenta
+    using mnl->forcefactor
+  */ 
+}
+
+/* Heatbath for monomial_list[id]: draws random up/dn fields (energy0 = their summed squared norms), applies QNon_degenerate, the root factors and Poly_tilde_ND, and stores the result in mnl->pf / mnl->pf2. */
+void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
+  int j;
+  double temp;
+  monomial * mnl = &monomial_list[id];
+
+  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+  mnl->energy0 = 0.;
+  random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
+  mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
+
+  random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, mnl->rngrepro);
+  mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
+  /* NOTE(review): both energy messages below print the same, already summed (up + dn), energy0 */
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+    printf("PHMC: Here comes the computation of H_old with \n \n");
+    printf("PHMC: First: random spinors and their norm  \n ");
+    printf("PHMC: OLD Ennergy UP %e \n", mnl->energy0);
+    printf("PHMC: OLD Energy  DN + UP %e \n\n", mnl->energy0);
+  }
+  /* QNon_degenerate on field 0 into field 1, then the root factors MDPolyRoots[MDPolyDegree-1 .. 2*MDPolyDegree-3] */
+  QNon_degenerate(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+		  g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
+  
+  for(j = 1; j < (mnl->MDPolyDegree); j++){
+    assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
+    assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
+    
+    Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+			 g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
+			 mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
+  }
+  Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
+		mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
+  
+  assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
+  assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
+  /* the norms computed into temp below are only used for the debug output */
+  temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+    printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n ");
+    printf("PHMC: Norm of BHB up squared %e \n", temp);
+  }
+
+  temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
+
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)){
+    printf("PHMC: Norm of BHB up + BHB dn squared %e \n\n", temp);
+  }
+  if(g_proc_id == 0 && g_debug_level > 3) {
+    printf("called cloverndpoly_heatbath for id %d with g_running_phmc = %d\n", id, g_running_phmc);
+  }
+  return;
+}
+
+/* Acceptance step for monomial_list[id]: recomputes the energy from mnl->pf / mnl->pf2 into mnl->energy1 and returns mnl->energy1 - mnl->energy0. */
+double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
+  int j, ij=0;
+  double temp, sgn, fact, Diff;
+  double Ener[8];
+  double factor[8];
+  monomial * mnl = &monomial_list[id];
+  spinor *up0, *dn0, *up1, *dn1, *dummy;
+
+  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+  mnl->energy1 = 0.;
+  Ener[0] = 0;
+  factor[0] = 1.0;
+  for(j = 1; j < 8; j++){
+    factor[j] = j*factor[j-1];
+    Ener[j] = 0;
+  }
+  /* IF PHMC */
+  up0 = g_chi_up_spinor_field[0];
+  up1 = g_chi_up_spinor_field[1];
+  dn0 = g_chi_dn_spinor_field[0];
+  dn1 = g_chi_dn_spinor_field[1];
+  /* This is needed if we consider only "1" in eq. 9 */
+  assign(up0, mnl->pf , VOLUME/2);
+  assign(dn0, mnl->pf2, VOLUME/2);
+  /* apply the root factors MDPolyRoots[0 .. MDPolyDegree-2], alternating between chi fields 0 and 1 */
+  for(j = 1; j <= (mnl->MDPolyDegree-1); j++) {
+    /* Change this name !!*/
+    Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
+    
+    dummy = up1; up1 = up0; up0 = dummy;
+    dummy = dn1; dn1 = dn0; dn0 = dummy;
+    /* after the swap the result is always in up0 and dn0 */
+  }
+  
+  ij=0;
+  if(up0 != g_chi_up_spinor_field[ij]) {
+    assign(g_chi_up_spinor_field[ij], up0, VOLUME/2);
+    assign(g_chi_dn_spinor_field[ij], dn0, VOLUME/2);
+  }
+  
+  temp = square_norm(g_chi_up_spinor_field[ij], VOLUME/2, 1);
+  Ener[ij] = temp;
+  
+  temp = square_norm(g_chi_dn_spinor_field[ij], VOLUME/2, 1);
+  Ener[ij] += temp;
+  
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+    printf("PHMC: Here comes the computation of H_new with \n \n");
+    
+    printf("PHMC: At j=%d  PHMC Final Energy %e \n", ij, mnl->energy1+Ener[ij]);
+    printf("PHMC: At j=%d  PHMC Only Final Energy %e \n", ij, Ener[ij]);
+  }
+  
+  /* Loop for the evaluation of the corrections A, A^2, ...  */
+  for(j = 1; j < 1; j++){ /* bound j<1: corrections are omitted, this body never runs */
+    
+    if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
+      Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+		    mnl->PtildeCoefs, mnl->PtildeDegree, 
+		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+      QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
+		    mnl->MDPolyCoefs, mnl->MDPolyDegree, 
+		    g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
+      QdaggerNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+			    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+    }
+    else { /*  Chi[j] = ( Ptilde P Q ) Chi[j-1]  */ 
+      QNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+      QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
+		    mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
+		    g_chi_dn_spinor_field[j]);
+      Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+		    mnl->PtildeCoefs, mnl->PtildeDegree, 
+		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+    }
+    
+    Ener[j] = Ener[j-1] + Ener[0];
+    sgn = -1.0;
+    for(ij = 1; ij < j; ij++){
+      fact = factor[j] / (factor[ij] * factor[j-ij]);
+      if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+	printf("PHMC: Here  j=%d  and  ij=%d   sign=%f  fact=%f \n", j ,ij, sgn, fact);
+      }
+      Ener[j] += sgn*fact*Ener[ij];
+      sgn = -sgn;
+    }
+    temp = square_norm(g_chi_up_spinor_field[j], VOLUME/2, 1);
+    temp += square_norm(g_chi_dn_spinor_field[j], VOLUME/2, 1);
+    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+      printf("PHMC: Here  j=%d   sign=%f  temp=%e \n", j, sgn, temp);
+    }
+    
+    Ener[j] += sgn*temp;
+    
+    Diff = fabs(Ener[j] - Ener[j-1]);
+    if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)) {
+      printf("PHMC: Correction aftern %d steps: %e \n", j, Diff);
+    }
+    
+    if(Diff < mnl->PrecisionHfinal) {
+      if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+	printf("PHMC: At j = %d  PHMC Only Final Energy %e \n", j, Ener[j]);
+      }
+      break;
+    }
+  }
+  mnl->energy1 += Ener[ij];  /* sticky: relies on ij, which is still 0 here as the loop above never runs */
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+    printf("PHMC: At j = %d  P=%e +HMC Final Energy %e \n\n", ij, Ener[ij], mnl->energy1);
+  }
+  
+  if(g_proc_id == 0 && g_debug_level > 3) {
+    printf("called cloverndpoly_acc for id %d %d dH = %1.4e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
+  }
+  /* END IF PHMC */
+  return(mnl->energy1 - mnl->energy0);
+}
+
+
diff --git a/cloverndpoly_monomial.h b/cloverndpoly_monomial.h
new file mode 100644
index 000000000..47095f0b7
--- /dev/null
+++ b/cloverndpoly_monomial.h
@@ -0,0 +1,29 @@
+/***********************************************************************
+ * 
+ * Copyright (C) 2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+#ifndef _CLOVERNDPOLY_MONOMIAL_H
+#define _CLOVERNDPOLY_MONOMIAL_H
+
+#include "hamiltonian_field.h"
+/* Entry points of the clover ND polynomial monomial; id indexes monomial_list. */
+void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf); /* sets mnl->forcefactor; presumably adds the force term -- confirm in the .c file */
+double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf); /* returns mnl->energy1 - mnl->energy0 */
+void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf); /* draws random spinor fields and stores mnl->energy0 */
+
+#endif /* _CLOVERNDPOLY_MONOMIAL_H */
diff --git a/global.h b/global.h
index 9927b3302..6376fe78b 100644
--- a/global.h
+++ b/global.h
@@ -208,7 +208,6 @@ EXTERN int g_sf_inc_wrap_sq;
 /*************************/
 
 /* Parameters for non-degenrate case */
-EXTERN double g_acc_Hfin;
 EXTERN int g_rec_ev;
 EXTERN double g_mubar, g_epsbar;
 EXTERN int g_use_clover_flag;
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 5d1b735de..d76f38196 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -447,6 +447,7 @@ int init_ndpoly_monomial(const int id) {
   monomial * mnl = &monomial_list[id];
   int j, k, errcode;
   FILE * ifs;
+  double *phmc_darray;
   char title[100];
 
   phmc_invmaxev = 1.0;
@@ -529,7 +530,7 @@ int init_ndpoly_monomial(const int id) {
     }
     
     /* Here we read in the 2n roots needed for the polinomial in sqrt(s) */
-    double *phmc_darray = (double*)mnl->MDPolyRoots;
+    phmc_darray = (double*)mnl->MDPolyRoots;
     for(j = 0; j< 2 * mnl->MDPolyDegree - 2; ++j) {
       errcode = fscanf(ifs, " %d %lf %lf \n", &k, &phmc_darray[2 * j], &phmc_darray[2 * j + 1]);
     }

From 0c14a02bb7a66efe06739e821d1207abc1ca9e10 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 21 Mar 2012 09:17:36 +0100
Subject: [PATCH 008/110] phmc_exact_poly removed

---
 cloverndpoly_monomial.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 318c1de7a..8258a40a4 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -61,7 +61,7 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
 
   /* This factor 2 a missing factor 2 in trace_lambda */
-  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+  ndpoly_set_global_parameter(mnl, 0);
   mnl->forcefactor = -2.*phmc_Cpol*mnl->EVMaxInv;
 
   /* Recall:  The GAMMA_5 left of  delta M_eo  is done in  deriv_Sb !!! */
@@ -124,7 +124,7 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   double temp;
   monomial * mnl = &monomial_list[id];
 
-  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+  ndpoly_set_global_parameter(mnl, 0);
   mnl->energy0 = 0.;
   random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
@@ -182,7 +182,7 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
   spinor *up0, *dn0, *up1, *dn1, *dummy;
 
-  ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+  ndpoly_set_global_parameter(mnl, 0);
   mnl->energy1 = 0.;
   Ener[0] = 0;
   factor[0] = 1.0;

From f77fc28888877ac23f2fa041c9e9b71103d46408 Mon Sep 17 00:00:00 2001
From: Andreas Nube <annube@ifh.de>
Date: Wed, 23 May 2012 16:04:56 +0200
Subject: [PATCH 009/110] Corrected sample input file and roots for the POLY
 monomial.

---
 ...BR_roots.dat.oox.70.3.3333333333333333e-02 | 71 ---------------
 ...BR_roots.dat.oox.90.2.5000000000000001e-02 | 91 +++++++++++++++++++
 ...y-monomial.input => sample-hmc-poly.input} | 18 ++--
 3 files changed, 100 insertions(+), 80 deletions(-)
 delete mode 100644 sample-input/Square_root_BR_roots.dat.oox.70.3.3333333333333333e-02
 create mode 100644 sample-input/Square_root_BR_roots.dat.oox.90.2.5000000000000001e-02
 rename sample-input/{sample-poly-monomial.input => sample-hmc-poly.input} (76%)

diff --git a/sample-input/Square_root_BR_roots.dat.oox.70.3.3333333333333333e-02 b/sample-input/Square_root_BR_roots.dat.oox.70.3.3333333333333333e-02
deleted file mode 100644
index d2c9a506f..000000000
--- a/sample-input/Square_root_BR_roots.dat.oox.70.3.3333333333333333e-02
+++ /dev/null
@@ -1,71 +0,0 @@
-Nr    Re   Im
-0 +6.6322525213938666e-01 -1.7507484071221505e-01
-1 +3.0582346700754298e-01 +1.6668010933305000e-01
-2 +1.0207402713021287e+00 +4.0063807932429460e-02
-3 +3.2033459390200150e-02 +6.3286769999307546e-02
-4 +8.9392585423134796e-01 +1.2474487588440222e-01
-5 +5.2809641169581334e-01 +1.8252950567510876e-01
-6 +1.5539178781443772e-01 -1.3051960817907698e-01
-7 +9.5018719536263196e-01 -9.9324081316041257e-02
-8 +7.1137470864652455e-02 +9.2450631513906215e-02
-9 +7.8808470744405457e-01 -1.5535265173080559e-01
-10 +3.9216668706408264e-01 +1.7719429697112907e-01
-11 +1.0328276337019864e+00 +8.0758672651457114e-03
-12 +4.3697138463459373e-01 +1.8038914108846885e-01
-13 +4.9758369887444670e-02 +7.8174611922280840e-02
-14 +8.6125002426560293e-01 -1.3603884221153342e-01
-15 +1.2416167296530280e-01 -1.1872594963715792e-01
-16 +9.7333238525187349e-01 +8.5396205259095900e-02
-17 +5.7370356429788927e-01 -1.8145827488950486e-01
-18 +1.8101460829881462e-02 +4.7903623293715412e-02
-19 +7.4816783839358225e-01 -1.6322133817682749e-01
-20 +2.2606786805226331e-01 -1.5095813976423819e-01
-21 +1.0287879742980310e+00 +2.4164397184421971e-02
-22 +4.8239980584186309e-01 -1.8217219559615450e-01
-23 +8.0714109849250593e-03 +3.2145565785717208e-02
-24 +9.2364911954748807e-01 +1.1247461176812193e-01
-25 +2.6496069728692179e-01 +1.5944305358624117e-01
-26 +7.0643915879345820e-01 +1.6981259634794632e-01
-27 +9.6003441869820058e-02 +1.0600309956473622e-01
-28 +1.0087475089017013e+00 +5.5649665062847360e-02
-29 +6.1886432584251738e-01 -1.7896688707266856e-01
-30 +2.0218085946119052e-03 +1.6135925640876996e-02
-31 +8.2587736232326314e-01 +1.4626812015008012e-01
-32 +1.8944936842517579e-01 -1.4129177382196961e-01
-33 +9.9290354672292058e-01 +7.0799988111543963e-02
-34 +3.4833637063727280e-01 +1.7261266723108312e-01
-35 +3.4833637063727269e-01 -1.7261266723108309e-01
-36 +9.9290354672292058e-01 -7.0799988111544004e-02
-37 +1.8944936842517590e-01 +1.4129177382196964e-01
-38 +8.2587736232326303e-01 -1.4626812015008014e-01
-39 +2.0218085946118480e-03 -1.6135925640876837e-02
-40 +6.1886432584251760e-01 +1.7896688707266856e-01
-41 +1.0087475089017013e+00 -5.5649665062847395e-02
-42 +9.6003441869820003e-02 -1.0600309956473619e-01
-43 +7.0643915879345764e-01 -1.6981259634794638e-01
-44 +2.6496069728692184e-01 -1.5944305358624120e-01
-45 +9.2364911954748785e-01 -1.1247461176812197e-01
-46 +8.0714109849251182e-03 -3.2145565785717249e-02
-47 +4.8239980584186321e-01 +1.8217219559615450e-01
-48 +1.0287879742980310e+00 -2.4164397184421933e-02
-49 +2.2606786805226378e-01 +1.5095813976423833e-01
-50 +7.4816783839358214e-01 +1.6322133817682752e-01
-51 +1.8101460829881517e-02 -4.7903623293715517e-02
-52 +5.7370356429788927e-01 +1.8145827488950486e-01
-53 +9.7333238525187327e-01 -8.5396205259096081e-02
-54 +1.2416167296530280e-01 +1.1872594963715792e-01
-55 +8.6125002426560293e-01 +1.3603884221153348e-01
-56 +4.9758369887444670e-02 -7.8174611922280854e-02
-57 +4.3697138463459378e-01 -1.8038914108846885e-01
-58 +1.0328276337019864e+00 -8.0758672651458363e-03
-59 +3.9216668706408253e-01 -1.7719429697112904e-01
-60 +7.8808470744405423e-01 +1.5535265173080565e-01
-61 +7.1137470864652344e-02 -9.2450631513906159e-02
-62 +9.5018719536263185e-01 +9.9324081316041299e-02
-63 +1.5539178781443799e-01 +1.3051960817907707e-01
-64 +5.2809641169581323e-01 -1.8252950567510876e-01
-65 +8.9392585423134774e-01 -1.2474487588440231e-01
-66 +3.2033459390200150e-02 -6.3286769999307518e-02
-67 +1.0207402713021287e+00 -4.0063807932429502e-02
-68 +3.0582346700754315e-01 -1.6668010933305002e-01
-69 +6.6322525213938688e-01 +1.7507484071221505e-01
diff --git a/sample-input/Square_root_BR_roots.dat.oox.90.2.5000000000000001e-02 b/sample-input/Square_root_BR_roots.dat.oox.90.2.5000000000000001e-02
new file mode 100644
index 000000000..c6111109b
--- /dev/null
+++ b/sample-input/Square_root_BR_roots.dat.oox.90.2.5000000000000001e-02
@@ -0,0 +1,91 @@
+Nr    Re   Im
+0 +1.2211479456106088e-03 -1.0908457734962369e-02
+1 +6.2654197865261119e-01 +1.5414963784835903e-01
+2 +8.4567834800523134e-01 -1.2014206875561331e-01
+3 +3.1433329424252282e-01 -1.4581570816471340e-01
+4 +1.1738649791757295e-01 +1.0070144019247462e-01
+5 +1.0101076814308516e+00 +3.7839128945244924e-02
+6 +9.5808749017252073e-02 -9.2051585958737650e-02
+7 +3.4741327357827378e-01 +1.4968620686084821e-01
+8 +1.0004603037792410e+00 -4.8340446383110115e-02
+9 +5.9179878408001907e-01 -1.5620970143971188e-01
+10 +4.8787724563739004e-03 +2.1764931701133267e-02
+11 +8.7175105065065162e-01 +1.1276416989149235e-01
+12 +3.0238701626782070e-02 -5.3507806113339718e-02
+13 +4.8597230664684260e-01 +1.5790192860978586e-01
+14 +9.3924154162719309e-01 -8.7559498132597072e-02
+15 +4.5072495136914692e-01 -1.5696105478055902e-01
+16 +4.3353395630749879e-02 +6.3645144733833259e-02
+17 +9.5780508397156583e-01 +7.8267761211901932e-02
+18 +1.9296147654739904e-01 -1.2361841155124884e-01
+19 +2.2136681730028274e-01 +1.3012517468972598e-01
+20 +1.0246946220324236e+00 -5.4574807589587862e-03
+21 +7.2686224789747711e-01 -1.4361859618746906e-01
+22 +7.5846795745439066e-01 +1.3871373555362129e-01
+23 +1.0955443285822932e-02 -3.2517685856052560e-02
+24 +5.5667769468881745e-01 +1.5752535466619627e-01
+25 +8.9611175843768931e-01 -1.0484889840531687e-01
+26 +3.8127996634550704e-01 -1.5284338261456590e-01
+27 +7.6216723688019974e-02 +8.2963063997844741e-02
+28 +9.8848757306421742e-01 +5.8611399579841045e-02
+29 +1.4084714258901954e-01 -1.0887140620450983e-01
+30 +2.8219766950433001e-01 +1.4125033121506414e-01
+31 +1.0173837318711476e+00 -2.7157490826110887e-02
+32 +6.6074171126249082e-01 -1.5135498103329828e-01
+33 +8.1801789880297815e-01 +1.2694743594971536e-01
+34 +5.8703786852729915e-02 -7.3479185255829776e-02
+35 +4.1577198233038004e-01 +1.5527219004563012e-01
+36 +9.7424654479998973e-01 -6.8603042766088973e-02
+37 +5.2134607846401149e-01 -1.5809032784104277e-01
+38 +1.9422202330154795e-02 +4.3115478429394900e-02
+39 +9.1864438149993377e-01 +9.6433974170313352e-02
+40 +2.5115954025005527e-01 -1.3601183211212667e-01
+41 +1.6607888237581345e-01 +1.1652255038621386e-01
+42 +1.0222537814062862e+00 +1.6346434895736721e-02
+43 +7.8890151768083949e-01 -1.3314784079997155e-01
+44 +6.9423500460929954e-01 +1.4783904880693313e-01
+45 +6.9423500460929932e-01 -1.4783904880693316e-01
+46 +7.8890151768083971e-01 +1.3314784079997150e-01
+47 +1.0222537814062862e+00 -1.6346434895736763e-02
+48 +1.6607888237581339e-01 -1.1652255038621384e-01
+49 +2.5115954025005549e-01 +1.3601183211212670e-01
+50 +9.1864438149993366e-01 -9.6433974170313380e-02
+51 +1.9422202330154723e-02 -4.3115478429394817e-02
+52 +5.2134607846401160e-01 +1.5809032784104277e-01
+53 +9.7424654479998973e-01 +6.8603042766088945e-02
+54 +4.1577198233038015e-01 -1.5527219004563012e-01
+55 +5.8703786852729881e-02 +7.3479185255829763e-02
+56 +8.1801789880297793e-01 -1.2694743594971541e-01
+57 +6.6074171126249093e-01 +1.5135498103329825e-01
+58 +1.0173837318711478e+00 +2.7157490826110849e-02
+59 +2.8219766950432978e-01 -1.4125033121506411e-01
+60 +1.4084714258901956e-01 +1.0887140620450984e-01
+61 +9.8848757306421731e-01 -5.8611399579841079e-02
+62 +7.6216723688020002e-02 -8.2963063997844755e-02
+63 +3.8127996634550737e-01 +1.5284338261456593e-01
+64 +8.9611175843768931e-01 +1.0484889840531685e-01
+65 +5.5667769468881745e-01 -1.5752535466619627e-01
+66 +1.0955443285822979e-02 +3.2517685856052629e-02
+67 +7.5846795745439033e-01 -1.3871373555362135e-01
+68 +7.2686224789747722e-01 +1.4361859618746903e-01
+69 +1.0246946220324236e+00 +5.4574807589587472e-03
+70 +2.2136681730028260e-01 -1.3012517468972595e-01
+71 +1.9296147654739915e-01 +1.2361841155124885e-01
+72 +9.5780508397156572e-01 -7.8267761211901959e-02
+73 +4.3353395630749747e-02 -6.3645144733833162e-02
+74 +4.5072495136914681e-01 +1.5696105478055902e-01
+75 +9.3924154162719320e-01 +8.7559498132597044e-02
+76 +4.8597230664684260e-01 -1.5790192860978586e-01
+77 +3.0238701626782171e-02 +5.3507806113339801e-02
+78 +8.7175105065065150e-01 -1.1276416989149238e-01
+79 +4.8787724563738736e-03 -2.1764931701133208e-02
+80 +5.9179878408001918e-01 +1.5620970143971188e-01
+81 +1.0004603037792410e+00 +4.8340446383110074e-02
+82 +3.4741327357827345e-01 -1.4968620686084816e-01
+83 +9.5808749017252073e-02 +9.2051585958737650e-02
+84 +1.0101076814308516e+00 -3.7839128945244965e-02
+85 +1.1738649791757295e-01 -1.0070144019247461e-01
+86 +3.1433329424252304e-01 +1.4581570816471343e-01
+87 +8.4567834800523123e-01 +1.2014206875561334e-01
+88 +6.2654197865261108e-01 -1.5414963784835903e-01
+89 +1.2211479456106199e-03 +1.0908457734962419e-02
diff --git a/sample-input/sample-poly-monomial.input b/sample-input/sample-hmc-poly.input
similarity index 76%
rename from sample-input/sample-poly-monomial.input
rename to sample-input/sample-hmc-poly.input
index acff6a8c2..c29312bdc 100644
--- a/sample-input/sample-poly-monomial.input
+++ b/sample-input/sample-hmc-poly.input
@@ -22,10 +22,10 @@ ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
 InitialStoreCounter = readin
 DebugLevel = 1
-StartCondition = restart
+StartCondition = hot
 ComputeEVs = no
 
-BeginMeasurement CORREATORS
+BeginMeasurement CORRELATORS
   Frequency = 5
 EndMeasurement
 
@@ -38,15 +38,15 @@ EndMonomial
 
 BeginMonomial POLY
  Timescale = 1
- Degree = 70
+ Degree = 90
  Lmin = 0.1
- Lmax = 3.0
- LocNormConst = 2.9035094762297051
+ Lmax = 4.0
+ LocNormConst = 3.0187720224543191
  2KappaMu = 0.177
  Kappa = 0.177
- RootsFile = "Square_root_BR_roots.dat.oox.70.3.3333333333333333e-02"
- AcceptancePrecision =  1e-20
- ForcePrecision = 1e-12
+ RootsFile = "Square_root_BR_roots.dat.oox.90.2.5000000000000001e-02"
+ AcceptancePrecision =  1.e-20
+ ForcePrecision = 1.e-12
 EndMonomial
 
 BeginIntegrator 
@@ -54,7 +54,7 @@ BeginIntegrator
   Type1 = 2MN
   IntegrationSteps0 = 3
   IntegrationSteps1 = 20
-  Tau = 2.0
+  Tau = 2.
   Lambda0 = 0.19
   Lambda1 = 0.20
   NumberOfTimescales = 2

From 6dcd386c05d010796b41eb1d9f5d9391a9bee922 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 29 May 2012 11:52:08 +0200
Subject: [PATCH 010/110] just some cleaning and first steps towards including
 clover for ND

---
 cloverndpoly_monomial.c | 12 ++++++++++++
 ndpoly_monomial.c       | 25 ++++++++++++-------------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 8258a40a4..592a5dbc2 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -125,6 +125,12 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
 
   ndpoly_set_global_parameter(mnl, 0);
+  g_mu3 = 0.;
+  g_c_sw = mnl->c_sw;
+  init_sw_fields();
+  sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
+  sw_invert(EE, mnl->mu);
+
   mnl->energy0 = 0.;
   random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
@@ -183,6 +189,12 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   spinor *up0, *dn0, *up1, *dn1, *dummy;
 
   ndpoly_set_global_parameter(mnl, 0);
+  g_mu3 = 0.;
+  g_c_sw = mnl->c_sw;
+  init_sw_fields();
+  sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
+  sw_invert(EE, mnl->mu);
+
   mnl->energy1 = 0.;
   Ener[0] = 0;
   factor[0] = 1.0;
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index d76f38196..9a444287d 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -247,20 +247,22 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
   assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
 
-  temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-    printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n ");
-    printf("PHMC: Norm of BHB up squared %e \n", temp);
-  }
+  if(g_debug_level > 2) {
+    temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
+    if(g_proc_id == g_stdio_proc) {
+      printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n ");
+      printf("PHMC: Norm of BHB up squared %e \n", temp);
+    }
 
-  if(g_epsbar!=0.0 || phmc_exact_poly==0) 
-    temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
+    if(g_epsbar!=0.0 || phmc_exact_poly==0) 
+      temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
 
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)){
-    printf("PHMC: Norm of BHB up + BHB dn squared %e \n\n", temp);
+    if(g_proc_id == g_stdio_proc){
+      printf("PHMC: Norm of BHB up + BHB dn squared %e \n\n", temp);
+    }
   }
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called ndpoly_heatbath for id %d with g_running_phmc = %d\n", id, g_running_phmc);
+    printf("called ndpoly_heatbath for id %d \n", id);
   }
   return;
 }
@@ -375,9 +377,6 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
       }
     }
     mnl->energy1 += Ener[ij];  /* this is quite sticky */
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-      printf("PHMC: At j = %d  P=%e +HMC Final Energy %e \n\n", ij, Ener[ij], mnl->energy1);
-    }
   } 
   else if(phmc_exact_poly==1 && g_epsbar!=0.0) {
     /* B(Q*tau1) */

From 7a407d4b1fcd117ae6de202fb09c86c99a6c996b Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 29 May 2012 18:17:29 +0200
Subject: [PATCH 011/110] Naming scheme for nd operators adapted to the single
 flavour case

---
 Nondegenerate_Matrix.c    | 170 ++++++++++++++------------------------
 Nondegenerate_Matrix.h    |  33 +++-----
 Ptilde_nd.c               |  12 +--
 Ptilde_nd.h               |   2 +-
 chebyshev_polynomial.c    |  16 ++--
 chebyshev_polynomial_nd.c |   6 +-
 cloverndpoly_monomial.c   |  30 +++----
 eigenvalues_bi.c          |   6 +-
 invert_doublet_eo.c       |  17 ++--
 max_eigenvalues_bi.c      |  12 +--
 nddetratio_monomial.c     |   6 +-
 ndpoly_monomial.c         |  40 ++++-----
 operator.c                |   2 +-
 poly_monomial.c           |  10 +--
 reweighting_factor_nd.c   |   2 +-
 15 files changed, 147 insertions(+), 217 deletions(-)

diff --git a/Nondegenerate_Matrix.c b/Nondegenerate_Matrix.c
index b9a1df54a..a6157de48 100644
--- a/Nondegenerate_Matrix.c
+++ b/Nondegenerate_Matrix.c
@@ -38,24 +38,11 @@
 #include "gamma.h"
 #include "linsolve.h"
 #include "linalg_eo.h"
+#include "tm_operators.h"
 #include "Nondegenerate_Matrix.h"
 
 
-void mul_one_minus_imubar(spinor * const l, spinor * const k);
-/******************************************
- * mul_one_plus_imubar_inv computes
- * l = [(1-i\mubar\gamma_5) * l
- *
-*/
-
-void mul_one_plus_imubar(spinor * const l, spinor * const k);
-/******************************************
- * mul_one_plus_imubar_inv computes
- * l = [(1+i\mubar\gamma_5) * l
- *
-*/
-
-void Qtm_pm_psi(spinor *l,spinor *k);
+void mul_one_minus_iconst(spinor * const l, spinor * const k, const double mu); /* presumably l = (1 - i*mu*gamma_5) * k; called below with +/-g_mubar -- TODO confirm against its definition */
 
 /* external functions */
 
@@ -72,7 +59,7 @@ void Qtm_pm_psi(spinor *l,spinor *k);
  * it acts only on the odd part or only 
  * on a half spinor
  ******************************************/
-void QNon_degenerate(spinor * const l_strange, spinor * const l_charm,
+void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                      spinor * const k_strange, spinor * const k_charm){
 
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
@@ -81,8 +68,8 @@ void QNon_degenerate(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX]);
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -96,8 +83,8 @@ void QNon_degenerate(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX], k_strange);
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+1], k_charm);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], k_strange, -g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], k_charm, g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], k_charm, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], k_strange, -g_epsbar, VOLUME/2);
@@ -122,7 +109,7 @@ void QNon_degenerate(spinor * const l_strange, spinor * const l_charm,
  *
  *  = Qhat(2x2)  with   g_mubar  ->  - g_mubar
  *
- * With respect to QNon_degenerate the role of charme and strange fields
+ * With respect to Qtm_ndpsi the roles of the charm and strange fields
  * are interchenged, since Qdagger=tau_1 Q tau_1
  * see documentation for details
  * k_charm and k_strange are the input fields
@@ -131,7 +118,7 @@ void QNon_degenerate(spinor * const l_strange, spinor * const l_charm,
  * it acts only on the odd part or only
  * on a half spinor
  ******************************************/
-void QdaggerNon_degenerate(spinor * const l_strange, spinor * const l_charm,
+void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
                            spinor * const k_strange, spinor * const k_charm){
 
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
@@ -140,8 +127,8 @@ void QdaggerNon_degenerate(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX]);
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -155,8 +142,8 @@ void QdaggerNon_degenerate(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+2], k_charm);
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+3], k_strange);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, -g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
@@ -191,7 +178,7 @@ void QdaggerNon_degenerate(spinor * const l_strange, spinor * const l_charm,
  * it acts only on the odd part or only
  * on a half spinor
  ******************************************/
-void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
+void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                            spinor * const k_strange, spinor * const k_charm){
 
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
@@ -201,8 +188,8 @@ void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX]);
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -214,8 +201,8 @@ void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+2], k_charm);
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+3], k_strange);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, -g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
@@ -245,8 +232,8 @@ void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
 
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX]);
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -258,8 +245,8 @@ void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], -g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+6], -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+7], -g_epsbar, VOLUME/2);
@@ -284,7 +271,7 @@ void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
  *
  * This is the implementation of 
  *
- *  Q_tau1_min_cconst_ND =  M - z_k 
+ *  Q_tau1_sub_const_ndpsi =  M - z_k 
  *
  *  with M = Qhat(2x2) tau_1   and z_k \in Complex
  *
@@ -302,7 +289,7 @@ void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
  * it acts only on the odd part or only
  * on a half spinor
  ******************************************/
-void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
+void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
                      spinor * const k_strange, spinor * const k_charm, const _Complex double z){
 
 
@@ -321,10 +308,8 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX]);
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1]);
-
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -338,20 +323,16 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+4]);
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
-
   /* Here the M_oo  implementation  */
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX], k_charm);
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+1], k_strange);
-
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], k_charm, -g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], k_strange, g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], k_charm, -g_epsbar, VOLUME/2);
 
-
   diff(l_strange, g_spinor_field[DUM_MATRIX], l_strange, VOLUME/2);
   diff(l_charm, g_spinor_field[DUM_MATRIX+1], l_charm, VOLUME/2);
 
-
   /* and finally the  gamma_5  multiplication  */
   gamma5(l_strange, l_strange, VOLUME/2);
   gamma5(l_charm, l_charm, VOLUME/2);
@@ -360,20 +341,12 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
   mul_r(l_strange, phmc_invmaxev, l_strange, VOLUME/2);
   mul_r(l_charm, phmc_invmaxev, l_charm, VOLUME/2);
 
-  /*     
-  printf(" IN UP: %f %f \n", l_strange[0].creal(s2.c1), l_strange[0].cimag(s2.c1));
-  printf(" IN DN: %f %f \n", l_charm[0].creal(s2.c1), l_charm[0].cimag(s2.c1));
-  */
-
-  /*  AND FINALLY WE SUBSTRACT THE C-CONSTANT  */
-
-
   /************ loop over all lattice sites ************/
-  for(ix = 0; ix < (VOLUME/2); ix++){
-
+  for(ix = 0; ix < (VOLUME/2); ix++) {
+    
     r=l_strange + ix;
     s=k_strange + ix;
-
+    
     _complex_times_vector(phi1, z, s->s0);
     _vector_sub_assign(r->s0, phi1);
     _complex_times_vector(phi1, z, s->s1);
@@ -385,7 +358,7 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
     
     r=l_charm + ix;
     s=k_charm + ix;
-
+    
     _complex_times_vector(phi1, z, s->s0);
     _vector_sub_assign(r->s0, phi1);
     _complex_times_vector(phi1, z, s->s1);
@@ -395,7 +368,7 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
     _complex_times_vector(phi1, z, s->s3);
     _vector_sub_assign(r->s3, phi1);    
   }
-
+  
   /* Finally, we multiply by the constant  phmc_Cpol  */
   /* which renders the polynomial in monomials  */
   /* identical to the polynomial a la clenshaw */;
@@ -425,7 +398,7 @@ void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
  * it acts only on the odd part or only
  * on a half spinor
  ******************************************/
-void Q_Qdagger_ND_BI(bispinor * const bisp_l, bispinor * const bisp_k){
+void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
 
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
   static int memalloc = 0;
@@ -468,8 +441,8 @@ void Q_Qdagger_ND_BI(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX]);
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -483,8 +456,8 @@ void Q_Qdagger_ND_BI(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+2], k_charm);
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+3], k_strange);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, -g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
@@ -514,8 +487,8 @@ void Q_Qdagger_ND_BI(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
 
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX]);
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -529,8 +502,8 @@ void Q_Qdagger_ND_BI(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_plus_imubar(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
-  mul_one_minus_imubar(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], -g_mubar);
+  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+6], -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+7], -g_epsbar, VOLUME/2);
@@ -566,7 +539,7 @@ void Q_Qdagger_ND_BI(bispinor * const bisp_l, bispinor * const bisp_k){
  * it acts only on the odd part or only 
  * on a half spinor
  ******************************************/
-void H_eo_ND(spinor * const l_strange, spinor * const l_charm, 
+void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm, 
              spinor * const k_strange, spinor * const k_charm, 
 	     const int ieo) {
 
@@ -577,8 +550,8 @@ void H_eo_ND(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_minus_imubar(l_strange, g_spinor_field[DUM_MATRIX+1]);
-  mul_one_plus_imubar(l_charm, g_spinor_field[DUM_MATRIX]);
+  mul_one_minus_iconst(l_strange, g_spinor_field[DUM_MATRIX+1], g_mubar);
+  mul_one_minus_iconst(l_charm, g_spinor_field[DUM_MATRIX], -g_mubar);
 
   assign_add_mul_r(l_strange, g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
   assign_add_mul_r(l_charm, g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
@@ -588,7 +561,7 @@ void H_eo_ND(spinor * const l_strange, spinor * const l_charm,
 
 }
 
-void M_ee_inv_ND(spinor * const l_strange, spinor * const l_charm, 
+void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
 		 spinor * const k_strange, spinor * const k_charm) {
   
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
@@ -596,8 +569,8 @@ void M_ee_inv_ND(spinor * const l_strange, spinor * const l_charm,
 
   /* recall:   strange <-> up    while    charm <-> dn   */
 
-  mul_one_minus_imubar(l_strange, k_strange);
-  mul_one_plus_imubar(l_charm, k_charm);
+  mul_one_minus_iconst(l_strange, k_strange, g_mubar);
+  mul_one_minus_iconst(l_charm, k_charm, -g_mubar);
 
   assign_add_mul_r(l_strange, k_charm, g_epsbar, VOLUME/2);
   assign_add_mul_r(l_charm, k_strange, g_epsbar, VOLUME/2);
@@ -655,8 +628,7 @@ void mul_one_pm_itau2(spinor * const p, spinor * const q,
   mul_r(q, fac, q, N);
 }
 
-
-void mul_one_minus_imubar(spinor * const l, spinor * const k)
+void mul_one_minus_iconst(spinor * const l, spinor * const k, const double mu)
 {
   spinor *r, *s;
   static su3_vector phi1;
@@ -666,43 +638,21 @@ void mul_one_minus_imubar(spinor * const l, spinor * const k)
     r=l + ix;
     s=k + ix;
     /* Multiply the spinorfield with the inverse of 1+imu\gamma_5 */
-    _complex_times_vector(phi1, (1. - g_mubar * I), s->s0);
+    _complex_times_vector(phi1, (1. - mu * I), s->s0);
     _vector_assign(r->s0, phi1);
-    _complex_times_vector(phi1, (1. - g_mubar * I), s->s1);
+    _complex_times_vector(phi1, (1. - mu * I), s->s1);
     _vector_assign(r->s1, phi1);
-    _complex_times_vector(phi1, (1. + g_mubar * I), s->s2);
+    _complex_times_vector(phi1, (1. + mu * I), s->s2);
     _vector_assign(r->s2, phi1);
-    _complex_times_vector(phi1, (1. + g_mubar * I), s->s3);
-    _vector_assign(r->s3, phi1);
-  }
-}
-
-
-void mul_one_plus_imubar(spinor * const l, spinor * const k){
-  spinor *r, *s;
-  static su3_vector phi1;
-
-  /************ loop over all lattice sites ************/
-  for(int ix = 0; ix < (VOLUME/2); ++ix){
-    r=l + ix;
-    s=k + ix;
-    /* Multiply the spinorfield with the inverse of 1+imu\gamma_5 */
-    _complex_times_vector(phi1, (1. + g_mubar * I), s->s0);
-    _vector_assign(r->s0, phi1);
-    _complex_times_vector(phi1, (1. + g_mubar * I), s->s1);
-    _vector_assign(r->s1, phi1);
-    _complex_times_vector(phi1, (1. - g_mubar * I), s->s2);
-    _vector_assign(r->s2, phi1);
-    _complex_times_vector(phi1, (1. - g_mubar * I), s->s3);
+    _complex_times_vector(phi1, (1. + mu * I), s->s3);
     _vector_assign(r->s3, phi1);
   }
   return;
 }
 
-
 /*  calculates P(Q Q^dagger) for the nondegenerate case */
 
-void P_ND(spinor * const l_strange, spinor * const l_charm,
+void P_ndpsi(spinor * const l_strange, spinor * const l_charm,
 	  spinor * const k_strange, spinor * const k_charm){
   
   
@@ -722,7 +672,7 @@ void P_ND(spinor * const l_strange, spinor * const l_charm,
       assign(dum_dn,l_charm,VOLUME/2);
     }
     
-    Q_tau1_min_cconst_ND(l_strange, l_charm,
+    Q_tau1_sub_const_ndpsi(l_strange, l_charm,
 			 dum_up, dum_dn,
 			 phmc_root[j]);
   }
@@ -731,7 +681,7 @@ void P_ND(spinor * const l_strange, spinor * const l_charm,
 
 
 /* calculates  Q * \tau^1  for the nondegenerate case */
-void Qtau1_P_ND(spinor * const l_strange, spinor * const l_charm,
+void Qtau1_P_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		spinor * const k_strange, spinor * const k_charm){
   
   
@@ -739,12 +689,12 @@ void Qtau1_P_ND(spinor * const l_strange, spinor * const l_charm,
   dum_up=g_chi_up_spinor_field[DUM_MATRIX+1];
   dum_dn=g_chi_dn_spinor_field[DUM_MATRIX+1];
   
-  P_ND(l_strange, l_charm,k_strange,k_charm);
+  P_ndpsi(l_strange, l_charm,k_strange,k_charm);
   
   assign(dum_up,l_strange,VOLUME/2);
   assign(dum_dn,l_charm,VOLUME/2);
   
-  QNon_degenerate(l_strange,l_charm,dum_dn,dum_up);
+  Qtm_ndpsi(l_strange,l_charm,dum_dn,dum_up);
   return;
 }
 
@@ -752,7 +702,7 @@ void Qtau1_P_ND(spinor * const l_strange, spinor * const l_charm,
 
 /* this is neccessary for the calculation of the polynomial */
 
-void Qtm_pm_min_cconst_nrm(spinor * const l, spinor * const k,
+void Qtm_pm_sub_const_nrm_psi(spinor * const l, spinor * const k,
 			   const _Complex double z){
   static su3_vector phi1;
   spinor *r,*s;
@@ -801,7 +751,7 @@ void Ptm_pm_psi(spinor * const l, spinor * const k){
       assign(spinDum,l,VOLUME/2);
     }
     
-    Qtm_pm_min_cconst_nrm(l,spinDum,phmc_root[j]);
+    Qtm_pm_sub_const_nrm_psi(l,spinDum,phmc_root[j]);
   }
   return;
 }
diff --git a/Nondegenerate_Matrix.h b/Nondegenerate_Matrix.h
index eac773a7b..7176f7009 100644
--- a/Nondegenerate_Matrix.h
+++ b/Nondegenerate_Matrix.h
@@ -22,54 +22,41 @@
 #ifndef _NONDEGENRATE_MATRIX_H
 #define _NONDEGENRATE_MATRIX_H
 
-void mul_one_minus_imubar(spinor * const l, spinor * const k);
-/******************************************
- * mul_one_plus_imubar_inv computes
- * l = [(1-i\mubar\gamma_5) * l
- *
- */
-
-void mul_one_plus_imubar(spinor * const l, spinor * const k);
-/******************************************
- * mul_one_plus_imubar_inv computes
- * l = [(1+i\mubar\gamma_5) * l
- *
-*/
-
 void mul_one_pm_itau2(spinor * const p, spinor * const q,
 		      spinor * const r, spinor * const s,
 		      const double sign, const int N);
 
-void QNon_degenerate(spinor * const l_strange, spinor * const l_charm,
+void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                      spinor * const k_strange,  spinor * const k_charm);
 
-void QdaggerNon_degenerate(spinor * const l_strange, spinor * const l_charm,
+void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
                            spinor * const k_strange, spinor * const k_charm);
 
-void Q_Qdagger_ND(spinor * const l_strange, spinor * const l_charm,
+void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                   spinor * const k_strange, spinor * const k_charm);
 
-void Q_Qdagger_ND_BI(bispinor * const bisp_l, bispinor * const bisp_k);
+void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 
-void Q_tau1_min_cconst_ND(spinor * const l_strange, spinor * const l_charm,
+void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
                        spinor * const k_strange, spinor * const k_charm, 
                        const _Complex double z);
 
-void H_eo_ND(spinor * const l_strange, spinor * const l_charm, 
+void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm, 
              spinor * const k_strange, spinor * const k_charm, 
 	     const int ieo);
 
-void M_ee_inv_ND(spinor * const l_strange, spinor * const l_charm, 
+void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
 		 spinor * const k_strange, spinor * const k_charm);
 
 void Q_test_epsilon(spinor * const l_strange, spinor * const l_charm,
                     spinor * const k_strange, spinor * const k_charm);
 
-void Qtau1_P_ND(spinor * const l_strange, spinor * const l_charm,
+void Qtau1_P_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_Ptm_pm_psi(spinor * const l, spinor * const k);
-void Qtm_pm_min_cconst_nrm(spinor * const l, spinor * const k,const _Complex double z);
+
+void Qtm_pm_sub_const_nrm_psi(spinor * const l, spinor * const k,const _Complex double z);
 
 /* ************************************************
  * for noise reduction 
diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index 3e3d61faa..c9bba2e8c 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -103,7 +103,7 @@ void Ptilde_cheb_coefs(double aa, double bb, double dd[], int n, double exponent
  *
  **************************************************************************/
 
-void Poly_tilde_ND(spinor *R_s, spinor *R_c, double *dd, int n, 
+void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n, 
                    spinor *S_s, spinor *S_c){
 
   int j;
@@ -195,7 +195,7 @@ void Poly_tilde_ND(spinor *R_s, spinor *R_c, double *dd, int n,
     /*   } */
 
 
-    Q_Qdagger_ND(&R_s[0], &R_c[0], &auxs[0], &auxc[0]);
+    Qtm_pm_ndpsi(&R_s[0], &R_c[0], &auxs[0], &auxc[0]);
 
     temp1=-1.0;
     temp2=dd[j];
@@ -208,7 +208,7 @@ void Poly_tilde_ND(spinor *R_s, spinor *R_c, double *dd, int n,
   assign(&R_s[0], &ds[0],VOLUME/2);
   assign(&R_c[0], &dc[0],VOLUME/2);
 
-  Q_Qdagger_ND(&auxs[0], &auxc[0], &R_s[0], &R_c[0]);
+  Qtm_pm_ndpsi(&auxs[0], &auxc[0], &R_s[0], &R_c[0]);
 
   temp1=-1.0;
   temp2=dd[0]/2;
@@ -350,11 +350,11 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
     random_spinor_field(ss,VOLUME/2, 1);
     random_spinor_field(sc,VOLUME/2, 1);
 
-    Poly_tilde_ND(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0]);
+    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0]);
     QdaggerQ_poly(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
-    Q_Qdagger_ND(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
+    Qtm_pm_ndpsi(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
     QdaggerQ_poly(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
-    Poly_tilde_ND(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0]);
+    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0]);
 
     diff(&aux2s[0],&auxs[0], &ss[0], VOLUME/2);
     temp = square_norm(&aux2s[0], VOLUME/2, 1) / square_norm(&ss[0], VOLUME/2, 1) / 4.0;
diff --git a/Ptilde_nd.h b/Ptilde_nd.h
index a4201a515..4e5ed76a4 100644
--- a/Ptilde_nd.h
+++ b/Ptilde_nd.h
@@ -25,7 +25,7 @@ double func_tilde(double u, double exponent);
 
 void Ptilde_cheb_coefs(double a, double b, double dd[], int n, double exponent);
 
-void Poly_tilde_ND(spinor *R_s, spinor *R_c, double *dd, int n, spinor *S_s, spinor *S_c);
+void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n, spinor *S_s, spinor *S_c);
 
 double chebtilde_eval(int M, double *dd, double s);
 
diff --git a/chebyshev_polynomial.c b/chebyshev_polynomial.c
index 016faa21f..aad75ba68 100644
--- a/chebyshev_polynomial.c
+++ b/chebyshev_polynomial.c
@@ -173,8 +173,8 @@ void QdaggerQ_power(spinor *R_s, spinor *R_c, double *c, int n, spinor *S_s, spi
 	 assign(&auxc[0], &dc[0], VOLUME/2);
 /*       }  */
        
-       QdaggerNon_degenerate(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
-       QNon_degenerate(&R_s[0], &R_c[0], &aux2s[0], &aux2c[0]);
+       Qtm_dagger_ndpsi(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
+       Qtm_ndpsi(&R_s[0], &R_c[0], &aux2s[0], &aux2c[0]);
        temp1=-1.0;
        temp2=c[j];
        assign_mul_add_mul_add_mul_add_mul_r(&ds[0] , &R_s[0], &dds[0], &aux3s[0], fact2, fact1, temp1, temp2,VOLUME/2);
@@ -187,8 +187,8 @@ void QdaggerQ_power(spinor *R_s, spinor *R_c, double *c, int n, spinor *S_s, spi
      assign(&R_s[0], &ds[0],VOLUME/2);   
      assign(&R_c[0], &dc[0],VOLUME/2);  
      
-     QdaggerNon_degenerate(&aux2s[0], &aux2c[0], &R_s[0], &R_c[0]);
-     QNon_degenerate(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
+     Qtm_dagger_ndpsi(&aux2s[0], &aux2c[0], &R_s[0], &R_c[0]);
+     Qtm_ndpsi(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
 
      temp1=-1.0;
      temp2=c[0]/2;
@@ -302,8 +302,8 @@ chebyshev_polynomial(cheb_evmin, cheb_evmax, dop_cheby_coef, N_CHEBYMAX, 0.25);
     temp=square_norm(&auxs[0],VOLUME/2, 1);
       printf("||auxs Carsten||=%e\n",temp);
 
-  QdaggerNon_degenerate(&aux3s[0], &aux3c[0], &ss[0], &sc[0]);
-  QNon_degenerate(&auxs[0], &auxc[0], &aux3s[0], &aux3c[0]);
+  Qtm_dagger_ndpsi(&aux3s[0], &aux3c[0], &ss[0], &sc[0]);
+  Qtm_ndpsi(&auxs[0], &auxc[0], &aux3s[0], &aux3c[0]);
     temp=square_norm(&auxs[0],VOLUME/2, 1);
       printf("||auxs own||=%e\n",temp);
     temp=square_norm(&auxc[0],VOLUME/2, 1);
@@ -337,8 +337,8 @@ chebyshev_polynomial(cheb_evmin, cheb_evmax, dop_cheby_coef, N_CHEBYMAX, 0.25);
     printf("||auxc||=%e\n",temp); */
 
 
-  QdaggerNon_degenerate(&aux2s[0], &aux2c[0], &ss[0], &sc[0]);
-  QNon_degenerate(&aux3s[0], &aux3c[0], &aux2s[0], &aux2c[0]);
+  Qtm_dagger_ndpsi(&aux2s[0], &aux2c[0], &ss[0], &sc[0]);
+  Qtm_ndpsi(&aux3s[0], &aux3c[0], &aux2s[0], &aux2c[0]);
 
 /*    temp=square_norm(&aux3s[0],VOLUME/2, 1);
       printf("||auxs_3||=%e\n",temp);
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 6db3de2ff..21f3e9323 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -179,7 +179,7 @@ void QdaggerQ_poly(spinor *R_s, spinor *R_c, double *c, int n,
      /*   } */  
 
 
-     Q_Qdagger_ND(&R_s[0], &R_c[0], &auxs[0], &auxc[0]);
+     Qtm_pm_ndpsi(&R_s[0], &R_c[0], &auxs[0], &auxc[0]);
 
      temp1=-1.0;
      temp2=c[j];
@@ -195,7 +195,7 @@ void QdaggerQ_poly(spinor *R_s, spinor *R_c, double *c, int n,
    assign(&R_c[0], &dc[0],VOLUME/2);  
 
 
-   Q_Qdagger_ND(&auxs[0], &auxc[0], &R_s[0], &R_c[0]);
+   Qtm_pm_ndpsi(&auxs[0], &auxc[0], &R_s[0], &R_c[0]);
 
    temp1=-1.0;
    temp2=c[0]/2;
@@ -317,7 +317,7 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
 
   /* Here we check the accuracy */
   QdaggerQ_poly(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0]);
-  Q_Qdagger_ND(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
+  Qtm_pm_ndpsi(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
   QdaggerQ_poly(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0]);
 
   diff(&aux2s[0],&auxs[0],&ss[0],VOLUME/2);
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 592a5dbc2..04164c50d 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -76,7 +76,7 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   assign(g_chi_dn_spinor_field[0], mnl->pf2, VOLUME/2);
   
   for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
-    Q_tau1_min_cconst_ND(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
+    Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
 			 g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
 			 mnl->MDPolyRoots[k-1]);
   }
@@ -91,12 +91,12 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
     assign(g_chi_dn_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], VOLUME/2);
     
-    Q_tau1_min_cconst_ND(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
+    Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
 			 g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
 			 mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
     
     /* Get the even parts of the  (j-1)th  chi_spinors */
-    H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
+    H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
 	    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
     
     /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
@@ -104,18 +104,14 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf);    /* DN */
     
     /* Get the even parts of the  (2N-j)-th  chi_spinors */
-    H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
+    H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
 	    g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
     
     /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
     deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf);
     deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[1], hf);
   }
-
-  /*
-    Normalisation by the largest  EW  is done in update_momenta
-    using mnl->forcefactor
-  */ 
+  return;
 }
 
 
@@ -145,18 +141,18 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
     printf("PHMC: OLD Energy  DN + UP %e \n\n", mnl->energy0);
   }
 
-  QNon_degenerate(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+  Qtm_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 		  g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
   
   for(j = 1; j < (mnl->MDPolyDegree); j++){
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
     assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
     
-    Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+    Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 			 g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
 			 mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
   }
-  Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
+  Ptilde_ndpsi(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
 		mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
   
   assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
@@ -213,7 +209,7 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 
   for(j = 1; j <= (mnl->MDPolyDegree-1); j++) {
     /* Change this name !!*/
-    Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
+    Q_tau1_sub_const_ndpsi(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
     
     dummy = up1; up1 = up0; up0 = dummy;
     dummy = dn1; dn1 = dn0; dn0 = dummy;
@@ -243,22 +239,22 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   for(j = 1; j < 1; j++){ /* To omit corrections just set  j<1 */
     
     if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
-      Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+      Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		    mnl->PtildeCoefs, mnl->PtildeDegree, 
 		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
       QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		    mnl->MDPolyCoefs, mnl->MDPolyDegree, 
 		    g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
-      QdaggerNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+      Qtm_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
     }
     else { /*  Chi[j] = ( Ptilde P Q ) Chi[j-1]  */ 
-      QNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+      Qtm_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
       QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		    mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
 		    g_chi_dn_spinor_field[j]);
-      Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+      Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		    mnl->PtildeCoefs, mnl->PtildeDegree, 
 		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
     }
diff --git a/eigenvalues_bi.c b/eigenvalues_bi.c
index 172f0100b..619e6a886 100644
--- a/eigenvalues_bi.c
+++ b/eigenvalues_bi.c
@@ -148,11 +148,11 @@ double eigenvalues_bi(int * nr_of_eigenvalues,
 	    threshold, decay, verbosity,
 	    &converged, (_Complex double*) eigenvectors_bi, eigenvls_bi,
 	    &returncode, maxmin, 1,
-	    &Q_Qdagger_ND_BI);
+	    &Qtm_pm_ndbipsi);
   
   /* IN THE LAST LINE, INSERT:
-     Q_Qdagger_ND_BI;   Non-degenerate case - on 1 bispinor 
-     Q_Qdagger_ND;      Non-degenerate case - on 2 spinors 
+     Qtm_pm_ndbipsi;   Non-degenerate case - on 1 bispinor 
+     Qtm_pm_ndpsi;      Non-degenerate case - on 2 spinors 
      Qtm_pm_psi;        Degenerate case  -  on 1 spinor 
   */
 
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 5e6d72a1f..e8ba6a44e 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -15,12 +15,9 @@
  * 
  * You should have received a copy of the GNU General Public License
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-/****************************************************************
  *
- * invert_eo makes an inversion with EO precoditioned
- * tm Operator
+ * invert_doublet_eo makes an inversion with EO preconditioned
+ * tm Operator for the non-degenerate doublet
  *
  * Even and Odd are the numbers of spinor_field that contain
  * the even and the odd sites of the source. The result is stored
@@ -137,7 +134,7 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
 
   /* here comes the inversion using even/odd preconditioning */
   if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
-  M_ee_inv_ND(Even_new_s, Even_new_c, 
+  M_ee_inv_ndpsi(Even_new_s, Even_new_c, 
 	      Even_s, Even_c);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
@@ -176,22 +173,22 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
     else {		// CPU, conjugate gradient
       iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
 		       max_iter, precision, rel_prec, 
-		       VOLUME/2, &Q_Qdagger_ND);
+		       VOLUME/2, &Qtm_pm_ndpsi);
     }
   #else			// CPU, conjugate gradient
     iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
 		     max_iter, precision, rel_prec, 
-		     VOLUME/2, &Q_Qdagger_ND);
+		     VOLUME/2, &Qtm_pm_ndpsi);
   #endif
   
   
-  QdaggerNon_degenerate(Odd_new_s, Odd_new_c,
+  Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
 			Odd_new_s, Odd_new_c);
   
   /* Reconstruct the even sites                */
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
-  M_ee_inv_ND(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
+  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
 	      g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
 
   /* The sign is plus, since in Hopping_Matrix */
diff --git a/max_eigenvalues_bi.c b/max_eigenvalues_bi.c
index 5af4d0b57..05a6903c4 100644
--- a/max_eigenvalues_bi.c
+++ b/max_eigenvalues_bi.c
@@ -214,11 +214,11 @@ double max_eigenvalues_bi(int * nr_of_eigenvalues, const int operator_flag,
 	 threshold_max, decay_max, verbosity,
 	 &converged, (complex*) max_evs, max_evls,
 	 &returncode, JD_MAXIMAL, 1,
-	 &Q_Qdagger_ND_BI);
+	 &Qtm_pm_ndbipsi);
 
 	/* IN THE LAST LINE, INSERT:
-             Q_Qdagger_ND_BI;   Non-degenerate case - on 1 bispinor 
-             Q_Qdagger_ND;      Non-degenerate case - on 2 spinors 
+             Qtm_pm_ndbipsi;   Non-degenerate case - on 1 bispinor 
+             Qtm_pm_ndpsi;      Non-degenerate case - on 2 spinors 
              Qtm_pm_psi;        Degenerate case  -  on 1 spinor 
 	*/
 
@@ -231,11 +231,11 @@ double max_eigenvalues_bi(int * nr_of_eigenvalues, const int operator_flag,
 	threshold_max, decay_max, verbosity,
 	&converged, (complex*) max_evs, max_evls,
 	&returncode, JD_MAXIMAL, 1,
-	&Q_Qdagger_ND_BI);
+	&Qtm_pm_ndbipsi);
 
 	/* IN THE LAST LINE, INSERT:
-             Q_Qdagger_ND_BI;   Non-degenerate case - on 1 bispinor 
-             Q_Qdagger_ND;      Non-degenerate case - on 2 spinors 
+             Qtm_pm_ndbipsi;   Non-degenerate case - on 1 bispinor 
+             Qtm_pm_ndpsi;      Non-degenerate case - on 2 spinors 
              Qtm_pm_psi;        Degenerate case  -  on 1 spinor 
 	*/
 
diff --git a/nddetratio_monomial.c b/nddetratio_monomial.c
index 17ca71d3e..ffb8df4d3 100644
--- a/nddetratio_monomial.c
+++ b/nddetratio_monomial.c
@@ -61,15 +61,15 @@ double nddetratio_acc(const int id, hamiltonian_field_t * const hf) {
 
   iter = cg_her_nd(mnl->w_fields[0], mnl->w_fields[1], mnl->pf, mnl->pf2,
 		   mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
-		   VOLUME/2, &Q_Qdagger_ND);
-  QdaggerNon_degenerate(mnl->w_fields[2], mnl->w_fields[3],
+		   VOLUME/2, &Qtm_pm_ndpsi);
+  Qtm_dagger_ndpsi(mnl->w_fields[2], mnl->w_fields[3],
 			mnl->w_fields[0], mnl->w_fields[1]);
 
   g_mubar = mnl->mubar2;
   g_epsbar = mnl->epsbar2;
   boundary(mnl->kappa2);
 
-  QNon_degenerate(mnl->w_fields[0], mnl->w_fields[1],
+  Qtm_ndpsi(mnl->w_fields[0], mnl->w_fields[1],
 		  mnl->w_fields[2], mnl->w_fields[3]);
   
   mnl->energy1  = scalar_prod_r(mnl->pf , mnl->w_fields[0], VOLUME/2, 1);
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 9a444287d..cccb9317c 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -79,7 +79,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     assign(g_chi_dn_spinor_field[0], mnl->pf2, VOLUME/2);
 
     for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
-      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
+      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
 			   g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
 			   mnl->MDPolyRoots[k-1]);
     }
@@ -94,12 +94,12 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
       assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
       assign(g_chi_dn_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], VOLUME/2);
       
-      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
+      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
 			   g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
 			   mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
       
       /* Get the even parts of the  (j-1)th  chi_spinors */
-      H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
+      H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
 	      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
       
       /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
@@ -107,7 +107,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
       deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf);    /* DN */
       
       /* Get the even parts of the  (2N-j)-th  chi_spinors */
-      H_eo_ND(mnl->w_fields[0], mnl->w_fields[1], 
+      H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
 	      g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
       
       /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
@@ -120,7 +120,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     /* from  j=0  (chi_0 = phi)  .....  to j = n-1 */
     assign(g_chi_up_spinor_field[0], mnl->pf, VOLUME/2);
     for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
-      Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[k],
+      Qtm_pm_sub_const_nrm_psi(g_chi_up_spinor_field[k],
 			    g_chi_up_spinor_field[k-1], 
 			    mnl->MDPolyRoots[k-1]);
     }
@@ -131,7 +131,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
       assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1],
 	     g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
 
-      Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[mnl->MDPolyDegree], 
+      Qtm_pm_sub_const_nrm_psi(g_chi_up_spinor_field[mnl->MDPolyDegree], 
 			   g_chi_up_spinor_field[mnl->MDPolyDegree-1],
 			   mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
 
@@ -185,24 +185,24 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
   if(phmc_exact_poly==0){
-    QNon_degenerate(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+    Qtm_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 		    g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
  
     for(j = 1; j < (mnl->MDPolyDegree); j++){
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
 
-      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
 			mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
     }
-    Poly_tilde_ND(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
+    Ptilde_ndpsi(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
 		  mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
   } 
   else if( phmc_exact_poly==1 && g_epsbar!=0.0) {
     /* Attention this is Q * tau1, up/dn are exchanged in the input spinor  */
     /* this is used as an preconditioner */
-    QNon_degenerate(g_chi_up_spinor_field[1],g_chi_dn_spinor_field[1],
+    Qtm_ndpsi(g_chi_up_spinor_field[1],g_chi_dn_spinor_field[1],
 		    g_chi_dn_spinor_field[0],g_chi_up_spinor_field[0]);
 
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
@@ -211,13 +211,13 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
     /* solve Q*tau1*P(Q^2) *x=y */
     cg_her_nd(g_chi_up_spinor_field[1],g_chi_dn_spinor_field[1],
 	      g_chi_up_spinor_field[0],g_chi_dn_spinor_field[0],
-	      1000,1.e-16,0,VOLUME/2, Qtau1_P_ND);
+	      1000,1.e-16,0,VOLUME/2, Qtau1_P_ndpsi);
 
     /*  phi= Bdagger phi  */
     for(j = 1; j < (mnl->MDPolyDegree); j++){
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
       assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
-      Q_tau1_min_cconst_ND(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1],
+      Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1],
 			g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0],
 			mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
     }
@@ -237,7 +237,7 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
     /*  phi= Bdagger phi  */
     for(j = 1; j < (mnl->MDPolyDegree); j++){
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
-      Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1],
+      Qtm_pm_sub_const_nrm_psi(g_chi_up_spinor_field[1],
 			    g_chi_up_spinor_field[0],
 			    mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
     }
@@ -296,7 +296,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   if(phmc_exact_poly==0) {
     for(j = 1; j <= (mnl->MDPolyDegree-1); j++) {
       /* Change this name !!*/
-      Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
+      Q_tau1_sub_const_ndpsi(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
 
       dummy = up1; up1 = up0; up0 = dummy;
       dummy = dn1; dn1 = dn0; dn0 = dummy;
@@ -326,22 +326,22 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     for(j = 1; j < 1; j++){ /* To omit corrections just set  j<1 */
       
       if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
-	Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+	Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		      mnl->PtildeCoefs, mnl->PtildeDegree, 
 		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
 	QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		      mnl->MDPolyCoefs, mnl->MDPolyDegree, 
 		      g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
-	QdaggerNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+	Qtm_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
       }
       else { /*  Chi[j] = ( Ptilde P Q ) Chi[j-1]  */ 
-	QNon_degenerate(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+	Qtm_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
 	QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		      mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
 		      g_chi_dn_spinor_field[j]);
-	Poly_tilde_ND(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+	Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		      mnl->PtildeCoefs, mnl->PtildeDegree, 
 		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
       }
@@ -381,7 +381,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   else if(phmc_exact_poly==1 && g_epsbar!=0.0) {
     /* B(Q*tau1) */
     for(j = 1; j <= (mnl->MDPolyDegree-1); j++){
-      Q_tau1_min_cconst_ND(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
+      Q_tau1_sub_const_ndpsi(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
 
       dummy = up1; up1 = up0; up0 = dummy;
       dummy = dn1; dn1 = dn0; dn0 = dummy;
@@ -413,7 +413,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   else if(phmc_exact_poly == 1 && g_epsbar == 0.0) {
     for(j = 1; j < (mnl->MDPolyDegree); j++) {
       assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
-      Qtm_pm_min_cconst_nrm(g_chi_up_spinor_field[1],
+      Qtm_pm_sub_const_nrm_psi(g_chi_up_spinor_field[1],
 			    g_chi_up_spinor_field[0],
 			    mnl->MDPolyRoots[j-1]);
     }
diff --git a/operator.c b/operator.c
index fdff80443..aaf51c025 100644
--- a/operator.c
+++ b/operator.c
@@ -207,7 +207,7 @@ int init_operators() {
     }
     else if(optr->type == DBTMWILSON) {
       optr->even_odd_flag = 1;
-      optr->applyDbQsq = &Q_Qdagger_ND;
+      optr->applyDbQsq = &Qtm_pm_ndpsi;
       /* TODO: this should be here!       */
       /* Chi`s-spinors  memory allocation */
       /*       if(init_chi_spinor_field(VOLUMEPLUSRAND/2, 20) != 0) { */
diff --git a/poly_monomial.c b/poly_monomial.c
index bbf1c2c43..32da27360 100644
--- a/poly_monomial.c
+++ b/poly_monomial.c
@@ -98,7 +98,7 @@ void poly_derivative(const int id, hamiltonian_field_t * const hf){
     /* Here comes the definitions for the chi_j fields */
     /* from  j=0  (chi_0 = phi)  .....  to j = n-1 */
     for(k = 0; k < degreehalf-1 ; k++) {
-      Qtm_pm_min_cconst_nrm(chi_spinor_field[k+1],
+      Qtm_pm_sub_const_nrm_psi(chi_spinor_field[k+1],
 				 chi_spinor_field[k], 
 				 mnl->MDPolyRoots[k]);
     }
@@ -114,7 +114,7 @@ void poly_derivative(const int id, hamiltonian_field_t * const hf){
       assign(chi_spinor_field[degreehalf],
 	     chi_spinor_field[degreehalf+1], VOLUME/2);
       
-      Qtm_pm_min_cconst_nrm(chi_spinor_field[degreehalf+1], 
+      Qtm_pm_sub_const_nrm_psi(chi_spinor_field[degreehalf+1], 
 			    chi_spinor_field[degreehalf],
 			    mnl->MDPolyRoots[mnl->MDPolyDegree-(j+1)]);
       
@@ -144,7 +144,7 @@ void poly_derivative(const int id, hamiltonian_field_t * const hf){
       * multiply with the last missing monomial *
       * such that we get an evaluation of P     *
       ******************************************/
-      Qtm_pm_min_cconst_nrm(chi_spinor_field[degreehalf], 
+      Qtm_pm_sub_const_nrm_psi(chi_spinor_field[degreehalf], 
 			    chi_spinor_field[degreehalf+1],
 			    mnl->MDPolyRoots[mnl->MDPolyDegree-1]);
       
@@ -223,7 +223,7 @@ double poly_acc(const int id, hamiltonian_field_t * const hf){
     /* apply B */
     for(j = 0; j < mnl->MDPolyDegree/2; j++){
       assign(mnl->w_fields[0], mnl->w_fields[1], VOLUME/2);
-      Qtm_pm_min_cconst_nrm(mnl->w_fields[1],
+      Qtm_pm_sub_const_nrm_psi(mnl->w_fields[1],
 			    mnl->w_fields[0],
 			    mnl->MDPolyRoots[j]);
     }
@@ -310,7 +310,7 @@ void poly_heatbath(const int id, hamiltonian_field_t * const hf){
     /*  phi= Bdagger phi  */
     for(j = 0; j < (mnl->MDPolyDegree/2); j++){
       assign(mnl->w_fields[1], mnl->w_fields[0], VOLUME/2);
-      Qtm_pm_min_cconst_nrm(mnl->w_fields[0],
+      Qtm_pm_sub_const_nrm_psi(mnl->w_fields[0],
 				 mnl->w_fields[1],
 				 mnl->MDPolyRoots[mnl->MDPolyDegree/2+j]);
     }
diff --git a/reweighting_factor_nd.c b/reweighting_factor_nd.c
index d118ff042..ea68fb9b9 100644
--- a/reweighting_factor_nd.c
+++ b/reweighting_factor_nd.c
@@ -56,7 +56,7 @@ double reweighting_factor_nd(const int N)
     temp1 = phmc_ptilde_cheby_coef[0];
     phmc_ptilde_cheby_coef[0] = temp1 - 1;
 
-    Poly_tilde_ND(g_chi_up_spinor_field[3], g_chi_dn_spinor_field[3], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, g_chi_up_spinor_field[2], g_chi_dn_spinor_field[2]);
+    Ptilde_ndpsi(g_chi_up_spinor_field[3], g_chi_dn_spinor_field[3], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, g_chi_up_spinor_field[2], g_chi_dn_spinor_field[2]);
 
     phmc_ptilde_cheby_coef[0] = temp1;
 

From 32c7580919bbd058ccd413599939a3b291173c58 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 29 May 2012 18:44:31 +0200
Subject: [PATCH 012/110] theoretical background added

---
 doc/basis.tex         |   71 +
 doc/bibliography.bib  | 3829 +++++++++++++++++++++++++++++++++++++++++
 doc/eo_pre.tex        |    2 +-
 doc/main.tex          |    4 +
 doc/martins-trick.tex |    2 +-
 5 files changed, 3906 insertions(+), 2 deletions(-)
 create mode 100644 doc/basis.tex

diff --git a/doc/basis.tex b/doc/basis.tex
new file mode 100644
index 000000000..46a21b7e8
--- /dev/null
+++ b/doc/basis.tex
@@ -0,0 +1,71 @@
+\subsection{QCD on a lattice}
+
+Quantum Chromodynamics on a hyper-cubic Euclidean space-time lattice
+of size $L^3\times T$ with lattice spacing $a$ is formally described
+by the action
+\begin{equation}
+  \label{eq:action}
+  S = S_\mathrm{G}[U] + a^4 \sum_x \bar\psi\ D[U]\ \psi
+\end{equation}
+with $S_\mathrm{G}$ some suitable discretisation of the Yang-Mills
+action $F_{\mu\nu}^2/4$~\cite{Yang:1954ek}. The particular
+implementation we are  using can be found below in section 4.2 and
+consists of  plaquette and rectangular shaped Wilson loops with
+particular  coefficients. $D$ is a discretisation of the Dirac
+operator, for which Wilson originally proposed~\cite{Wilson:1974sk} to
+use the 
+so-called Wilson Dirac operator
+\begin{equation}
+  \label{eq:DW}
+  D_W[U] = \frac{1}{2}\left[\gamma_\mu\left(\nabla_\mu +
+    \nabla^*_\mu\right) -a\nabla^*_\mu\nabla_\mu \right]
+\end{equation}
+with $\nabla_\mu$ and $\nabla_\mu^*$
+the forward and backward gauge covariant difference operators,
+respectively:
+\begin{equation}
+  \label{eq:covariant}
+  \begin{split}
+  \nabla_\mu\psi(x) &= \frac{1}{a}\Bigl[U(x,x+a\hat \mu)\psi(x+a \hat \mu) -
+  \psi(x)\Bigr]\, , \\    
+  \nabla_\mu^* \psi(x) &=
+  \frac{1}{a}\Bigl[\psi(x)-U^\dagger(x,x-a\hat\mu)\psi(x-a\hat\mu)\Bigr]\, ,
+  \end{split}
+\end{equation}
+where we denote the $\mathrm{SU}(3)$ link variables by $U_{x,\mu}$.
+We shall set $a\equiv 1$ in the following for convenience. 
+Discretising the theory is by no means a unique procedure. Instead of Wilson's
+original formulation one may equally well choose the
+Wilson twisted mass formulation and the corresponding Dirac
+operator~\cite{Frezzotti:2000nk}
+\begin{equation}
+  \label{eq:Dtm}
+  D_\mathrm{tm} = (D_W[U] + m_0)\ 1_f + i \mu_q\gamma_5\tau^3
+\end{equation}
+for a mass degenerate doublet of quarks. We denote by $m_0$ the bare
+(Wilson) quark mass, $\mu_q$ is the bare twisted
+mass parameter, $\tau^i$ the $i$-th Pauli matrix and $1_f$ the
+unit matrix acting in flavour space (see appendix~\ref{sec:gammas} for
+our convention). In the framework of Wilson twisted mass QCD only
+flavour doublets of quarks can be simulated; however, the two quarks
+do not need to be degenerate in mass. The corresponding mass
+non-degenerate flavour doublet reads~\cite{Frezzotti:2003xj}
+\begin{equation}
+  \label{eq:Dh}
+  D_h(\bar\mu, \bar\epsilon)  = D_\mathrm{W}\ 1_f +
+  i\bar\mu\gamma_5\tau^3 + \bar\epsilon \tau^1 \, .
+\end{equation}
+Note that this notation is not unique. Equivalently -- as used in
+Ref.~\cite{Chiarappa:2006ae} -- one may write
+\begin{equation}
+  \label{eq:altDh}
+  D_h'(\mu_\sigma,\mu_\delta) = D_\mathrm{W}\cdot 1_f +
+  i\gamma_5\mu_\sigma\tau^1 + \mu_\delta \tau^3\, ,
+\end{equation}
+which is related to $D_h$ by $D_h' = (1+i\tau^2)D_h(1-i\tau^2)/2$
+and $(\mu_\sigma,\mu_\delta)\to(\bar\mu, -\bar\epsilon)$. 
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "main"
+%%% End: 
diff --git a/doc/bibliography.bib b/doc/bibliography.bib
index b2c4ffc28..32b722a51 100644
--- a/doc/bibliography.bib
+++ b/doc/bibliography.bib
@@ -3738,3 +3738,3832 @@ @MastersThesis{urbach:2002aa
   school = 	 {Freie Universit{\"a}t Berlin, Fachbereich Physik},
   year = 	 {2002}
 }
+
+@Article{'tHooft:1971fh,
+     author    = "'t Hooft, G.",
+     title     = "Renormalization of massless Yang-Mills fields",
+     journal   = "Nucl. Phys.",
+     volume    = "B33",
+     year      = "1971",
+     pages     = "173-199",
+     SLACcitation  = "%%CITATION = NUPHA,B33,173;%%"
+}
+@Article{'tHooft:1971rn,
+     author    = "'t Hooft, G.",
+     title     = "Renormalizable lagrangians for massive Yang-Mills fields",
+     journal   = "Nucl. Phys.",
+     volume    = "B35",
+     year      = "1971",
+     pages     = "167-188",
+     SLACcitation  = "%%CITATION = NUPHA,B35,167;%%"
+}
+@Unpublished{'tHooft:1972aa,
+  author = 	 "'t Hooft, G.",
+  title = 	 "",
+  note = 	 "Unpublished remarks at the 1972 Marseille Conference 
+                  on Yang-Mills Fields"
+}
+@Article{'tHooft:1972fi,
+     author    = "'t Hooft, G. and Veltman, M. J. G.",
+     title     = "Regularization and renormalization of gauge fields",
+     journal   = "Nucl. Phys.",
+     volume    = "B44",
+     year      = "1972",
+     pages     = "189-213",
+     SLACcitation  = "%%CITATION = NUPHA,B44,189;%%"
+}
+@Article{Abdel-Rehim:2004gx,
+     author    = "Abdel-Rehim, A. M. and Lewis, R.",
+     title     = "Twisted mass {QCD} for the pion electromagnetic form factor",
+     journal   = "Phys. Rev.",
+     volume    = "D71",
+     year      = "2005",
+     pages     = "014503",
+     eprint    = "hep-lat/0410047",
+     SLACcitation  = "%%CITATION = HEP-LAT 0410047;%%"
+}
+@Article{Abdel-Rehim:2005gz,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy and Woloshyn, R. M.
+                  ",
+     title     = "Spectrum of quenched twisted mass lattice QCD at maximal
+                  twist",
+     journal   = "Phys. Rev.",
+     volume    = "D71",
+     year      = "2005",
+     pages     = "094505",
+     eprint    = "hep-lat/0503007",
+     SLACcitation  = "%%CITATION = HEP-LAT/0503007;%%"
+}
+@Article{AbdelRehim:2004sp,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy",
+     title     = "Pion form factor with twisted mass QCD",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "140",
+     year      = "2005",
+     pages     = "299-301",
+     eprint    = "hep-lat/0408033",
+     SLACcitation  = "%%CITATION = HEP-LAT/0408033;%%"
+}
+@Article{AbdelRehim:2005gq,
+     author    = "Abdel-Rehim, A. M. and Lewis, R. and Woloshyn, R. M.",
+     title     = "Twisted mass lattice QCD and hadron phenomenology",
+     journal   = "Int. J. Mod. Phys.",
+     volume    = "A20",
+     year      = "2005",
+     pages     = "6159-6168",
+     SLACcitation  = "%%CITATION = IMPAE,A20,6159;%%"
+}
+@Article{AbdelRehim:2005gz,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy and Woloshyn, R. M.
+                  ",
+     title     = "{Spectrum of quenched twisted mass lattice QCD at maximal
+                  twist}",
+     journal   = "Phys. Rev.",
+     volume    = "D71",
+     year      = "2005",
+     pages     = "094505",
+     eprint    = "hep-lat/0503007",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevD.71.094505",
+     SLACcitation  = "%%CITATION = HEP-LAT/0503007;%%"
+}
+@Article{AbdelRehim:2005qv,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy and Woloshyn, R. M.
+                  ",
+     title     = "The hadron spectrum from twisted mass QCD with a strange
+                  quark",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2006",
+     pages     = "032",
+     eprint    = "hep-lat/0509056",
+     SLACcitation  = "%%CITATION = HEP-LAT/0509056;%%"
+}
+@Article{AbdelRehim:2005yx,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy and Woloshyn, R. M.
+                  ",
+     title     = "Maximal twist and the spectrum of quenched twisted mass
+                  lattice QCD",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2006",
+     pages     = "051",
+     eprint    = "hep-lat/0509098",
+     SLACcitation  = "%%CITATION = HEP-LAT/0509098;%%"
+}
+@Article{AbdelRehim:2006qu,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy and Petry, Robert G.
+                  and Woloshyn, R. M.",
+     title     = "The spectrum of tmLQCD with quark and link smearing",
+     journal   = "PoS",
+     volume    = "LAT2006",
+     year      = "2006",
+     pages     = "164",
+     eprint    = "hep-lat/0610004",
+     SLACcitation  = "%%CITATION = HEP-LAT/0610004;%%"
+}
+@Article{AbdelRehim:2006ra,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy and Woloshyn, R. M.
+                  and Wu, Jackson M. S.",
+     title     = "Lattice QCD with a twisted mass term and a strange quark",
+     journal   = "Eur. Phys. J.",
+     volume    = "A31",
+     year      = "2007",
+     pages     = "773-776",
+     eprint    = "hep-lat/0610090",
+     SLACcitation  = "%%CITATION = HEP-LAT/0610090;%%"
+}
+@Article{AbdelRehim:2006ve,
+     author    = "Abdel-Rehim, Abdou M. and Lewis, Randy and Woloshyn, R. M.
+                  and Wu, Jackson M. S.",
+     title     = "Strange quarks in quenched twisted mass lattice QCD",
+     journal   = "Phys. Rev.",
+     volume    = "D74",
+     year      = "2006",
+     pages     = "014507",
+     eprint    = "hep-lat/0601036",
+     SLACcitation  = "%%CITATION = HEP-LAT/0601036;%%"
+}
+@Article{Adler:1974gd,
+     author    = "Adler, Stephen L.",
+     title     = "{Some Simple Vacuum Polarization Phenomenology: e+ e- $\to$
+                  Hadrons: The mu - Mesic Atom x-Ray Discrepancy and (g-2) of
+                  the Muon}",
+     journal   = "Phys. Rev.",
+     volume    = "D10",
+     year      = "1974",
+     pages     = "3714",
+     SLACcitation  = "%%CITATION = PHRVA,D10,3714;%%"
+}
+@Article{Albanese:1987ds,
+     author    = "Albanese, M. and others",
+ collaboration = "APE",
+     title     = "Glueball masses and string tension in lattice {QCD}",
+     journal   = "Phys. Lett.",
+     volume    = "B192",
+     year      = "1987",
+     pages     = "163",
+     SLACcitation  = "%%CITATION = PHLTA,B192,163;%%"
+}
+@Article{Alexandrou:2008tn,
+     author    = "Alexandrou, C. and others",
+ collaboration = "ETM",
+     title     = "{Light baryon masses with dynamical twisted mass
+                  fermions}",
+     year      = "2008",
+     eprint    = "0803.3190",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0803.3190;%%"
+}
+@Article{AliKhan:2000iv,
+     author    = "Ali Khan, A. and others",
+ collaboration = "CP-PACS",
+     title     = "Chiral properties of domain-wall quarks in quenched {QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D63",
+     year      = "2001",
+     pages     = "114504",
+     eprint    = "hep-lat/0007014",
+     SLACcitation  = "%%CITATION = HEP-LAT 0007014;%%"
+}
+@Article{AliKhan:2003br,
+     author    = "Ali Khan, A. and others",
+ collaboration = "QCDSF",
+     title     = "Accelerating the hybrid Monte Carlo algorithm",
+     journal   = "Phys. Lett.",
+     volume    = "B564",
+     year      = "2003",
+     pages     = "235-240",
+     eprint    = "hep-lat/0303026",
+     SLACcitation  = "%%CITATION = HEP-LAT 0303026;%%"
+}
+@Article{AliKhan:2003mu,
+     author    = "Ali Khan, A. and others",
+     title     = "Accelerating Hasenbusch's acceleration of hybrid Monte
+                  Carlo",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "129",
+     year      = "2004",
+     pages     = "853-855",
+     eprint    = "hep-lat/0309078",
+     SLACcitation  = "%%CITATION = HEP-LAT 0309078;%%"
+}
+@Article{Allton:1993wc,
+     author    = "Allton, C. R. and others",
+ collaboration = "UK{QCD}",
+     title     = "Gauge invariant smearing and matrix correlators using
+                  {Wilson} fermions at Beta = 6.2",
+     journal   = "Phys. Rev.",
+     volume    = "D47",
+     year      = "1993",
+     pages     = "5128-5137",
+     eprint    = "hep-lat/9303009",
+     SLACcitation  = "%%CITATION = HEP-LAT 9303009;%%"
+}
+@Article{Allton:2004qq,
+     author    = "Allton, C. R. and others",
+ collaboration = "UKQCD",
+     title     = "Improved Wilson QCD simulations with light quark masses",
+     journal   = "Phys. Rev.",
+     volume    = "D70",
+     year      = "2004",
+     pages     = "014501",
+     eprint    = "hep-lat/0403007",
+     SLACcitation  = "%%CITATION = HEP-LAT/0403007;%%"
+}
+@Article{Aoki:1984qi,
+     author    = "Aoki, S.",
+     title     = "New phase structure for lattice {QCD} with {Wilson} fermions",
+     journal   = "Phys. Rev.",
+     volume    = "D30",
+     year      = "1984",
+     pages     = "2653",
+     SLACcitation  = "%%CITATION = PHRVA,D30,2653;%%"
+}
+@Article{Aoki:1985jj,
+     author    = "Aoki, S. and Higashijima, K.",
+     title     = "The recovery of the chiral symmetry in lattice {Gross-Neveu}
+                  model",
+     journal   = "Prog. Theor. Phys.",
+     volume    = "76",
+     year      = "1986",
+     pages     = "521",
+     SLACcitation  = "%%CITATION = PTPKA,76,521;%%"
+}
+@Article{Aoki:1986ua,
+     author    = "Aoki, Sinya",
+     title     = "NUMERICAL EVIDENCE FOR A PARITY VIOLATING PHASE IN LATTICE
+                  QCD WITH WILSON FERMION",
+     journal   = "Phys. Lett.",
+     volume    = "B190",
+     year      = "1987",
+     pages     = "140",
+     SLACcitation  = "%%CITATION = PHLTA,B190,140;%%"
+}
+@Article{Aoki:1986xr,
+     author    = "Aoki, S.",
+     title     = "A solution to the {U(1)} problem on a lattice",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "57",
+     year      = "1986",
+     pages     = "3136",
+     SLACcitation  = "%%CITATION = PRLTA,57,3136;%%"
+}
+@Article{Aoki:1993vs,
+     author    = "Aoki, S. and Boettcher, S. and Gocksch, A.",
+     title     = "Spontaneous breaking of flavor symmetry and parity in the
+                  Nambu-Jona-Lasinio model with {Wilson} fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B331",
+     year      = "1994",
+     pages     = "157-164",
+     eprint    = "hep-lat/9312084",
+     SLACcitation  = "%%CITATION = HEP-LAT 9312084;%%"
+}
+@Article{Aoki:1995ft,
+     author    = "Aoki, S.",
+     title     = "On the phase structure of {QCD} with {Wilson} fermions",
+     journal   = "Prog. Theor. Phys. Suppl.",
+     volume    = "122",
+     year      = "1996",
+     pages     = "179-186",
+     eprint    = "hep-lat/9509008",
+     SLACcitation  = "%%CITATION = HEP-LAT 9509008;%%"
+}
+@Article{Aoki:1995yf,
+     author    = "Aoki, S. and Ukawa, A. and Umemura, T.",
+     title     = "Finite temperature phase structure of lattice {QCD} with
+                  {Wilson} quark action",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "76",
+     year      = "1996",
+     pages     = "873-876",
+     eprint    = "hep-lat/9508008",
+     SLACcitation  = "%%CITATION = HEP-LAT 9508008;%%"
+}
+@Article{Aoki:1997fm,
+     author    = "Aoki, S.",
+     title     = "Phase structure of lattice {QCD} with {Wilson} fermion at
+                  finite  temperature",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "60A",
+     year      = "1998",
+     pages     = "206-219",
+     eprint    = "hep-lat/9707020",
+     SLACcitation  = "%%CITATION = HEP-LAT 9707020;%%"
+}
+@Article{Aoki:2001xq,
+     author    = "Aoki, S. and others",
+ collaboration = "JL{QCD}",
+     title     = "Non-trivial phase structure of {N(f)} = 3 {QCD} with {O(a)}-
+                  improved {Wilson}  fermion at zero temperature",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "263-265",
+     eprint    = "hep-lat/0110088",
+     SLACcitation  = "%%CITATION = HEP-LAT 0110088;%%"
+}
+@Article{Aoki:2002vt,
+     author    = "Aoki, Y. and others",
+     title     = "Domain wall fermions with improved gauge actions",
+     journal   = "Phys. Rev.",
+     volume    = "D69",
+     year      = "2004",
+     pages     = "074504",
+     eprint    = "hep-lat/0211023",
+     SLACcitation  = "%%CITATION = HEP-LAT 0211023;%%"
+}
+@Article{Aoki:2004iq,
+     author    = "Aoki, S. and others",
+ collaboration = "JL{QCD}",
+     title     = "Bulk first-order phase transition in three-flavor lattice
+                  {QCD} with  {O(a)}-improved {Wilson} fermion action at zero
+                  temperature",
+     year      = "2004",
+     eprint    = "hep-lat/0409016",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409016;%%"
+}
+@Article{Aoki:2004ta,
+     author    = "Aoki, Sinya and B{\"a}r, Oliver",
+     title     = "Twisted-mass {QCD}, {O}(a) improvement and {Wilson} chiral
+                  perturbation  theory",
+     journal   = "Phys. Rev.",
+     volume    = "D70",
+     year      = "2004",
+     pages     = "116011",
+     eprint    = "hep-lat/0409006",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409006;%%"
+}
+@Article{Aoki:2005ii,
+     author    = "Aoki, S. and B{\"a}r, O.",
+     title     = "Determining the low energy parameters of {Wilson} chiral
+                  perturbation theory",
+     year      = "2005",
+     eprint    = "hep-lat/0509002",
+     SLACcitation  = "%%CITATION = HEP-LAT 0509002;%%"
+}
+@Article{Arnold:2003sx,
+     author    = "Arnold, Guido and others",
+     title     = "Numerical methods for the QCD overlap operator. II: Optimal
+                  Krylov subspace methods",
+     year      = "2003",
+     eprint    = "hep-lat/0311025",
+     SLACcitation  = "%%CITATION = HEP-LAT 0311025;%%"
+}
+@Article{Atiyah:1971rm,
+     author    = "Atiyah, M. F. and Singer, I. M.",
+     title     = "The Index of elliptic operators. 5",
+     journal   = "Annals Math.",
+     volume    = "93",
+     year      = "1971",
+     pages     = "139-149",
+     SLACcitation  = "%%CITATION = ANMAA,93,139;%%"
+}
+@Article{Aubin:2006cc,
+     author    = "Aubin, C. and Blum, T.",
+     title     = "{Hadronic contributions to the muon g-2 from the lattice}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "162",
+     year      = "2006",
+     pages     = "251-255",
+     SLACcitation  = "%%CITATION = NUPHZ,162,251;%%"
+}
+@Article{Aubin:2006xv,
+     author    = "Aubin, C. and Blum, T.",
+     title     = "{Calculating the hadronic vacuum polarization and leading
+                  hadronic  contribution to the muon anomalous magnetic
+                  moment with improved  staggered quarks}",
+     journal   = "Phys. Rev.",
+     volume    = "D75",
+     year      = "2007",
+     pages     = "114502",
+     eprint    = "hep-lat/0608011",
+     SLACcitation  = "%%CITATION = HEP-LAT/0608011;%%"
+}
+@Article{BAGEL,
+ author="P.A. Boyle",
+ year=2005,
+ eprint=" http://www.ph.ed.ac.uk/\~{ }paboyle/bagel/Bagel.html"
+ }
+@Article{Baikov:2004ku,
+     author    = "Baikov, P. A. and Chetyrkin, K. G. and K{\"u}hn, J. H.",
+     title     = "{Vacuum polarization in pQCD: First complete O(alpha(s)**4)
+                  result}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "135",
+     year      = "2004",
+     pages     = "243-246",
+     SLACcitation  = "%%CITATION = NUPHZ,135,243;%%"
+}
+@Article{Baikov:2005rw,
+     author    = "Baikov, P. A. and Chetyrkin, K. G. and K{\"u}hn, J. H.",
+     title     = "{Scalar correlator at O(alpha(s)**4), Higgs decay into b-
+                  quarks and  bounds on the light quark masses}",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "96",
+     year      = "2006",
+     pages     = "012003",
+     eprint    = "hep-ph/0511063",
+     SLACcitation  = "%%CITATION = HEP-PH/0511063;%%"
+}
+@Article{Baikov:2008jh,
+     author    = "Baikov, P. A. and Chetyrkin, K. G. and K{\"u}hn, J. H.",
+     title     = "{Hadronic Z- and tau-Decays in Order alpha_s^4}",
+     year      = "2008",
+     eprint    = "0801.1821",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-ph",
+     SLACcitation  = "%%CITATION = ARXIV:0801.1821;%%"
+}
+@Article{Bali:2000vr,
+     author    = "Bali, G. S. and others",
+ collaboration = "TXL",
+     title     = "Static potentials and glueball masses from {QCD} simulations
+                  with {Wilson}  sea quarks",
+     journal   = "Phys. Rev.",
+     volume    = "D62",
+     year      = "2000",
+     pages     = "054503",
+     eprint    = "hep-lat/0003012",
+     SLACcitation  = "%%CITATION = HEP-LAT 0003012;%%"
+}
+@Article{Bali:2004pb,
+     author    = "Bali, G. S. and others",
+     title     = "String breaking with dynamical {Wilson} fermions",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "140",
+     pages     = "609-611",
+     year      = "2004",
+     eprint    = "hep-lat/0409137",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409137;%%"
+}
+@Article{Bali:2005fu,
+     author    = "Bali, G. S. and Neff, H. and Duessel, T. and
+                  Lippert, T. and Schilling, K.",
+ collaboration = "SESAM",
+     title     = "Observation of string breaking in {QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D71",
+     year      = "2005",
+     pages     = "114513",
+     eprint    = "hep-lat/0505012",
+     SLACcitation  = "%%CITATION = HEP-LAT 0505012;%%"
+}
+@Article{Bar:2006zj,
+     author    = "B{\"a}r, O. and Jansen, K. and Schaefer, S. and Scorzato, L.
+                  and Shindler, A.",
+     title     = "Overlap fermions on a twisted mass sea",
+     year      = "2006",
+     eprint    = "hep-lat/0609039",
+     SLACcitation  = "%%CITATION = HEP-LAT 0609039;%%"
+}
+@Article{Baxter:1993bv,
+     author    = "Baxter, R. M. and others",
+ collaboration = "UK{QCD}",
+     title     = "Quenched heavy light decay constants",
+     journal   = "Phys. Rev.",
+     volume    = "D49",
+     year      = "1994",
+     pages     = "1594-1605",
+     eprint    = "hep-lat/9308020",
+     SLACcitation  = "%%CITATION = HEP-LAT 9308020;%%"
+}
+@Article{Beane:2004tw,
+     author    = "Beane, Silas R.",
+     title     = "{Nucleon masses and magnetic moments in a finite volume}",
+     journal   = "Phys. Rev.",
+     volume    = "D70",
+     year      = "2004",
+     pages     = "034507",
+     eprint    = "hep-lat/0403015",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevD.70.034507",
+     SLACcitation  = "%%CITATION = HEP-LAT/0403015;%%"
+}
+@Article{Becher:1999he,
+     author    = "Becher, Thomas and Leutwyler, H.",
+     title     = "Baryon chiral perturbation theory in manifestly Lorentz
+                  invariant form",
+     journal   = "Eur. Phys. J.",
+     volume    = "C9",
+     year      = "1999",
+     pages     = "643-671",
+     eprint    = "hep-ph/9901384",
+     SLACcitation  = "%%CITATION = HEP-PH/9901384;%%"
+}
+@Article{Bietenholz:2004sa,
+     author    = "Bietenholz, W. and others",
+ collaboration = "\xlf",
+     title     = "Comparison between overlap and twisted mass fermions
+                  towards the chiral  limit",
+     year      = "2004",
+     eprint    = "hep-lat/0409109",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409109;%%"
+}
+@Article{Bietenholz:2004wv,
+     author    = "Bietenholz, W. and others",
+ collaboration = "\xlf",
+     title     = "Going chiral: Overlap versus twisted mass fermions",
+     journal   = "JHEP",
+     volume    = "12",
+     year      = "2004",
+     pages     = "044",
+     eprint    = "hep-lat/0411001",
+     SLACcitation  = "%%CITATION = HEP-LAT 0411001;%%"
+}
+@Article{Blossier:2007vv,
+     author    = "Blossier, B. and others",
+ collaboration = "ETM",
+     title     = "{Light quark masses and pseudoscalar decay constants from
+                  Nf=2 Lattice QCD with twisted mass fermions}",
+     year      = "2007",
+     eprint    = "0709.4574",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = ARXIV:0709.4574;%%"
+}
+@Article{Blum:1994eh,
+     author    = "Blum, Tom and others",
+     title     = "QCD thermodynamics with Wilson quarks at large kappa",
+     journal   = "Phys. Rev.",
+     volume    = "D50",
+     year      = "1994",
+     pages     = "3377-3381",
+     eprint    = "hep-lat/9404006",
+     SLACcitation  = "%%CITATION = HEP-LAT 9404006;%%"
+}
+@Article{Blum:2000kn,
+     author    = "Blum, T. and others",
+     title     = "Quenched lattice {QCD} with domain wall fermions and the
+                  chiral limit",
+     journal   = "Phys. Rev.",
+     volume    = "D69",
+     year      = "2004",
+     pages     = "074502",
+     eprint    = "hep-lat/0007038",
+     SLACcitation  = "%%CITATION = HEP-LAT 0007038;%%"
+}
+@Article{Bodin:2005gg,
+     author    = "Bodin, F. and others",
+ collaboration = "ApeNEXT",
+     title     = "The {apeNEXT} project",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "140",
+     year      = "2005",
+     pages     = "176-182",
+     SLACcitation  = "%%CITATION = NUPHZ,140,176;%%"
+}
+@Article{Bolder:2000un,
+     author    = "Bolder, B. and others",
+     title     = "A high precision study of the Q anti-Q potential from
+                  {Wilson} loops in  the regime of string breaking",
+     journal   = "Phys. Rev.",
+     volume    = "D63",
+     year      = "2001",
+     pages     = "074504",
+     eprint    = "hep-lat/0005018",
+     SLACcitation  = "%%CITATION = HEP-LAT 0005018;%%"
+}
+@Article{Boucaud:2007uk,
+     author    = "Boucaud, Ph. and others",
+ collaboration = "ETM",
+     title     = "Dynamical twisted mass fermions with light quarks",
+     year      = "2007",
+     eprint    = "hep-lat/0701012",
+     SLACcitation  = "%%CITATION = HEP-LAT 0701012;%%"
+}
+@Article{Boucaud:2008xu,
+     author    = "Boucaud, Ph. and others",
+ collaboration = "ETM",
+     title     = "{Dynamical Twisted Mass Fermions with Light Quarks:
+                  Simulation and Analysis Details}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "179",
+     year      = "2008",
+     pages     = "695-715",
+     eprint    = "0803.0224",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     doi       = "10.1016/j.cpc.2008.06.013",
+     SLACcitation  = "%%CITATION = 0803.0224;%%"
+}
+@Article{Boughezal:2006px,
+     author    = "Boughezal, R. and Czakon, M. and Schutzmeier, T.",
+     title     = "{Charm and bottom quark masses from perturbative QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D74",
+     year      = "2006",
+     pages     = "074006",
+     eprint    = "hep-ph/0605023",
+     SLACcitation  = "%%CITATION = HEP-PH/0605023;%%"
+}
+@Article{Boyle:2005fb,
+     author    = "Boyle, P. A. and others",
+     title     = "{QCDOC}: Project status and first results",
+     journal   = "J. Phys. Conf. Ser.",
+     volume    = "16",
+     year      = "2005",
+     pages     = "129-139",
+     SLACcitation  = "%%CITATION = 00462,16,129;%%"
+}
+@Article{Brower:1994er,
+     author    = "Brower, R. C. and Levi, A. R. and Orginos, K.",
+     title     = "Extrapolation methods for the Dirac inverter in hybrid
+                  Monte Carlo",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "42",
+     year      = "1995",
+     pages     = "855-857",
+     eprint    = "hep-lat/9412004",
+     SLACcitation  = "%%CITATION = HEP-LAT 9412004;%%"
+}
+
+@Article{Brower:1995vx,
+     author    = "Brower, R. C. and Ivanenko, T. and Levi, A. R. and Orginos,
+                  K. N.",
+     title     = "Chronological inversion method for the Dirac matrix in
+                  hybrid Monte  Carlo",
+     journal   = "Nucl. Phys.",
+     volume    = "B484",
+     year      = "1997",
+     pages     = "353-374",
+     eprint    = "hep-lat/9509012",
+     SLACcitation  = "%%CITATION = HEP-LAT 9509012;%%"
+}
+
+@Article{Bunk:1995uv,
+     author    = "Bunk, B. and others",
+     title     = "A New simulation algorithm for lattice {QCD} with dynamical
+                  quarks",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "42",
+     year      = "1995",
+     pages     = "49-55",
+     eprint    = "hep-lat/9411016",
+     SLACcitation  = "%%CITATION = HEP-LAT 9411016;%%"
+}
+@Article{Bunk:1998rm,
+     author    = "Bunk, B. and Elser, Stephan and Frezzotti, R. and Jansen,
+                  K.",
+     title     = "{Ordering monomial factors of polynomials in the product
+                  representation}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "118",
+     year      = "1999",
+     pages     = "95-109",
+     eprint    = "hep-lat/9805026",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/S0010-4655(99)00198-8",
+     SLACcitation  = "%%CITATION = HEP-LAT/9805026;%%"
+}
+@Article{Bunk:1998rm,
+     author    = "Bunk, B. and Elser, S. and Frezzotti, R. and Jansen,
+                  K.",
+     title     = "Ordering monomial factors of polynomials in the product
+                  representation",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "118",
+     year      = "1999",
+     pages     = "95-109",
+     eprint    = "hep-lat/9805026",
+     SLACcitation  = "%%CITATION = HEP-LAT 9805026;%%"
+}
+@Article{Burrage:1998a,
+  author       = "K. Burrage and J. Erhel",
+  title        = "On the performance of various adaptive preconditioned GMRES strategies",
+  journal      = "Num. Lin. Alg. with Appl.",
+  year         = "1998",
+  volume       = "5",
+  pages        = "101-121"
+}
+@Article{Campbell:1987nv,
+     author    = "Campbell, N. A. and Huntley, A. and Michael, C.",
+     title     = "Heavy quark potentials and hybrid mesons from SU(3) lattice
+                  gauge theory",
+     journal   = "Nucl. Phys.",
+     volume    = "B306",
+     year      = "1988",
+     pages     = "51",
+     SLACcitation  = "%%CITATION = NUPHA,B306,51;%%"
+}
+@Article{Capitani:2005jp,
+     author    = "Capitani, S. and others",
+     title     = "Parton distribution functions with twisted mass fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B639",
+     year      = "2006",
+     pages     = "520-526",
+     eprint    = "hep-lat/0511013",
+     SLACcitation  = "%%CITATION = HEP-LAT 0511013;%%"
+}
+@Article{Chen:2003im,
+     author    = "Chen, Y. and others",
+     title     = "Chiral logarithms in quenched {QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D70",
+     year      = "2004",
+     pages     = "034502",
+     eprint    = "hep-lat/0304005",
+     SLACcitation  = "%%CITATION = HEP-LAT 0304005;%%"
+}
+@Book{Cheng:2000ct,
+     author    = "Cheng, T. P. and Li, L. F.",
+     title     = "Gauge theory of elementary particle physics: Problems and
+                  solutions",
+     publisher = "Oxford, UK: Clarendon",
+     year      = "2000",
+     pages     = "306",
+     edition   = "",
+}
+@Article{Chetyrkin:1990kr,
+     author    = "Chetyrkin, K. G. and K{\"u}hn, Johann H.",
+     title     = "{Mass corrections to the Z decay rate}",
+     journal   = "Phys. Lett.",
+     volume    = "B248",
+     year      = "1990",
+     pages     = "359-364",
+     SLACcitation  = "%%CITATION = PHLTA,B248,359;%%"
+}
+@Article{Chetyrkin:1996cf,
+     author    = "Chetyrkin, K. G. and K{\"u}hn, Johann H. and Steinhauser, M.",
+     title     = "{Three-loop polarization function and O(alpha(s)**2)
+                  corrections to the  production of heavy quarks}",
+     journal   = "Nucl. Phys.",
+     volume    = "B482",
+     year      = "1996",
+     pages     = "213-240",
+     eprint    = "hep-ph/9606230",
+     SLACcitation  = "%%CITATION = HEP-PH/9606230;%%"
+}
+@Article{Chetyrkin:1997mb,
+     author    = "Chetyrkin, K. G. and K{\"u}hn, Johann H. and Steinhauser, M.",
+     title     = "{Heavy quark current correlators to O(alpha(s)**2)}",
+     journal   = "Nucl. Phys.",
+     volume    = "B505",
+     year      = "1997",
+     pages     = "40-64",
+     eprint    = "hep-ph/9705254",
+     SLACcitation  = "%%CITATION = HEP-PH/9705254;%%"
+}
+@Article{Chetyrkin:1998ix,
+     author    = "Chetyrkin, K. G. and Harlander, R. and Steinhauser, M.",
+     title     = "{Singlet polarization functions at O(alpha(s)**2)}",
+     journal   = "Phys. Rev.",
+     volume    = "D58",
+     year      = "1998",
+     pages     = "014012",
+     eprint    = "hep-ph/9801432",
+     SLACcitation  = "%%CITATION = HEP-PH/9801432;%%"
+}
+@Article{Chetyrkin:2000zk,
+     author    = "Chetyrkin, K. G. and Harlander, R. V. and K{\"u}hn, Johann H.",
+     title     = "{Quartic mass corrections to R(had) at O(alpha(s)**3)}",
+     journal   = "Nucl. Phys.",
+     volume    = "B586",
+     year      = "2000",
+     pages     = "56-72",
+     eprint    = "hep-ph/0005139",
+     SLACcitation  = "%%CITATION = HEP-PH/0005139;%%"
+}
+@Article{Chetyrkin:2006xg,
+     author    = "Chetyrkin, K. G. and K{\"u}hn, J. H. and Sturm, C.",
+     title     = "{Four-loop moments of the heavy quark vacuum polarization
+                  function in  perturbative QCD}",
+     journal   = "Eur. Phys. J.",
+     volume    = "C48",
+     year      = "2006",
+     pages     = "107-110",
+     eprint    = "hep-ph/0604234",
+     SLACcitation  = "%%CITATION = HEP-PH/0604234;%%"
+}
+@Article{Chiarappa:2004ry,
+     author    = "Chiarappa, T. and others",
+     title     = "{Comparing iterative methods for overlap and twisted mass
+                   fermions}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "140",
+     year      = "2005",
+     pages     = "853-855",
+     eprint    = "hep-lat/0409107",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/j.nuclphysbps.2004.11.281",
+     SLACcitation  = "%%CITATION = HEP-LAT/0409107;%%"
+}
+@Article{Chiarappa:2006ae,
+     author    = "Chiarappa, T. and others",
+     title     = "{Numerical simulation of {QCD} with u, d, s and c quarks in
+                  the twisted-mass {W}ilson formulation}",
+     journal   = "Eur. Phys. J.",
+     volume    = "C50",
+     year      = "2007",
+     pages     = "373-383",
+     eprint    = "hep-lat/0606011",
+     archivePrefix = "arXiv",
+     doi       = "10.1140/epjc/s10052-006-0204-4",
+     SLACcitation  = "%%CITATION = HEP-LAT/0606011;%%"
+}
+@Article{Chiarappa:2006hz,
+     author    = "Chiarappa, T. and others",
+     title     = "{Iterative methods for overlap and twisted mass fermions}",
+     year      = "2008",
+     journal   = "Comput. Sci. Disc.",
+     volume    = "01",
+     pages     = "015001",
+     eprint    = "hep-lat/0609023",
+     archivePrefix = "arXiv",
+     SLACcitation  = "%%CITATION = HEP-LAT/0609023;%%"
+}
+@Article{Cichy:2008gk,
+     author    = "Cichy, K. and Gonzalez Lopez, J. and Jansen, K. and Kujawa,
+                  A. and Shindler, A.",
+     title     = "{Twisted Mass, Overlap and Creutz Fermions: Cut-off Effects
+                  at Tree-level of Perturbation Theory}",
+     journal   = "Nucl. Phys.",
+     volume    = "B800",
+     year      = "2008",
+     pages     = "94-108",
+     eprint    = "0802.3637",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     doi       = "10.1016/j.nuclphysb.2008.03.004",
+     SLACcitation  = "%%CITATION = 0802.3637;%%"
+}
+@Article{Clark:2004cq,
+     author    = "Clark, M. A. and Kennedy, A. D.",
+     title     = "Accelerating fermionic molecular dynamics",
+     year      = "2004",
+     eprint    = "hep-lat/0409134",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409134;%%"
+}
+
+@Article{Clark:2005sq,
+     author    = "Clark, M. A. and de Forcrand, Ph. and Kennedy, A. D.",
+     title     = "Algorithm shootout: R versus RHMC",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2005",
+     pages     = "115",
+     eprint    = "hep-lat/0510004",
+     SLACcitation  = "%%CITATION = HEP-LAT 0510004;%%"
+}
+@Article{Clark:2006fx,
+     author    = "Clark, M. A. and Kennedy, A. D.",
+     title     = "{Accelerating Dynamical Fermion Computations using the
+                  Rational Hybrid Monte Carlo (RHMC) Algorithm with Multiple
+                  Pseudofermion Fields}",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "98",
+     year      = "2007",
+     pages     = "051601",
+     eprint    = "hep-lat/0608015",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevLett.98.051601",
+     SLACcitation  = "%%CITATION = HEP-LAT/0608015;%%"
+}
+@Article{Clark:2006wp,
+     author    = "Clark, M. A. and Kennedy, A. D.",
+     title     = "{Accelerating Staggered Fermion Dynamics with the Rational
+                  Hybrid Monte Carlo (RHMC) Algorithm}",
+     journal   = "Phys. Rev.",
+     volume    = "D75",
+     year      = "2007",
+     pages     = "011502",
+     eprint    = "hep-lat/0610047",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevD.75.011502",
+     SLACcitation  = "%%CITATION = HEP-LAT/0610047;%%"
+}
+@Article{Colangelo:2001df,
+     author    = "Colangelo, G. and Gasser, J. and Leutwyler, H.",
+     title     = "{pi pi scattering}",
+     journal   = "Nucl. Phys.",
+     volume    = "B603",
+     year      = "2001",
+     pages     = "125-179",
+     eprint    = "hep-ph/0103088",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/S0550-3213(01)00147-X",
+     SLACcitation  = "%%CITATION = HEP-PH/0103088;%%"
+}
+@Article{Colangelo:2003hf,
+     author    = "Colangelo, Gilberto and D{\"u}rr, Stephan",
+     title     = "The pion mass in finite volume",
+     journal   = "Eur. Phys. J.",
+     volume    = "C33",
+     year      = "2004",
+     pages     = "543-553",
+     eprint    = "hep-lat/0311023",
+     SLACcitation  = "%%CITATION = HEP-LAT/0311023;%%"
+}
+@Article{Colangelo:2005gd,
+     author    = "Colangelo, Gilberto and D{\"u}rr, Stephan and Haefeli,
+                  Christoph",
+     title     = "Finite volume effects for meson masses and decay
+                  constants",
+     journal   = "Nucl. Phys.",
+     volume    = "B721",
+     year      = "2005",
+     pages     = "136-174",
+     eprint    = "hep-lat/0503014",
+     SLACcitation  = "%%CITATION = HEP-LAT 0503014;%%"
+}
+@Article{Colangelo:2006mp,
+     author    = "Colangelo, Gilberto and Haefeli, Christoph",
+     title     = "{Finite volume effects for the pion mass at two loops}",
+     journal   = "Nucl. Phys.",
+     volume    = "B744",
+     year      = "2006",
+     pages     = "14-33",
+     eprint    = "hep-lat/0602017",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/j.nuclphysb.2006.03.010",
+     SLACcitation  = "%%CITATION = HEP-LAT/0602017;%%"
+}
+@Book{Collins:1994ab,
+     author    = "Collins, J.C.",
+     title     = "Renormalisation",
+     publisher = "Cambridge University Press",
+     series    = "Cambridge Monographs on Mathematical Physics",
+     year      = "1994",
+     edition   = "",
+}
+@Article{Creutz:1984fj,
+     author    = "Creutz, M. and Gocksch, A. and Ogilvie, M. and
+                  Okawa, M.",
+     title     = "Microcanonical renormalization group",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "53",
+     year      = "1984",
+     pages     = "875",
+     SLACcitation  = "%%CITATION = PRLTA,53,875;%%"
+}
+@Article{Creutz:1989wt,
+     author    = "Creutz, M. and Gocksch, A.",
+     title     = "Higher order hybrid monte carlo algorithms",
+     note     = "BNL-42601"
+}
+@Article{Creutz:1996bg,
+     author    = "Creutz, Michael",
+     title     = "Wilson fermions at finite temperature",
+     year      = "1996",
+     eprint    = "hep-lat/9608024",
+     SLACcitation  = "%%CITATION = HEP-LAT 9608024;%%"
+}
+@Article{Creutz:1998ee,
+     author    = "Creutz, M.",
+     title     = "Evaluating Grassmann integrals",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "81",
+     year      = "1998",
+     pages     = "3555-3558",
+     eprint    = "hep-lat/9806037",
+     SLACcitation  = "%%CITATION = HEP-LAT 9806037;%%"
+}
+@Article{Cundy:2005pi,
+     author    = "Cundy, N. and others",
+     title     = "Numerical Methods for the {QCD} Overlap Operator IV: Hybrid
+                  Monte Carlo",
+     year      = "2005",
+     eprint    = "hep-lat/0502007",
+     SLACcitation  = "%%CITATION = HEP-LAT 0502007;%%"
+}
+@Article{David:1984ys,
+     author    = "David, F. and Hamber, H. W.",
+     title     = "Chiral condensate with {Wilson} fermions",
+     journal   = "Nucl. Phys.",
+     volume    = "B248",
+     year      = "1984",
+     pages     = "381",
+     SLACcitation  = "%%CITATION = NUPHA,B248,381;%%"
+}
+@Article{Davies:2008sw,
+     author    = "Davies, C. T. H. and others",
+ collaboration = "HPQCD",
+     title     = "{Update: Accurate Determinations of $\alpha_s$ from
+                  Realistic Lattice QCD}",
+     year      = "2008",
+     eprint    = "0807.1687",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0807.1687;%%"
+}
+@Article{DeGrand:1990dk,
+     author    = "DeGrand, T. A. and Rossi, P.",
+     title     = "Conditioning techniques for dynamical fermions",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "60",
+     year      = "1990",
+     pages     = "211-214",
+     SLACcitation  = "%%CITATION = CPHCB,60,211;%%"
+}
+@Article{DeGrand:1990ip,
+     author    = "DeGrand, T. A.",
+     title     = "Resonance masses from Monte Carlo simulations (with
+                  emphasis on the rho meson)",
+     journal   = "Phys. Rev.",
+     volume    = "D43",
+     year      = "1991",
+     pages     = "2296-2300",
+     SLACcitation  = "%%CITATION = PHRVA,D43,2296;%%"
+}
+@Article{DeGrand:2002vu,
+     author    = "DeGrand, Thomas and Hasenfratz, Anna and Kovacs, Tamas G.",
+     title     = "Improving the chiral properties of lattice fermions",
+     journal   = "Phys. Rev.",
+     volume    = "D67",
+     year      = "2003",
+     pages     = "054501",
+     eprint    = "hep-lat/0211006",
+     SLACcitation  = "%%CITATION = HEP-LAT 0211006;%%"
+}
+@Article{DeTar:2007ni,
+     author    = "DeTar, Carleton and Levkova, L.",
+     title     = "Effects of the disconnected flavor singlet corrections on
+                  the hyperfine splitting in charmonium",
+     journal   = "PoS",
+     volume    = "LAT2007",
+     year      = "2007",
+     pages     = "116",
+     eprint    = "0710.1322",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = ARXIV:0710.1322;%%"
+}
+@Article{DelDebbio:2006cn,
+     author    = "Del Debbio, L. and Giusti, L. and L{\"u}scher, M. and
+                  Petronzio, R. and Tantalo, N.",
+     title     = "QCD with light Wilson quarks on fine lattices. I: First
+                  experiences and physics results",
+     journal   = "JHEP",
+     volume    = "02",
+     year      = "2007",
+     pages     = "056",
+     eprint    = "hep-lat/0610059",
+     SLACcitation  = "%%CITATION = HEP-LAT 0610059;%%"
+}
+@Article{DellaMorte:2000yp,
+     author    = "Della Morte, M. and Frezzotti, R. and Heitger, J. and Sint,
+                  S.",
+     title     = "Non-perturbative scaling tests of twisted mass {QCD}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "94",
+     year      = "2001",
+     pages     = "617-621",
+     eprint    = "hep-lat/0010091",
+     SLACcitation  = "%%CITATION = HEP-LAT 0010091;%%"
+}
+@Article{DellaMorte:2001tu,
+     author    = "Della Morte, M. and Frezzotti, R. and Heitger, J.",
+     title     = "Quenched twisted mass {QCD} at small quark masses and in
+                  large volume",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "260-262",
+     eprint    = "hep-lat/0110166",
+     SLACcitation  = "%%CITATION = HEP-LAT 0110166;%%"
+}
+
+@Article{DellaMorte:2001ys,
+     author    = "Della Morte, M. and Frezzotti, R. and Heitger,
+                  J. and Sint, S.",
+ collaboration = "ALPHA",
+     title     = "Cutoff effects in twisted mass lattice {QCD}",
+     journal   = "JHEP",
+     volume    = "10",
+     year      = "2001",
+     pages     = "041",
+     eprint    = "hep-lat/0108019",
+     SLACcitation  = "%%CITATION = HEP-LAT 0108019;%%"
+}                                                                               
+@Article{DellaMorte:2003jj,
+     author    = "Della Morte, M. and others",
+ collaboration = "ALPHA",
+     title     = "Simulating the Schroedinger functional with two pseudo-
+                  fermions",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "156",
+     year      = "2003",
+     pages     = "62-72",
+     eprint    = "hep-lat/0307008",
+     SLACcitation  = "%%CITATION = HEP-LAT 0307008;%%"
+}                                                                               
+@Article{DellaMorte:2003mn,
+     author    = "Della Morte, M. and others",
+ collaboration = "ALPHA",
+     title     = "Lattice HQET with exponentially improved statistical
+                  precision",
+     journal   = "Phys. Lett.",
+     volume    = "B581",
+     year      = "2004",
+     pages     = "93-98",
+     eprint    = "hep-lat/0307021",
+     SLACcitation  = "%%CITATION = HEP-LAT 0307021;%%"
+}             
+@Article{DellaMorte:2003mw,
+     author    = "Della Morte, M. and others",
+ collaboration = "ALPHA",
+     title     = "Static quarks with improved statistical precision",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "129",
+     year      = "2004",
+     pages     = "346-348",
+     eprint    = "hep-lat/0309080",
+     SLACcitation  = "%%CITATION = HEP-LAT 0309080;%%"
+}                                                                  
+@Article{DellaMorte:2005yc,
+     author    = "Della Morte, M. and Shindler, A. and Sommer,
+                  R.",
+     title     = "On lattice actions for static quarks",
+     year      = "2005",
+     eprint    = "hep-lat/0506008",
+     SLACcitation  = "%%CITATION = HEP-LAT 0506008;%%"
+}
+@Article{Dimopoulos:2006dm,
+     author    = "Dimopoulos, P. and others",
+ collaboration = "ALPHA",
+     title     = "A precise determination of B(K) in quenched QCD",
+     journal   = "Nucl. Phys.",
+     volume    = "B749",
+     year      = "2006",
+     pages     = "69-108",
+     eprint    = "hep-ph/0601002",
+     SLACcitation  = "%%CITATION = HEP-PH 0601002;%%"
+}
+@Article{Dimopoulos:2007fn,
+     author    = "Dimopoulos, P. and others",
+     title     = "{Renormalisation of quark bilinears with Nf=2 Wilson
+                  fermions and tree-level improved gauge action}",
+     journal   = "PoS",
+     volume    = "LAT2007",
+     year      = "2007",
+     pages     = "241",
+     eprint    = "0710.0975",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0710.0975;%%"
+}
+@Article{Dimopoulos:2007qy,
+     author    = "Dimopoulos, Petros and Frezzotti, Roberto and Herdoiza,
+                  Gregorio and Urbach, Carsten and Wenger, Urs",
+ collaboration = "ETM",
+     title     = "{Scaling and low energy constants in lattice QCD with $N_f=2$
+                  maximally twisted Wilson quarks}",
+     journal   = "PoS",
+     volume    = "LAT2007",
+     year      = "2007",
+     pages     = "102",
+     eprint    = "0710.2498",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0710.2498;%%"
+}
+@Article{Dimopoulos:2008sy,
+     author    = "Dimopoulos, Petros and others",
+ collaboration = "ETM",
+     title     = "{Scaling and chiral extrapolation of pion mass and decay
+                  constant with maximally twisted mass QCD}",
+     year      = "2008",
+     eprint    = "0810.2873",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0810.2873;%%"
+}
+@Article{Dong:2001fm,
+     author    = "Dong, S. J. and others",
+     title     = "Chiral properties of pseudoscalar mesons on a quenched
+                  20**4 lattice  with overlap fermions",
+     journal   = "Phys. Rev.",
+     volume    = "D65",
+     year      = "2002",
+     pages     = "054507",
+     eprint    = "hep-lat/0108020",
+     SLACcitation  = "%%CITATION = HEP-LAT 0108020;%%"
+}
+@Article{Duane:1987de,
+     author    = "Duane, S. and Kennedy, A. D. and Pendleton, B. J. and
+                  Roweth, D.",
+     title     = "{H}ybrid monte carlo",
+     journal   = "Phys. Lett.",
+     volume    = "B195",
+     year      = "1987",
+     pages     = "216-222",
+     SLACcitation  = "%%CITATION = PHLTA,B195,216;%%"
+}
+@Article{Edwards:1996vs,
+     author    = "Edwards, R. G. and Horvath, I. and Kennedy, A. D.",
+     title     = "Instabilities and non-reversibility of molecular dynamics
+                  trajectories",
+     journal   = "Nucl. Phys.",
+     volume    = "B484",
+     year      = "1997",
+     pages     = "375-402",
+     eprint    = "hep-lat/9606004",
+     SLACcitation  = "%%CITATION = HEP-LAT 9606004;%%"
+}
+@Article{Edwards:2004sx,
+     author    = "Edwards, Robert G. and Joo, Balint",
+ collaboration = "SciDAC",
+     title     = "The {Chroma} software system for lattice {QCD}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "140",
+     year      = "2005",
+     pages     = "832",
+     eprint    = "hep-lat/0409003",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409003;%%"
+}
+@Article{Eichten:1989zv,
+     author    = "Eichten, E. and Hill, B.",
+     title     = "An effective field theory for the calculation of matrix
+                  elements involving heavy quarks",
+     journal   = "Phys. Lett.",
+     volume    = "B234",
+     year      = "1990",
+     pages     = "511",
+     SLACcitation  = "%%CITATION = PHLTA,B234,511;%%"
+}
+@Article{Farchioni:2002vn,
+     author    = "Farchioni, F. and Gebert, C. and Montvay, I.
+                  and Scorzato, L.",
+     title     = "Numerical simulation tests with light dynamical quarks",
+     journal   = "Eur. Phys. J.",
+     volume    = "C26",
+     year      = "2002",
+     pages     = "237-251",
+     eprint    = "hep-lat/0206008",
+     SLACcitation  = "%%CITATION = HEP-LAT 0206008;%%"
+}
+@Article{Farchioni:2004fs,
+     author    = "Farchioni, F. and others",
+     title     = "The phase structure of lattice {QCD} with {Wilson} quarks and
+                  renormalization group improved gluons",
+     journal   = "Eur. Phys. J.",
+     volume    = "C42",
+     year      = "2005",
+     pages     = "73-87",
+     eprint    = "hep-lat/0410031",
+     SLACcitation  = "%%CITATION = HEP-LAT 0410031;%%"
+}
+@Article{Farchioni:2004ma,
+     author    = "Farchioni, F. and others",
+     title     = "Exploring the phase structure of lattice {{QCD}} with twisted
+                  mass quarks",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "140",
+     year      = "2005",
+     pages     = "240-245",
+     eprint    = "hep-lat/0409098",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409098;%%"
+}
+@Article{Farchioni:2004us,
+     author    = "Farchioni, F. and others",
+     title     = "Twisted mass quarks and the phase structure of lattice
+                  {QCD}",
+     journal   = "Eur. Phys. J.",
+     volume    = "C39",
+     year      = "2005",
+     pages     = "421-433",
+     eprint    = "hep-lat/0406039",
+     SLACcitation  = "%%CITATION = HEP-LAT 0406039;%%"
+}
+@Article{Farchioni:2005ec,
+     author    = "Farchioni, Federico and others",
+     title     = "Dynamical twisted mass fermions",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2006",
+     pages     = "072",
+     eprint    = "hep-lat/0509131",
+     SLACcitation  = "%%CITATION = HEP-LAT 0509131;%%"
+}
+@Article{Farchioni:2005hf,
+     author    = "Farchioni, F. and others",
+     title     = "Twisted mass fermions: Neutral pion masses from
+                  disconnected contributions",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2006",
+     pages     = "033",
+     eprint    = "hep-lat/0509036",
+     SLACcitation  = "%%CITATION = HEP-LAT 0509036;%%"
+}
+@Article{Farchioni:2005tu,
+     author    = "Farchioni, F. and others",
+     title     = "Lattice spacing dependence of the first order phase
+                  transition for  dynamical twisted mass fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B624",
+     year      = "2005",
+     pages     = "324-333",
+     eprint    = "hep-lat/0506025",
+     SLACcitation  = "%%CITATION = HEP-LAT 0506025;%%"
+}
+@Article{Feldmann:1999uf,
+     author    = "Feldmann, Thorsten",
+     title     = "{Quark structure of pseudoscalar mesons}",
+     journal   = "Int. J. Mod. Phys.",
+     volume    = "A15",
+     year      = "2000",
+     pages     = "159-207",
+     eprint    = "hep-ph/9907491",
+     SLACcitation  = "%%CITATION = HEP-PH/9907491;%%"
+}
+@Article{Feynman:1948aa,
+     author    = "Feynman, R. P.",
+     title     = "Space-time approach to non-relativistic quantum mechanics",
+     journal   = "Rev. Mod. Phys.",
+     volume    = "20",
+     year      = "1948",
+     pages     = "367-387",
+     SLACcitation  = "%%CITATION = RMPHA,20,367;%%"
+}
+@Article{Fischer:1996th,
+     author    = "Fischer, S. and others",
+     title     = "A Parallel SSOR Preconditioner for Lattice {QCD}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "98",
+     year      = "1996",
+     pages     = "20-34",
+     eprint    = "hep-lat/9602019",
+     SLACcitation  = "%%CITATION = HEP-LAT 9602019;%%"
+}
+@Article{Fokkema:1998aa,
+     author    = "Fokkema, D.~R. and Sleijpen, G.~L.~G. and Van~der~Vorst, H.~A.",
+     title     = "{J}acobi-{D}avidson style {QR} and {QZ} algorithms for
+                  the reduction of matrix pencils",
+     journal   = "SIAM J. Sci. Comput.",
+     volume    = "20",
+     year      = "1998",
+     pages     = "94-125",
+}
+@Article{Foster:1998vw,
+     author    = "Foster, M. and Michael, C.",
+     collaboration = "UKQCD",
+     title     = "Quark mass dependence of hadron masses from lattice {QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D59",
+     year      = "1999",
+     pages     = "074503",
+     eprint    = "hep-lat/9810021",
+     SLACcitation  = "%%CITATION = HEP-LAT 9810021;%%"
+}
+@Article{Freund,
+     author    = "Freund, R.W.",
+     journal   = "in Numerical Linear Algebra, L.\ Reichel, A.\ Ruttan and R.S.\ Varga (eds.)",
+     year      = "1993",
+     pages     = "p. 101",
+}
+@Article{Frezzotti:1997ym,
+     author    = "Frezzotti, R. and Jansen, K.",
+     title     = "A polynomial hybrid Monte Carlo algorithm",
+     journal   = "Phys. Lett.",
+     volume    = "B402",
+     year      = "1997",
+     pages     = "328-334",
+     eprint    = "hep-lat/9702016",
+     SLACcitation  = "%%CITATION = HEP-LAT 9702016;%%"
+}
+@Article{Frezzotti:1998eu,
+     author    = "Frezzotti, R. and Jansen, K.",
+     title     = "The {PHMC} algorithm for simulations of dynamical fermions.
+                  {I}: Description and properties",
+     journal   = "Nucl. Phys.",
+     volume    = "B555",
+     year      = "1999",
+     pages     = "395-431",
+     eprint    = "hep-lat/9808011",
+     SLACcitation  = "%%CITATION = HEP-LAT 9808011;%%"
+}
+@Article{Frezzotti:1998yp,
+     author    = "Frezzotti, R. and Jansen, K.",
+     title     = "The {PHMC} algorithm for simulations of dynamical fermions.
+                  {II}:  Performance analysis",
+     journal   = "Nucl. Phys.",
+     volume    = "B555",
+     year      = "1999",
+     pages     = "432-453",
+     eprint    = "hep-lat/9808038",
+     SLACcitation  = "%%CITATION = HEP-LAT 9808038;%%"
+}
+@Article{Frezzotti:1999vv,
+     author    = "Frezzotti, R. and Grassi, P. A. and Sint,
+                  S. and Weisz, P.",
+     title     = "A local formulation of lattice {QCD} without unphysical
+                  fermion zero modes",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "83",
+     year      = "2000",
+     pages     = "941-946",
+     eprint    = "hep-lat/9909003",
+     SLACcitation  = "%%CITATION = HEP-LAT 9909003;%%"
+}
+@Article{Frezzotti:2000nk,
+     author    = "Frezzotti, R. and Grassi, P. A. and Sint,
+                  S. and Weisz, P.",
+ collaboration = "ALPHA",
+     title     = "Lattice {QCD} with a chirally twisted mass term",
+     journal   = "JHEP",
+     volume    = "08",
+     year      = "2001",
+     pages     = "058",
+     eprint    = "hep-lat/0101001",
+     SLACcitation  = "%%CITATION = HEP-LAT 0101001;%%"
+}
+@Article{Frezzotti:2001du,
+     author    = "Frezzotti, R. and Sint, S.",
+     title     = "Some remarks on {O(a)} improved twisted mass {QCD}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "814-816",
+     eprint    = "hep-lat/0110140",
+     SLACcitation  = "%%CITATION = HEP-LAT 0110140;%%"
+}
+@Article{Frezzotti:2001ea,
+     author    = "Frezzotti, R. and Sint, S. and Weisz, P.",
+ collaboration = "ALPHA",
+     title     = "{O(a)} improved twisted mass lattice {QCD}",
+     journal   = "JHEP",
+     volume    = "07",
+     year      = "2001",
+     pages     = "048",
+     eprint    = "hep-lat/0104014",
+     SLACcitation  = "%%CITATION = HEP-LAT 0104014;%%"
+}
+@Article{Frezzotti:2003ni,
+     author    = "Frezzotti, R. and Rossi, G. C.",
+     title     = "Chirally improving {Wilson} fermions. {I}: {O(a)} improvement",
+     journal   = "JHEP",
+     volume    = "08",
+     year      = "2004",
+     pages     = "007",
+     eprint    = "hep-lat/0306014",
+     SLACcitation  = "%%CITATION = HEP-LAT 0306014;%%"
+}
+@Article{Frezzotti:2003xj,
+     author    = "Frezzotti, R. and Rossi, G. C.",
+     title     = "Twisted-mass lattice {QCD} with mass non-degenerate quarks",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "128",
+     year      = "2004",
+     pages     = "193-202",
+     eprint    = "hep-lat/0311008",
+     SLACcitation  = "%%CITATION = HEP-LAT 0311008;%%"
+}
+@Article{Frezzotti:2004wz,
+     author    = "Frezzotti, R. and Rossi, G. C.",
+     title     = "Chirally improving {Wilson} fermions. {II}: Four-quark
+                  operators",
+     journal   = "JHEP",
+     volume    = "10",
+     year      = "2004",
+     pages     = "070",
+     eprint    = "hep-lat/0407002",
+     SLACcitation  = "%%CITATION = HEP-LAT 0407002;%%"
+}
+@Article{Frezzotti:2005gi,
+     author    = "Frezzotti, R. and Martinelli, G. and Papinutto, M. and
+                  Rossi, G. C.",
+     title     = "Reducing cutoff effects in maximally twisted lattice {QCD}
+                  close to the  chiral limit",
+     journal   = "JHEP",
+     volume    = "04",
+     year      = "2006",
+     pages     = "038",
+     eprint    = "hep-lat/0503034",
+     SLACcitation  = "%%CITATION = HEP-LAT 0503034;%%"
+}
+@Article{Frezzotti:2007qv,
+     author    = "Frezzotti, R. and Rossi, G.",
+     title     = "{O(a^2) cutoff effects in Wilson fermion simulations}",
+     journal   = "PoS",
+     volume    = "LAT2007",
+     year      = "2007",
+     pages     = "277",
+     eprint    = "0710.2492",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0710.2492;%%"
+}
+@Article{Frezzotti:2008dr,
+     author    = "Frezzotti, R. and Lubicz, V. and Simula, S.",
+ collaboration = "ETM",
+     title     = "{Electromagnetic form factor of the pion from twisted-mass
+                  lattice {QCD} at {Nf}=2}",
+     year      = "2008",
+     eprint    = "0812.4042",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0812.4042;%%"
+}
+@Article{Fritzsch:1973pi,
+     author    = "Fritzsch, H. and Gell-Mann, M. and Leutwyler, H.",
+     title     = "Advantages of the color octet gluon picture",
+     journal   = "Phys. Lett.",
+     volume    = "B47",
+     year      = "1973",
+     pages     = "365-368",
+     SLACcitation  = "%%CITATION = PHLTA,B47,365;%%"
+}
+@Article{Frommer:1994vn,
+     author    = "Frommer, A. and Hannemann, V. and Nockel, B. and Lippert,
+                  T. and Schilling, K.",
+     title     = "Accelerating {Wilson} fermion matrix inversions by means of
+                  the stabilized biconjugate gradient algorithm",
+     journal   = "Int. J. Mod. Phys.",
+     volume    = "C5",
+     year      = "1994",
+     pages     = "1073-1088",
+     eprint    = "hep-lat/9404013",
+     SLACcitation  = "%%CITATION = HEP-LAT 9404013;%%"
+}
+@Article{Frommer:1995ik,
+     author    = "Frommer, Andreas and Nockel, Bertold and Gusken, Stephan
+                  and Lippert, Thomas and Schilling, Klaus",
+     title     = "Many masses on one stroke: Economic computation of quark
+                  propagators",
+     journal   = "Int. J. Mod. Phys.",
+     volume    = "C6",
+     year      = "1995",
+     pages     = "627-638",
+     eprint    = "hep-lat/9504020",
+     SLACcitation  = "%%CITATION = HEP-LAT 9504020;%%"
+}
+@Article{Furman:1994ky,
+     author    = "Furman, V. and Shamir, Y.",
+     title     = "Axial symmetries in lattice QCD with Kaplan fermions",
+     journal   = "Nucl. Phys.",
+     volume    = "B439",
+     year      = "1995",
+     pages     = "54-78",
+     eprint    = "hep-lat/9405004",
+     SLACcitation  = "%%CITATION = HEP-LAT 9405004;%%"
+}
+@Article{Garden:1999fg,
+     author    = "Garden, J. and Heitger, J. and Sommer, R. and
+                  Wittig, H.",
+ collaboration = "ALPHA",
+     title     = "Precision computation of the strange quark's mass in
+                  quenched {QCD}",
+     journal   = "Nucl. Phys.",
+     volume    = "B571",
+     year      = "2000",
+     pages     = "237-256",
+     eprint    = "hep-lat/9906013",
+     SLACcitation  = "%%CITATION = HEP-LAT 9906013;%%"
+}
+@Article{Garron:2003cb,
+     author    = "Garron, N. and Giusti, L. and Hoelbling,
+                  C. and Lellouch, L. and Rebbi, C.",
+     title     = "B(K) from quenched {QCD} with exact chiral symmetry",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "92",
+     year      = "2004",
+     pages     = "042001",
+     eprint    = "hep-ph/0306295",
+     SLACcitation  = "%%CITATION = HEP-PH 0306295;%%"
+}
+@Article{Gasser:1982ap,
+     author    = "Gasser, J. and Leutwyler, H.",
+     title     = "Quark masses",
+     journal   = "Phys. Rept.",
+     volume    = "87",
+     year      = "1982",
+     pages     = "77-169",
+     SLACcitation  = "%%CITATION = PRPLC,87,77;%%"
+}
+@Article{Gasser:1983yg,
+     author    = "Gasser, J. and Leutwyler, H.",
+     title     = "Chiral perturbation theory to one loop",
+     journal   = "Ann. Phys.",
+     volume    = "158",
+     year      = "1984",
+     pages     = "142",
+     SLACcitation  = "%%CITATION = APNYA,158,142;%%"
+}
+
+@Article{Gasser:1985gg,
+     author    = "Gasser, J. and Leutwyler, H.",
+     title     = "Chiral perturbation theory: expansions in the mass of the
+                  strange quark",
+     journal   = "Nucl. Phys.",
+     volume    = "B250",
+     year      = "1985",
+     pages     = "465",
+     SLACcitation  = "%%CITATION = NUPHA,B250,465;%%"
+}
+@Article{Gasser:1986vb,
+     author    = "Gasser, J. and Leutwyler, H.",
+     title     = "LIGHT QUARKS AT LOW TEMPERATURES",
+     journal   = "Phys. Lett.",
+     volume    = "B184",
+     year      = "1987",
+     pages     = "83",
+     SLACcitation  = "%%CITATION = PHLTA,B184,83;%%"
+}
+@Article{Gattringer:2003qx,
+     author    = "Gattringer, C. and others",
+ collaboration = "BGR",
+     title     = "Quenched spectroscopy with fixed-point and chirally
+                  improved fermions",
+     journal   = "Nucl. Phys.",
+     volume    = "B677",
+     year      = "2004",
+     pages     = "3-51",
+     eprint    = "hep-lat/0307013",
+     SLACcitation  = "%%CITATION = HEP-LAT 0307013;%%"
+}
+@Article{Gell-Mann:1964nj,
+     author    = "Gell-Mann, M.",
+     title     = "A Schematic model of baryons and mesons",
+     journal   = "Phys. Lett.",
+     volume    = "8",
+     year      = "1964",
+     pages     = "214-215",
+     SLACcitation  = "%%CITATION = PHLTA,8,214;%%"
+}
+@Article{Gell-Mann:1968rz,
+     author    = "Gell-Mann, M. and Oakes, R. J. and Renner, B.",
+     title     = "Behavior of current divergences under SU(3) x SU(3)",
+     journal   = "Phys. Rev.",
+     volume    = "175",
+     year      = "1968",
+     pages     = "2195-2199",
+     SLACcitation  = "%%CITATION = PHRVA,175,2195;%%"
+}
+@PhdThesis{Geus:2002,
+  author = 	 {R. Geus},
+  title = 	 {The Jacobi-Davidson algorithm for solving large
+                  sparse symmetric eigenvalue problems with
+                  application to the design of accelerator cavities}, 
+  school = 	 {Swiss Federal Institute Of Technology Z{\"u}rich},
+  year = 	 {2002},
+  OPTkey = 	 {DISS. ETH NO. 14734},
+  OPTtype = 	 {},
+  OPTaddress = 	 {},
+  OPTmonth = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+@Article{Gimenez:1998ue,
+     author    = "Gimenez, V. and Giusti, L. and Rapuano, F. and Talevi, M.",
+     title     = "Non-perturbative renormalization of quark bilinears",
+     journal   = "Nucl. Phys.",
+     volume    = "B531",
+     year      = "1998",
+     pages     = "429-445",
+     eprint    = "hep-lat/9806006",
+     SLACcitation  = "%%CITATION = HEP-LAT 9806006;%%"
+}
+@Article{Gimenez:2005nt,
+     author    = "Gimenez, V. and Lubicz, V. and Mescia, F. and Porretti, V.
+                  and Reyes, J.",
+     title     = "{Operator product expansion and quark condensate from
+                  lattice QCD in  coordinate space}",
+     journal   = "Eur. Phys. J.",
+     volume    = "C41",
+     year      = "2005",
+     pages     = "535-544",
+     eprint    = "hep-lat/0503001",
+     SLACcitation  = "%%CITATION = HEP-LAT/0503001;%%"
+}
+@Article{Ginsparg:1981bj,
+     author    = "Ginsparg, P. H. and {Wilson}, K. G.",
+     title     = "A remnant of chiral symmetry on the lattice",
+     journal   = "Phys. Rev.",
+     volume    = "D25",
+     year      = "1982",
+     pages     = "2649",
+     SLACcitation  = "%%CITATION = PHRVA,D25,2649;%%"
+}
+@Article{Giusti:1998wy,
+     author    = "Giusti, L. and Rapuano, F. and Talevi, M. and Vladikas, A.
+                  ",
+     title     = "The QCD chiral condensate from the lattice",
+     journal   = "Nucl. Phys.",
+     volume    = "B538",
+     year      = "1999",
+     pages     = "249-277",
+     eprint    = "hep-lat/9807014",
+     SLACcitation  = "%%CITATION = HEP-LAT 9807014;%%"
+}
+@Article{Giusti:2001pk,
+     author    = "Giusti, L. and Hoelbling, C. and Rebbi, C.",
+     title     = "Light quark masses with overlap fermions in quenched {QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D64",
+     year      = "2001",
+     pages     = "114508",
+     eprint    = "hep-lat/0108007",
+     note      = "Erratum-ibid.D65:079903,2002",
+     SLACcitation  = "%%CITATION = HEP-LAT 0108007;%%"
+}
+@Article{Giusti:2002sm,
+     author    = "Giusti, L. and Hoelbling, C. and L{\"u}scher, M. and Wittig, H.
+                  ",
+     title     = "Numerical techniques for lattice QCD in the epsilon-
+                  regime",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "153",
+     year      = "2003",
+     pages     = "31-51",
+     eprint    = "hep-lat/0212012",
+     SLACcitation  = "%%CITATION = HEP-LAT 0212012;%%"
+}
+@Article{Giusti:2007hk,
+     author    = "Giusti, Leonardo",
+     title     = "Light dynamical fermions on the lattice: Toward the chiral
+                  regime of QCD",
+     journal   = "PoS",
+     volume    = "LAT2006",
+     year      = "2007",
+     pages     = "",
+     eprint    = "hep-lat/0702014",
+     SLACcitation  = "%%CITATION = HEP-LAT/0702014;%%"
+}
+@Article{Glassner:1996gz,
+     author    = "Gl{\"a}ssner, U. and others",
+     title     = "How to compute {G}reen's functions for entire mass
+                  trajectories within {K}rylov solvers",
+     year      = "1996",
+     eprint    = "hep-lat/9605008",
+     SLACcitation  = "%%CITATION = HEP-LAT 9605008;%%"
+}
+@Article{Gockeler:1998fn,
+     author    = "G{\"o}ckeler, M. and others",
+     title     = "Scaling of non-perturbatively {O(a)} improved {Wilson}
+                  fermions: Hadron  spectrum, quark masses and decay
+                  constants",
+     journal   = "Phys. Rev.",
+     volume    = "D57",
+     year      = "1998",
+     pages     = "5562-5580",
+     eprint    = "hep-lat/9707021",
+     SLACcitation  = "%%CITATION = HEP-LAT 9707021;%%"
+}
+@Article{Gorishnii:1990vf,
+     author    = "Gorishnii, S. G. and Kataev, A. L. and Larin, S. A.",
+     title     = "{The O (alpha-s**3) corrections to sigma-tot (e+ e- $\to$
+                  hadrons) and Gamma (tau- $\to$ tau-neutrino + hadrons) in
+                  QCD}",
+     journal   = "Phys. Lett.",
+     volume    = "B259",
+     year      = "1991",
+     pages     = "144-150",
+     SLACcitation  = "%%CITATION = PHLTA,B259,144;%%"
+}
+@Article{Greenberg:1964pe,
+     author    = "Greenberg, O. W.",
+     title     = "Spin and unitary spin independence in a paraquark model of
+                  baryons and mesons",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "13",
+     year      = "1964",
+     pages     = "598-602",
+     SLACcitation  = "%%CITATION = PRLTA,13,598;%%"
+}
+@Article{Gregory:2007ce,
+     author    = "Gregory, Eric B. and Irving, Alan and Richards, Chris M.
+                  and McNeile, Craig and Hart, Alistair",
+     title     = "Pseudoscalar Flavor-Singlet Physics with Staggered
+                  Fermions",
+     year      = "2007",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     eprint    = "0710.1725",
+     SLACcitation  = "%%CITATION = ARXIV:0710.1725;%%"
+}
+@Article{Gross:1973id,
+     author    = "Gross, D. J. and Wilczek, F.",
+     title     = "Ultraviolet behavior of non-Abelian gauge theories",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "30",
+     year      = "1973",
+     pages     = "1343-1346",
+     SLACcitation  = "%%CITATION = PRLTA,30,1343;%%"
+}
+@Article{Gross:1973ju,
+     author    = "Gross, D. J. and Wilczek, F.",
+     title     = "Asymptotically free gauge theories. 1",
+     journal   = "Phys. Rev.",
+     volume    = "D8",
+     year      = "1973",
+     pages     = "3633-3652",
+     SLACcitation  = "%%CITATION = PHRVA,D8,3633;%%"
+}
+@Article{Gross:1974jv,
+     author    = "Gross, D. J. and Neveu, A.",
+     title     = "Dynamical symmetry breaking in asymptotically free field
+                  theories",
+     journal   = "Phys. Rev.",
+     volume    = "D10",
+     year      = "1974",
+     pages     = "3235",
+     SLACcitation  = "%%CITATION = PHRVA,D10,3235;%%"
+}
+@Article{Guagnelli:1998ud,
+     author    = "Guagnelli, M. and Sommer, R. and Wittig, H.",
+ collaboration = "ALPHA",
+     title     = "Precision computation of a low-energy reference scale in
+                  quenched  lattice {QCD}",
+     journal   = "Nucl. Phys.",
+     volume    = "B535",
+     year      = "1998",
+     pages     = "389-402",
+     eprint    = "hep-lat/9806005",
+     SLACcitation  = "%%CITATION = HEP-LAT 9806005;%%"
+}
+@Article{Guagnelli:2004ga,
+     author    = "Guagnelli, M. and others",
+ collaboration = "Zeuthen-Rome (ZeRo)",
+     title     = "Non-perturbative pion matrix element of a twist-2 operator
+                  from the  lattice",
+     journal   = "Eur. Phys. J.",
+     volume    = "C40",
+     year      = "2005",
+     pages     = "69-80",
+     eprint    = "hep-lat/0405027",
+     SLACcitation  = "%%CITATION = HEP-LAT 0405027;%%"
+}
+@Article{Guagnelli:2004ww,
+     author    = "Guagnelli, M. and others",
+ collaboration = "Zeuthen-Rome (ZeRo)",
+     title     = "Finite size effects of a pion matrix element",
+     journal   = "Phys. Lett.",
+     volume    = "B597",
+     year      = "2004",
+     pages     = "216-221",
+     eprint    = "hep-lat/0403009",
+     SLACcitation  = "%%CITATION = HEP-LAT 0403009;%%"
+}
+@Article{Guagnelli:2005zc,
+     author    = "Guagnelli, M. and Heitger, J. and Pena, C. and Sint, S. and
+                  Vladikas, A.",
+ collaboration = "ALPHA",
+     title     = "Non-perturbative renormalization of left-left four-fermion
+                  operators in  quenched lattice QCD",
+     journal   = "JHEP",
+     volume    = "03",
+     year      = "2006",
+     pages     = "088",
+     eprint    = "hep-lat/0505002",
+     SLACcitation  = "%%CITATION = HEP-LAT 0505002;%%"
+}
+@Article{Gupta:1988js,
+     author    = "Gupta, R. and Kilcup, G. W. and Sharpe, S. R.
+                  ",
+     title     = "Tuning the hybrid monte carlo algorithm",
+     journal   = "Phys. Rev.",
+     volume    = "D38",
+     year      = "1988",
+     pages     = "1278",
+     SLACcitation  = "%%CITATION = PHRVA,D38,1278;%%"
+}
+@Article{Gupta:1989kx,
+     author    = "Gupta, R. and others",
+     title     = "{QCD} with dynamical {Wilson} fermions",
+     journal   = "Phys. Rev.",
+     volume    = "D40",
+     year      = "1989",
+     pages     = "2072",
+     SLACcitation  = "%%CITATION = PHRVA,D40,2072;%%"
+}
+@Article{Gupta:1990ka,
+     author    = "Gupta, S. and Irback, A. and Karsch, F. and
+                  Petersson, B.",
+     title     = "The acceptance probability in the hybrid monte carlo
+                  method",
+     journal   = "Phys. Lett.",
+     volume    = "B242",
+     year      = "1990",
+     pages     = "437-443",
+     SLACcitation  = "%%CITATION = PHLTA,B242,437;%%"
+}
+@Article{Gupta:1991sn,
+     author    = "Gupta, R. and others",
+     title     = "{QCD} with dynamical {Wilson} fermions. 2",
+     journal   = "Phys. Rev.",
+     volume    = "D44",
+     year      = "1991",
+     pages     = "3272-3292",
+     SLACcitation  = "%%CITATION = PHRVA,D44,3272;%%"
+}
+@Unpublished{Gupta:1997nd,
+     author    = "Gupta, R.",
+     title     = "Introduction to lattice {QCD}",
+     year      = "1997",
+     eprint    = "hep-lat/9807028",
+     note      = "Lectures given at Les Houches Summer School in Theoretical Physics, Session 68",
+     SLACcitation  = "%%CITATION = HEP-LAT 9807028;%%"
+}
+@Article{Han:1965pf,
+     author    = "Han, M. Y. and Nambu, Yoichiro",
+     title     = "Three-triplet model with double SU(3) symmetry",
+     journal   = "Phys. Rev.",
+     volume    = "139",
+     year      = "1965",
+     pages     = "B1006-B1010",
+     SLACcitation  = "%%CITATION = PHRVA,139,B1006;%%"
+}
+@Article{Hasenbusch:2001ne,
+     author    = "Hasenbusch, M.",
+     title     = "Speeding up the {H}ybrid-{M}onte-{C}arlo algorithm for dynamical
+                  fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B519",
+     year      = "2001",
+     pages     = "177-182",
+     eprint    = "hep-lat/0107019",
+     SLACcitation  = "%%CITATION = HEP-LAT 0107019;%%"
+}
+@Article{Hasenbusch:2002ai,
+     author    = "Hasenbusch, M. and Jansen, K.",
+     title     = "Speeding up lattice {QCD} simulations with clover-improved
+                  {Wilson} fermions",
+     journal   = "Nucl. Phys.",
+     volume    = "B659",
+     year      = "2003",
+     pages     = "299-320",
+     eprint    = "hep-lat/0211042",
+     SLACcitation  = "%%CITATION = HEP-LAT 0211042;%%"
+}
+@Article{Hasenbusch:2003vg,
+     author    = "Hasenbusch, Martin",
+     title     = "{Full QCD algorithms towards the chiral limit}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "129",
+     year      = "2004",
+     pages     = "27-33",
+     eprint    = "hep-lat/0310029",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/S0920-5632(03)02504-0",
+     SLACcitation  = "%%CITATION = HEP-LAT/0310029;%%"
+}
+@Article{Hasenfratz:1998jp,
+     author    = "Hasenfratz, P.",
+     title     = "Lattice {QCD} without tuning, mixing and current
+                  renormalization",
+     journal   = "Nucl. Phys.",
+     volume    = "B525",
+     year      = "1998",
+     pages     = "401-409",
+     eprint    = "hep-lat/9802007",
+     SLACcitation  = "%%CITATION = HEP-LAT 9802007;%%"
+}
+@Article{Hasenfratz:1998ri,
+     author    = "Hasenfratz, P. and Laliena, V. and Niedermayer,
+                  F.",
+     title     = "The index theorem in {QCD} with a finite cut-off",
+     journal   = "Phys. Lett.",
+     volume    = "B427",
+     year      = "1998",
+     pages     = "125-131",
+     eprint    = "hep-lat/9801021",
+     SLACcitation  = "%%CITATION = HEP-LAT 9801021;%%"
+}
+@Article{Hasenfratz:2001hp,
+     author    = "Hasenfratz, A. and Knechtli, F.",
+     title     = "Flavor symmetry and the static potential with hypercubic
+                  blocking",
+     journal   = "Phys. Rev.",
+     volume    = "D64",
+     year      = "2001",
+     pages     = "034504",
+     eprint    = "hep-lat/0103029",
+     SLACcitation  = "%%CITATION = HEP-LAT 0103029;%%"
+}
+@Article{Hasenfratz:2001tw,
+     author    = "Hasenfratz, A. and Hoffmann, R. and Knechtli, F.",
+     title     = "The static potential with hypercubic blocking",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "418-420",
+     eprint    = "hep-lat/0110168",
+     SLACcitation  = "%%CITATION = HEP-LAT 0110168;%%"
+}
+@Article{Hashimoto:2008xg,
+     author    = "Hashimoto, Koichi and Izubuchi, Taku",
+     title     = "{eta' meson from two flavor dynamical domain wall
+                  fermions}",
+     year      = "2008",
+     eprint    = "0803.0186",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = ARXIV:0803.0186;%%"
+}
+@Article{Heitger:2000ay,
+     author    = "Heitger, J. and Sommer, R. and Wittig, H.",
+ collaboration = "ALPHA",
+     title     = "Effective chiral Lagrangians and lattice {{QCD}}",
+     journal   = "Nucl. Phys.",
+     volume    = "B588",
+     year      = "2000",
+     pages     = "377-399",
+     eprint    = "hep-lat/0006026",
+     note      = "and references therein",
+     SLACcitation  = "%%CITATION = HEP-LAT 0006026;%%"
+}
+@Article{Hernandez:1998et,
+     author    = "Hernandez, P. and Jansen, K. and L{\"u}scher, M.",
+     title     = "Locality properties of Neuberger's lattice Dirac operator",
+     journal   = "Nucl. Phys.",
+     volume    = "B552",
+     year      = "1999",
+     pages     = "363-378",
+     eprint    = "hep-lat/9808010",
+     SLACcitation  = "%%CITATION = HEP-LAT 9808010;%%"
+}
+@Article{Hernandez:2000sb,
+     author    = "Hernandez, P. and Jansen, K. and Lellouch, L.",
+     title     = "A numerical treatment of Neuberger's lattice Dirac
+                  operator",
+     year      = "2000",
+     eprint    = "hep-lat/0001008",
+     SLACcitation  = "%%CITATION = HEP-LAT 0001008;%%"
+}
+@Article{Hernandez:2001hq,
+     author    = "Hernandez, P. and Jansen, K. and Lellouch, L. and
+                  Wittig, H.",
+     title     = "Scalar condensate and light quark masses from overlap
+                  fermions",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "766-771",
+     eprint    = "hep-lat/0110199",
+     SLACcitation  = "%%CITATION = HEP-LAT 0110199;%%"
+}
+@Article{Hernandez:2001yn,
+     author    = "Hernandez, P. and Jansen, K. and Lellouch, L. and
+                  Wittig, H.",
+     title     = "Non-perturbative renormalization of the quark condensate in
+                  {Ginsparg}-{Wilson} regularizations",
+     journal   = "JHEP",
+     volume    = "07",
+     year      = "2001",
+     pages     = "018",
+     eprint    = "hep-lat/0106011",
+     SLACcitation  = "%%CITATION = HEP-LAT 0106011;%%"
+}
+@Article{Horsley:2004mx,
+     author    = "Horsley, R. and Perlt, H. and Rakow, P. E. L. and
+                  Schierholz, G. and Schiller, A.",
+ collaboration = "QCDSF",
+     title     = "One-loop renormalisation of quark bilinears for overlap
+                  fermions with  improved gauge actions",
+     journal   = "Nucl. Phys.",
+     volume    = "B693",
+     year      = "2004",
+     pages     = "3-35",
+     eprint    = "hep-lat/0404007",
+     SLACcitation  = "%%CITATION = HEP-LAT 0404007;%%"
+}
+@Article{Ilgenfritz:2003gw,
+     author    = "Ilgenfritz, E.-M. and Kerler, W. and
+                  M{\"u}ller-Preu{\ss}ker, M. and Sternbeck, A. and St{\"u}ben, H.",
+     title     = "A numerical reinvestigation of the {Aoki} phase with {N(f)} = 2
+                  {Wilson}  fermions at zero temperature",
+     journal   = "Phys. Rev.",
+     volume    = "D69",
+     year      = "2004",
+     pages     = "074511",
+     eprint    = "hep-lat/0309057",
+     SLACcitation  = "%%CITATION = HEP-LAT 0309057;%%"
+}
+@Article{Ilgenfritz:2006tz,
+     author    = "Ilgenfritz, E. -M. and others",
+     title     = "Twisted mass QCD thermodynamics: First results on apeNEXT",
+     year      = "2006",
+     eprint    = "hep-lat/0610112",
+     SLACcitation  = "%%CITATION = HEP-LAT 0610112;%%"
+}
+@Article{Iwasaki:1983ck,
+     author    = "Iwasaki, Y.",
+     title     = "Renormalization group analysis of lattice theories and
+                  improved lattice action. 2. four-dimensional nonabelian
+                  SU(N) gauge model",
+     note     = "UTHEP-118"
+}
+@Article{Iwasaki:1985we,
+     author    = "Iwasaki, Y.",
+     title     = "Renormalization group analysis of lattice theories and
+                  improved lattice action: two-dimensional nonlinear O(N)
+                  sigma model",
+     journal   = "Nucl. Phys.",
+     volume    = "B258",
+     year      = "1985",
+     pages     = "141-156",
+     SLACcitation  = "%%CITATION = NUPHA,B258,141;%%"
+}
+@Article{Iwasaki:1992hn,
+     author    = "Iwasaki, Y. and Kanaya, K. and Sakai, S. and Yoshie, T.",
+     title     = "Quark confinement in multi - flavor quantum
+                  chromodynamics",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "30",
+     year      = "1993",
+     pages     = "327-330",
+     eprint    = "hep-lat/9211035",
+     SLACcitation  = "%%CITATION = HEP-LAT 9211035;%%"
+}
+@Article{Izubuchi:1998hy,
+     author    = "Izubuchi, T. and Noaki, J. and Ukawa, A.",
+     title     = "Two-dimensional lattice Gross-Neveu model with {Wilson}
+                  fermion action at  finite temperature and chemical
+                  potential",
+     journal   = "Phys. Rev.",
+     volume    = "D58",
+     year      = "1998",
+     pages     = "114507",
+     eprint    = "hep-lat/9805019",
+     SLACcitation  = "%%CITATION = HEP-LAT 9805019;%%"
+}
+@Article{Jacobs:1983ph,
+     author    = "Jacobs, L.",
+     title     = "Undoubling chirally symmetric lattice fermions",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "51",
+     year      = "1983",
+     pages     = "172",
+     SLACcitation  = "%%CITATION = PRLTA,51,172;%%"
+}
+@Article{Jagels:1994a,
+     author    = "Jagels, C. F. and Reichel, L.",
+     title     = "A fast minimal residual algorithm for shifted unitary matrices",
+     journal   = "Numer. Linear Algebra Appl.",
+     volume    = "1(6)",
+     pages     = "555-570",
+     year      = "1994"
+}
+@Article{Jagels:1994aa,
+     author    = "Jagels, C. F. and Reichel, L.",
+     title     = "A Fast Minimal Residual Algorithm for Shifted Unitary 
+                  Matrices",
+     journal   = "Numerical Linear Algebra with Applications",
+     volume    = "1(6)",
+     year      = "1994",
+     pages     = "555-570",
+}
+@Article{Jansen:1994ym,
+     author    = "Jansen, K.",
+     title     = "Domain wall fermions and chiral gauge theories",
+     journal   = "Phys. Rept.",
+     volume    = "273",
+     year      = "1996",
+     pages     = "1-54",
+     eprint    = "hep-lat/9410018",
+     SLACcitation  = "%%CITATION = HEP-LAT 9410018;%%"
+}
+@Article{Jansen:1995ck,
+     author    = "Jansen, Karl and others",
+     title     = "Non-perturbative renormalization of lattice QCD at all
+                  scales",
+     journal   = "Phys. Lett.",
+     volume    = "B372",
+     year      = "1996",
+     pages     = "275-282",
+     eprint    = "hep-lat/9512009",
+     SLACcitation  = "%%CITATION = HEP-LAT 9512009;%%"
+}
+@Article{Jansen:1996cq,
+     author    = "Jansen, K. and Liu, C.",
+     title     = "Study of Liapunov exponents and the reversibility of
+                  molecular dynamics  algorithms",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "53",
+     year      = "1997",
+     pages     = "974-976",
+     eprint    = "hep-lat/9607057",
+     SLACcitation  = "%%CITATION = HEP-LAT 9607057;%%"
+}
+@Article{Jansen:1996xp,
+     author    = "Jansen, K.",
+     title     = "Recent developments in fermion simulation algorithms",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "53",
+     year      = "1997",
+     pages     = "127-133",
+     eprint    = "hep-lat/9607051",
+     SLACcitation  = "%%CITATION = HEP-LAT 9607051;%%"
+}
+@Article{Jansen:1997yt,
+     author    = "Jansen, K. and Liu, C.",
+     title     = "Implementation of Symanzik's improvement program for
+                  simulations of  dynamical {Wilson} fermions in lattice {QCD}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "99",
+     year      = "1997",
+     pages     = "221-234",
+     eprint    = "hep-lat/9603008",
+     SLACcitation  = "%%CITATION = HEP-LAT 9603008;%%"
+}
+@Article{Jansen:1998mx,
+     author    = "Jansen, K. and Sommer, R.",
+ collaboration = "ALPHA",
+     title     = "{O(a)} improvement of lattice {QCD} with two flavors of
+                  {Wilson} quarks",
+     journal   = "Nucl. Phys.",
+     volume    = "B530",
+     year      = "1998",
+     pages     = "185-203",
+     eprint    = "hep-lat/9803017",
+     SLACcitation  = "%%CITATION = HEP-LAT 9803017;%%"
+}
+@Article{Jansen:2003ir,
+     author    = "Jansen, K. and Shindler, A. and Urbach, C. and
+                  Wetzorke, I.",
+ collaboration = "\xlf",
+     title     = "Scaling test for {Wilson} twisted mass {QCD}",
+     journal   = "Phys. Lett.",
+     volume    = "B586",
+     year      = "2004",
+     pages     = "432-438",
+     eprint    = "hep-lat/0312013",
+     SLACcitation  = "%%CITATION = HEP-LAT 0312013;%%"
+}
+@Article{Jansen:2003jq,
+     author    = "Jansen, K. and Nagai, K.-I.",
+     title     = "Reducing residual-mass effects for domain-wall fermions",
+     journal   = "JHEP",
+     volume    = "12",
+     year      = "2003",
+     pages     = "038",
+     eprint    = "hep-lat/0305009",
+     SLACcitation  = "%%CITATION = HEP-LAT 0305009;%%"
+}
+@Article{Jansen:2003nt,
+     author    = "Jansen, K.",
+     title     = "Actions for dynamical fermion simulations: Are we ready to
+                  go?",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "129",
+     year      = "2004",
+     pages     = "3-16",
+     eprint    = "hep-lat/0311039",
+     SLACcitation  = "%%CITATION = HEP-LAT 0311039;%%"
+}
+@Article{Jansen:2005cg,
+     author    = "Jansen, K. and others",
+ collaboration = "\xlf",
+     title     = "Flavour breaking effects of {Wilson} twisted mass fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B624",
+     year      = "2005",
+     pages     = "334-341",
+     eprint    = "hep-lat/0507032",
+     SLACcitation  = "%%CITATION = HEP-LAT 0507032;%%"
+}
+@Unpublished{Jansen:2005chi,
+  author = 	 {Jansen, K. and others},
+collaboration = {\xlf},
+  title = 	 {},
+  note = 	 {in preparation},
+  OPTkey = 	 {},
+  OPTmonth = 	 {},
+  year = 	 {2005},
+  OPTannote = 	 {}
+}
+@Article{Jansen:2005gf,
+     author    = "Jansen, K. and Papinutto, M. and Shindler, A. and Urbach,
+                  C. and Wetzorke, I.",
+ collaboration = "\xlf",
+     title     = "Light quarks with twisted mass fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B619",
+     year      = "2005",
+     pages     = "184-191",
+     eprint    = "hep-lat/0503031",
+     SLACcitation  = "%%CITATION = HEP-LAT 0503031;%%"
+}
+@Article{Jansen:2005kk,
+     author    = "Jansen, K. and Papinutto, M. and Shindler, A. and Urbach,
+                  C. and Wetzorke, I.",
+ collaboration = "\xlf",
+     title     = "Quenched scaling of {Wilson} twisted mass fermions",
+     journal   = "JHEP",
+     volume    = "09",
+     year      = "2005",
+     pages     = "071",
+     eprint    = "hep-lat/0507010",
+     SLACcitation  = "%%CITATION = HEP-LAT 0507010;%%"
+}
+@Article{Jansen:2005yp,
+     author    = "Jansen, Karl and Shindler, Andrea and Urbach, Carsten and
+                  Wenger, Urs",
+     title     = "{HMC} algorithm with multiple time scale integration and mass
+                  preconditioning",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2006",
+     pages     = "118",
+     eprint    = "hep-lat/0510064",
+     SLACcitation  = "%%CITATION = HEP-LAT 0510064;%%"
+}
+@Article{Jansen:2006ks,
+     author    = "Jansen, Karl",
+     title     = "Status report on ILDG activities",
+     year      = "2006",
+     eprint    = "hep-lat/0609012",
+     SLACcitation  = "%%CITATION = HEP-LAT 0609012;%%"
+}
+@Article{Jansen:2006rf,
+     author    = "Jansen, Karl and Urbach, Carsten",
+ collaboration = "ETM",
+     title     = "First results with two light flavours of quarks with
+                  maximally twisted mass",
+     year      = "2006",
+     eprint    = "hep-lat/0610015",
+     SLACcitation  = "%%CITATION = HEP-LAT 0610015;%%"
+}
+@Article{Jansen:2008wv,
+     author    = "Jansen, K. and Michael, C. and Urbach, C.",
+ collaboration = "ETM",
+     title     = "The eta' meson from lattice {QCD}",
+     year      = "2008",
+     eprint    = "0804.3871",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0804.3871;%%"
+}
+@Article{Jansen:2008zz,
+     author    = "Jansen, K. and Michael, C. and Urbach, C.",
+     title     = "{The eta-prime meson from lattice QCD}",
+     journal   = "Eur. Phys. J.",
+     volume    = "C58",
+     year      = "2008",
+     pages     = "261-269",
+     doi       = "10.1140/epjc/s10052-008-0764-6",
+     SLACcitation  = "%%CITATION = EPHJA,C58,261;%%"
+}
+@Unpublished{Jegerlehner:1996pm,
+     author    = "Jegerlehner, Beat",
+     title     = "Krylov space solvers for shifted linear systems",
+     year      = "1996",
+     eprint    = "hep-lat/9612014",
+     note      = "unpublished",
+     SLACcitation  = "%%CITATION = HEP-LAT 9612014;%%"
+}
+@Article{Jegerlehner:1997rn,
+     author    = "Jegerlehner, B.",
+     title     = "Multiple mass solvers",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "63",
+     year      = "1998",
+     pages     = "958-960",
+     eprint    = "hep-lat/9708029",
+     SLACcitation  = "%%CITATION = HEP-LAT 9708029;%%"
+}
+@Article{Jegerlehner:2003qp,
+     author    = "Jegerlehner, F.",
+     title     = "Theoretical precision in estimates of the hadronic
+                  contributions to {$(g-2)_\mu$} and {$\alpha_{\rm QED}(M_Z)$}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "126",
+     year      = "2004",
+     pages     = "325-334",
+     eprint    = "hep-ph/0310234",
+     SLACcitation  = "%%CITATION = HEP-PH 0310234;%%"
+}
+@Article{Jenkins:1990jv,
+     author    = "Jenkins, Elizabeth Ellen and Manohar, Aneesh V.",
+     title     = "Baryon chiral perturbation theory using a heavy fermion
+                  Lagrangian",
+     journal   = "Phys. Lett.",
+     volume    = "B255",
+     year      = "1991",
+     pages     = "558-562",
+     SLACcitation  = "%%CITATION = PHLTA,B255,558;%%"
+}
+@Article{Kaiser:1998ds,
+     author    = "Kaiser, Roland and Leutwyler, H.",
+     title     = "{Pseudoscalar decay constants at large N(c)}",
+     year      = "1998",
+     eprint    = "hep-ph/9806336",
+     SLACcitation  = "%%CITATION = HEP-PH/9806336;%%"
+}
+
+@Article{Kalkreuter:1995mm,
+     author    = "Kalkreuter, Thomas and Simma, Hubert",
+     title     = "An Accelerated conjugate gradient algorithm to compute low
+                  lying eigenvalues: A Study for the Dirac operator in SU(2)
+                  lattice QCD",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "93",
+     year      = "1996",
+     pages     = "33-47",
+     eprint    = "hep-lat/9507023",
+     SLACcitation  = "%%CITATION = HEP-LAT 9507023;%%"
+}
+@Article{Kalkreuter:1996mm,
+     author    = "Kalkreuter, T. and Simma, H.",
+     title     = "An Accelerated conjugate gradient algorithm to compute low
+                  lying eigenvalues: A Study for the Dirac operator in SU(2)
+                  lattice {QCD}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "93",
+     year      = "1996",
+     pages     = "33-47",
+     eprint    = "hep-lat/9507023",
+     SLACcitation  = "%%CITATION = HEP-LAT 9507023;%%"
+}
+@Article{Kamleh:2005wg,
+     author    = "Kamleh, W. and Peardon, M. J.",
+ collaboration = "TrinLat",
+     title     = "{Polynomial filtering for HMC in lattice QCD}",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2006",
+     pages     = "106",
+     SLACcitation  = "%%CITATION = POSCI,LAT2005,106;%%"
+}
+@Article{Kaplan:1992bt,
+     author    = "Kaplan, D. B.",
+     title     = "A Method for simulating chiral fermions on the lattice",
+     journal   = "Phys. Lett.",
+     volume    = "B288",
+     year      = "1992",
+     pages     = "342-347",
+     eprint    = "hep-lat/9206013",
+     SLACcitation  = "%%CITATION = HEP-LAT 9206013;%%"
+}
+@Article{Karsten:1980wd,
+     author    = "Karsten, L. H. and Smit, J.",
+     title     = "Lattice fermions: species doubling, chiral invariance, and
+                  the triangle anomaly",
+     journal   = "Nucl. Phys.",
+     volume    = "B183",
+     year      = "1981",
+     pages     = "103",
+     SLACcitation  = "%%CITATION = NUPHA,B183,103;%%"
+}
+@Article{Kennedy:1990bv,
+     author    = "Kennedy, A. D. and Pendleton, B.",
+     title     = "Acceptances and autocorrelations in hybrid Monte Carlo",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "20",
+     year      = "1991",
+     pages     = "118-121",
+     SLACcitation  = "%%CITATION = NUPHZ,20,118;%%"
+}
+@Article{Knechtli:1998gf,
+     author    = "Knechtli, F. and Sommer, R.",
+ collaboration = "ALPHA",
+     title     = "String breaking in SU(2) gauge theory with scalar matter
+                  fields",
+     journal   = "Phys. Lett.",
+     volume    = "B440",
+     year      = "1998",
+     pages     = "345-352",
+     eprint    = "hep-lat/9807022",
+     SLACcitation  = "%%CITATION = HEP-LAT 9807022;%%"
+}
+@Article{Knechtli:2000df,
+     author    = "Knechtli, F. and Sommer, R.",
+ collaboration = "ALPHA",
+     title     = "String breaking as a mixing phenomenon in the SU(2) Higgs
+                  model",
+     journal   = "Nucl. Phys.",
+     volume    = "B590",
+     year      = "2000",
+     pages     = "309-328",
+     eprint    = "hep-lat/0005021",
+     SLACcitation  = "%%CITATION = HEP-LAT 0005021;%%"
+}
+@Article{Lacock:1994qx,
+     author    = "Lacock, P. and McKerrell, A. and Michael, C. and Stopher,
+                            I. M. and Stephenson, P. W.",
+     collaboration = "UKQCD",
+     title     = "Efficient hadronic operators in lattice gauge theory",
+     journal   = "Phys. Rev.",
+     volume    = "D51",
+     year      = "1995",
+     pages     = "6403-6410",
+     eprint    = "hep-lat/9412079",
+     SLACcitation  = "%%CITATION = HEP-LAT 9412079;%%"
+}
+@Article{Lepage:1992xa,
+     author    = "Lepage, G. Peter and Mackenzie, Paul B.",
+     title     = "On the viability of lattice perturbation theory",
+     journal   = "Phys. Rev.",
+     volume    = "D48",
+     year      = "1993",
+     pages     = "2250-2264",
+     eprint    = "hep-lat/9209022",
+     SLACcitation  = "%%CITATION = HEP-LAT 9209022;%%"
+}
+@Article{Lepage:2001ym,
+     author    = "Lepage, G. P. and others",
+     title     = "{Constrained curve fitting}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "12-20",
+     eprint    = "hep-lat/0110175",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/S0920-5632(01)01638-3",
+     SLACcitation  = "%%CITATION = HEP-LAT/0110175;%%"
+}
+@Article{Lesk:2002gd,
+     author    = "Lesk, V. I. and others",
+ collaboration = "CP-PACS",
+     title     = "Flavor singlet meson mass in the continuum limit in
+                  two-flavor lattice QCD",
+     journal   = "Phys. Rev.",
+     volume    = "D67",
+     year      = "2003",
+     pages     = "074503",
+     eprint    = "hep-lat/0211040",
+     SLACcitation  = "%%CITATION = HEP-LAT/0211040;%%"
+}
+@Article{Leutwyler:1997yr,
+     author    = "Leutwyler, H.",
+     title     = "{On the 1/N-expansion in chiral perturbation theory}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "64",
+     year      = "1998",
+     pages     = "223-231",
+     eprint    = "hep-ph/9709408",
+     SLACcitation  = "%%CITATION = HEP-PH/9709408;%%"
+}
+@Article{Leutwyler:2006qq,
+     author    = "Leutwyler, H.",
+     title     = "{$\pi\pi$} scattering",
+     year      = "2006",
+     eprint    = "hep-ph/0612112",
+     SLACcitation  = "%%CITATION = HEP-PH 0612112;%%"
+}
+@Article{Liu:1997fs,
+     author    = "Liu, C. and Jaster, A. and Jansen, K.",
+     title     = "Liapunov exponents and the reversibility of molecular
+                  dynamics  algorithms",
+     journal   = "Nucl. Phys.",
+     volume    = "B524",
+     year      = "1998",
+     pages     = "603-617",
+     eprint    = "hep-lat/9708017",
+     SLACcitation  = "%%CITATION = HEP-LAT 9708017;%%"
+}
+@Article{Luscher:1985dn,
+     author    = "L{\"u}scher, M.",
+     title     = "{Volume Dependence of the Energy Spectrum in Massive
+                  Quantum Field Theories. 1. Stable Particle States}",
+     journal   = "Commun. Math. Phys.",
+     volume    = "104",
+     year      = "1986",
+     pages     = "177",
+     doi       = "10.1007/BF01211589",
+     SLACcitation  = "%%CITATION = CMPHA,104,177;%%"
+}
+@Article{Luscher:1990ck,
+     author    = "L{\"u}scher, M. and Wolff, U.",
+     title     = "How to calculate the elastic scattering matrix in
+                  two-dimensional quantum field theories by numerical
+                  simulation",
+     journal   = "Nucl. Phys.",
+     volume    = "B339",
+     year      = "1990",
+     pages     = "222-252",
+     SLACcitation  = "%%CITATION = NUPHA,B339,222;%%"
+}
+@Article{Luscher:1993dy,
+     author    = "L{\"u}scher, Martin",
+     title     = "{A Portable high quality random number generator for
+                  lattice field theory simulations}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = 79,
+     year      = 1994,
+     pages     = "100-110",
+     eprint    = "hep-lat/9309020",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/0010-4655(94)90232-1",
+     SLACcitation  = "%%CITATION = HEP-LAT/9309020;%%"
+}
+@Article{Luscher:1993xx,
+     author    = "L{\"u}scher, Martin",
+     title     = "A New approach to the problem of dynamical quarks in
+                  numerical simulations of lattice {QCD}",
+     journal   = "Nucl. Phys.",
+     volume    = "B418",
+     year      = "1994",
+     pages     = "637-648",
+     eprint    = "hep-lat/9311007",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/0550-3213(94)90533-9",
+     SLACcitation  = "%%CITATION = HEP-LAT/9311007;%%"
+}
+@Article{Luscher:1993xx,
+     author    = "L{\"u}scher, M.",
+     title     = "A New approach to the problem of dynamical quarks in
+                  numerical simulations of lattice {QCD}",
+     journal   = "Nucl. Phys.",
+     volume    = "B418",
+     year      = "1994",
+     pages     = "637-648",
+     eprint    = "hep-lat/9311007",
+     SLACcitation  = "%%CITATION = HEP-LAT 9311007;%%"
+}
+@Article{Luscher:1996sc,
+     author    = "L{\"u}scher, M. and Sint, S. and Sommer, R. and
+                  Weisz, P.",
+     title     = "Chiral symmetry and {O(a)} improvement in lattice {QCD}",
+     journal   = "Nucl. Phys.",
+     volume    = "B478",
+     year      = "1996",
+     pages     = "365-400",
+     eprint    = "hep-lat/9605038",
+     SLACcitation  = "%%CITATION = HEP-LAT 9605038;%%"
+}
+@Article{Luscher:1996ug,
+     author    = "L{\"u}scher, M. and Sint, S. and Sommer, R. and
+                  Weisz, P. and Wolff, U.",
+     title     = "Non-perturbative {O(a)} improvement of lattice {QCD}",
+     journal   = "Nucl. Phys.",
+     volume    = "B491",
+     year      = "1997",
+     pages     = "323-343",
+     eprint    = "hep-lat/9609035",
+     SLACcitation  = "%%CITATION = HEP-LAT 9609035;%%"
+}
+@Article{Luscher:1998pq,
+     author    = "L{\"u}scher, M.",
+     title     = "Exact chiral symmetry on the lattice and the
+                  {Ginsparg}-{Wilson} relation",
+     journal   = "Phys. Lett.",
+     volume    = "B428",
+     year      = "1998",
+     pages     = "342-345",
+     eprint    = "hep-lat/9802011",
+     SLACcitation  = "%%CITATION = HEP-LAT 9802011;%%"
+}
+@Article{Luscher:2001tx,
+     author    = "L{\"u}scher, Martin",
+     title     = "{Lattice QCD on PCs?}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "21-28",
+     eprint    = "hep-lat/0110007",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/S0920-5632(01)01639-5",
+     SLACcitation  = "%%CITATION = HEP-LAT/0110007;%%"
+}
+@Article{Luscher:2003qa,
+     author    = "L{\"u}scher, M.",
+     title     = "Solution of the {D}irac equation in lattice {QCD} using a
+                  domain  decomposition method",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "156",
+     year      = "2004",
+     pages     = "209-220",
+     eprint    = "hep-lat/0310048",
+     SLACcitation  = "%%CITATION = HEP-LAT 0310048;%%"
+}
+@Article{Luscher:2004rx,
+     author    = "L{\"u}scher, M.",
+     title     = "Schwarz-preconditioned {HMC} algorithm for two-flavour
+                  lattice {QCD}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "165",
+     year      = "2005",
+     pages     = "199",
+     eprint    = "hep-lat/0409106",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409106;%%"
+}
+
+@Article{Luscher:2005mv,
+     author    = "L{\"u}scher, Martin",
+     title     = "Lattice {QCD} with light {W}ilson quarks",
+     journal   = "\href{http://pos.sissa.it/archive/conferences/020/008/LAT2005_002.pdf}{PoS(LAT2005)002}", 
+     year      = "2005",
+     eprint    = "hep-lat/0509152",
+     howpublished="Talk presented at International Symposium on Lattice Field Theory (Lattice 2005)",
+     SLACcitation  = "%%CITATION = HEP-LAT 0509152;%%"
+}
+@Article{Luscher:2007es,
+     author    = "L{\"u}scher, Martin",
+     title     = "{Deflation acceleration of lattice {QCD} simulations}",
+     journal   = "JHEP",
+     volume    = "12",
+     year      = "2007",
+     pages     = "011",
+     eprint    = "0710.5417",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     doi       = "10.1088/1126-6708/2007/12/011",
+     SLACcitation  = "%%CITATION = 0710.5417;%%"
+}
+@Article{Luscher:ranluxweb,
+     author    = "L{\"u}scher, M.",
+     title     = "Ranlux random number generator",
+     eprint    = "http://luscher.web.cern.ch/luscher/ranlux/"
+}
+@Article{Luscher:sse,
+     author    = "L{\"u}scher, M.",
+     title     = "Lattice QCD parallel benchmark programs",
+     eprint    = "http://luscher.web.cern.ch/luscher/QCDpbm/"
+}
+@Article{Madras:1988ei,
+     author    = "Madras, N. and Sokal, A. D.",
+     title     = "The Pivot algorithm: a highly efficient Monte Carlo method
+                  for selfavoiding walk",
+     journal   = "J. Statist. Phys.",
+     volume    = "50",
+     year      = "1988",
+     pages     = "109-186",
+     SLACcitation  = "%%CITATION = JSTPB,50,109;%%"
+}
+@Article{Martinelli:1982mw,
+     author    = "Martinelli, G. and Zhang, Yi-Cheng",
+     title     = "THE CONNECTION BETWEEN LOCAL OPERATORS ON THE LATTICE AND
+                  IN THE CONTINUUM AND ITS RELATION TO MESON DECAY
+                  CONSTANTS",
+     journal   = "Phys. Lett.",
+     volume    = "B123",
+     year      = "1983",
+     pages     = "433",
+     SLACcitation  = "%%CITATION = PHLTA,B123,433;%%"
+}
+@Article{Martinelli:1994ty,
+     author    = "Martinelli, G. and Pittori, C. and Sachrajda, Christopher
+                  T. and Testa, M. and Vladikas, A.",
+     title     = "{A General method for nonperturbative renormalization of
+                  lattice operators}",
+     journal   = "Nucl. Phys.",
+     volume    = "B445",
+     year      = "1995",
+     pages     = "81-108",
+     eprint    = "hep-lat/9411010",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/0550-3213(95)00126-D",
+     SLACcitation  = "%%CITATION = HEP-LAT/9411010;%%"
+}
+@Article{McNeile:2000hf,
+     author    = "McNeile, C. and Michael, C.",
+     collaboration = "UKQCD",
+     title     = "The eta and eta' mesons in {QCD}",
+     journal   = "Phys. Lett.",
+     volume    = "B491",
+     year      = "2000",
+     pages     = "123-129",
+     eprint    = "hep-lat/0006020",
+     SLACcitation  = "%%CITATION = HEP-LAT 0006020;%%"
+}
+@Article{McNeile:2000xx,
+     author    = "McNeile, Craig and Michael, Chris",
+     collaboration = "UKQCD",
+     title     = "Mixing of scalar glueballs and flavour-singlet scalar
+                  mesons",
+     journal   = "Phys. Rev.",
+     volume    = "D63",
+     year      = "2001",
+     pages     = "114503",
+     eprint    = "hep-lat/0010019",
+     SLACcitation  = "%%CITATION = HEP-LAT0010019;%%"
+}
+@Article{McNeile:2001cr,
+     author    = "McNeile, C. and Michael, C. and Sharkey, K. J.",
+ collaboration = "UKQCD",
+     title     = "The flavor singlet mesons in {QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D65",
+     year      = "2002",
+     pages     = "014508",
+     eprint    = "hep-lat/0107003",
+     SLACcitation  = "%%CITATION = HEP-LAT 0107003;%%"
+}
+@Article{McNeile:2002fh,
+     author    = "McNeile, C. and Michael, C.",
+ collaboration = "UKQCD",
+     title     = "Hadronic decay of a vector meson from the lattice",
+     journal   = "Phys. Lett.",
+     volume    = "B556",
+     year      = "2003",
+     pages     = "177-184",
+     eprint    = "hep-lat/0212020",
+     SLACcitation  = "%%CITATION = HEP-LAT 0212020;%%"
+}
+@Article{McNeile:2006bz,
+     author    = "McNeile, C. and Michael, C.",
+     collaboration = "UKQCD",
+     title     = "Decay width of light quark hybrid meson from the lattice",
+     journal   = "Phys. Rev.",
+     volume    = "D73",
+     year      = "2006",
+     pages     = "074506",
+     eprint    = "hep-lat/0603007",
+     SLACcitation  = "%%CITATION = HEP-LAT 0603007;%%"
+}
+@Article{Meyer:2006ty,
+     author    = "Meyer, Harvey B. and others",
+     title     = "{Exploring the HMC trajectory-length dependence of
+                  autocorrelation times in lattice QCD}",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "176",
+     year      = "2007",
+     pages     = "91-97",
+     eprint    = "hep-lat/0606004",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/j.cpc.2006.08.002",
+     SLACcitation  = "%%CITATION = HEP-LAT/0606004;%%"
+}
+@Article{Michael:1982gb,
+     author    = "Michael, C. and Teasdale, I.",
+     title     = "EXTRACTING GLUEBALL MASSES FROM LATTICE QCD",
+     journal   = "Nucl. Phys.",
+     volume    = "B215",
+     year      = "1983",
+     pages     = "433",
+     SLACcitation  = "%%CITATION = NUPHA,B215,433;%%"
+}
+@Article{Michael:1989mf,
+     author    = "Michael, C.",
+     title     = "Particle decay in lattice gauge theory",
+     journal   = "Nucl. Phys.",
+     volume    = "B327",
+     year      = "1989",
+     pages     = "515",
+     SLACcitation  = "%%CITATION = NUPHA,B327,515;%%"
+}
+@Article{Michael:1991nc,
+     author    = "Michael, C.",
+     title     = "Hadronic forces from the lattice",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "26",
+     year      = "1992",
+     pages     = "417-419",
+     SLACcitation  = "%%CITATION = NUPHZ,26,417;%%"
+}
+@Article{Michael:1993yj,
+     author    = "Michael, Christopher",
+     title     = "{Fitting correlated data}",
+     journal   = "Phys. Rev.",
+     volume    = "D49",
+     year      = "1994",
+     pages     = "2616-2619",
+     eprint    = "hep-lat/9310026",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevD.49.2616",
+     SLACcitation  = "%%CITATION = HEP-LAT/9310026;%%"
+}
+@Article{Michael:1994sz,
+     author    = "Michael, Christopher and McKerrell, A.",
+     title     = "{Fitting correlated hadron mass spectrum data}",
+     journal   = "Phys. Rev.",
+     volume    = "D51",
+     year      = "1995",
+     pages     = "3745-3750",
+     eprint    = "hep-lat/9412087",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevD.51.3745",
+     SLACcitation  = "%%CITATION = HEP-LAT/9412087;%%"
+}
+@Article{Michael:2007vn,
+     author    = "Michael, C. and Urbach, C.",
+ collaboration = "ETM",
+     title     = "Neutral mesons and disconnected diagrams in Twisted Mass
+                  QCD",
+     journal   = "",
+     volume    = "",
+     pages     = "",
+     year      = "2007",
+     eprint    = "0709.4564",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = ARXIV:0709.4564;%%"
+}
+@Book{Montvay:1994cy,
+     author    = "Montvay, I. and M{\"u}nster, G.",
+     title     = "Quantum fields on a lattice",
+     publisher = "Cambridge University Press",
+     year      = "1994",
+     series    = "Cambridge Monographs on Mathematical Physics",
+}
+@Article{Montvay:1995ea,
+     author    = "Montvay, I.",
+     title     = "An Algorithm for Gluinos on the Lattice",
+     journal   = "Nucl. Phys.",
+     volume    = "B466",
+     year      = "1996",
+     pages     = "259-284",
+     eprint    = "hep-lat/9510042",
+     SLACcitation  = "%%CITATION = HEP-LAT 9510042;%%"
+}
+@Article{Montvay:2005tj,
+     author    = "Montvay, I. and Scholz, E.",
+     title     = "Updating algorithms with multi-step stochastic correction",
+     journal   = "Phys. Lett.",
+     volume    = "B623",
+     year      = "2005",
+     pages     = "73-79",
+     eprint    = "hep-lat/0506006",
+     SLACcitation  = "%%CITATION = HEP-LAT 0506006;%%"
+}
+@Article{Morgan:2002a,
+  author       = "Morgan, R. B.",
+  title        = "GMRES with Deflated Restarting",
+  journal      = "SIAM J. Sci. Comput.",
+  volume       = "24",
+  year         = "2002",
+  pages        = "20"
+}
+@Article{Morningstar:2003gk,
+     author    = "Morningstar, Colin and Peardon, Mike J.",
+     title     = "{Analytic smearing of SU(3) link variables in lattice
+                  QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D69",
+     year      = "2004",
+     pages     = "054501",
+     eprint    = "hep-lat/0311018",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevD.69.054501",
+     SLACcitation  = "%%CITATION = HEP-LAT/0311018;%%"
+}
+@Article{Munster:2004am,
+     author    = "M{\"u}nster, G.",
+     title     = "On the phase structure of twisted mass lattice {QCD}",
+     journal   = "JHEP",
+     volume    = "09",
+     year      = "2004",
+     pages     = "035",
+     eprint    = "hep-lat/0407006",
+     SLACcitation  = "%%CITATION = HEP-LAT 0407006;%%"
+}
+@Article{Munster:2004wt,
+     author    = "M{\"u}nster, Gernot and Schmidt, Christian and Scholz, Enno E.
+                  ",
+     title     = "Chiral perturbation theory for twisted mass {QCD}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "140",
+     year      = "2005",
+     pages     = "320-322",
+     eprint    = "hep-lat/0409066",
+     SLACcitation  = "%%CITATION = HEP-LAT 0409066;%%"
+}   
+@Article{Nagai:2005mi,
+     author    = "Nagai, Kei-ichi and Jansen, Karl",
+     title     = "Two-dimensional lattice Gross-Neveu model with Wilson
+                  twisted mass  fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B633",
+     year      = "2006",
+     pages     = "325-330",
+     eprint    = "hep-lat/0510076",
+     SLACcitation  = "%%CITATION = HEP-LAT 0510076;%%"
+}
+@Unpublished{Nagai:priv,
+  author = 	 {Nagai, K},
+  title = 	 {Two-dimensional Gross-Neveu model with {Wilson}
+                  twisted mass fermions},
+  note = 	 {private communication},
+  OPTkey = 	 {},
+  OPTmonth = 	 {},
+  OPTyear = 	 {},
+  OPTannote = 	 {}
+}
+@Article{Necco:2001xg,
+     author    = "Necco, S. and Sommer, R.",
+     title     = "The {N(f)} = 0 heavy quark potential from short to
+                  intermediate  distances",
+     journal   = "Nucl. Phys.",
+     volume    = "B622",
+     year      = "2002",
+     pages     = "328-346",
+     eprint    = "hep-lat/0108008",
+     SLACcitation  = "%%CITATION = HEP-LAT 0108008;%%"
+}
+@Article{Necco:2003vh,
+     author    = "Necco, Silvia",
+     journal   = "Nucl. Phys.",
+     volume    = "B683",
+     year      = "2004",
+     pages     = "137-167",
+     eprint    = "hep-lat/0309017",
+     SLACcitation  = "%%CITATION = HEP-LAT 0309017;%%"
+}
+@Article{Neff:2001zr,
+     author    = "Neff, H. and Eicker, N. and Lippert, T. and Negele, J. W.
+                  and Schilling, K.",
+     title     = "On the low fermionic eigenmode dominance in {QCD} on the
+                  lattice",
+     journal   = "Phys. Rev.",
+     volume    = "D64",
+     year      = "2001",
+     pages     = "114509",
+     eprint    = "hep-lat/0106016",
+     SLACcitation  = "%%CITATION = HEP-LAT/0106016;%%"
+}
+@Article{Neuberger:1997fp,
+     author    = "Neuberger, H.",
+     title     = "Exactly massless quarks on the lattice",
+     journal   = "Phys. Lett.",
+     volume    = "B417",
+     year      = "1998",
+     pages     = "141-144",
+     eprint    = "hep-lat/9707022",
+     SLACcitation  = "%%CITATION = HEP-LAT 9707022;%%"
+}
+@Article{Neuberger:1998wv,
+     author    = "Neuberger, H.",
+     title     = "More about exactly massless quarks on the lattice",
+     journal   = "Phys. Lett.",
+     volume    = "B427",
+     year      = "1998",
+     pages     = "353-355",
+     eprint    = "hep-lat/9801031",
+     SLACcitation  = "%%CITATION = HEP-LAT 9801031;%%"
+}
+@Article{Niedermayer:1998bi,
+     author    = "Niedermayer, F.",
+     title     = "Exact chiral symmetry, topological charge and related
+                  topics",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "73",
+     year      = "1999",
+     pages     = "105-119",
+     eprint    = "hep-lat/9810026",
+     SLACcitation  = "%%CITATION = HEP-LAT 9810026;%%"
+}
+@Article{Nielsen:1980rz,
+     author    = "Nielsen, H. B. and Ninomiya, M.",
+     title     = "Absence of neutrinos on a lattice. 1. proof by homotopy
+                  theory",
+     journal   = "Nucl. Phys.",
+     volume    = "B185",
+     year      = "1981",
+     pages     = "20",
+     SLACcitation  = "%%CITATION = NUPHA,B185,20;%%"
+}
+@Article{Nielsen:1981hk,
+     author    = "Nielsen, H. B. and Ninomiya, M.",
+     title     = "No go theorem for regularizing chiral fermions",
+     journal   = "Phys. Lett.",
+     volume    = "B105",
+     year      = "1981",
+     pages     = "219",
+     SLACcitation  = "%%CITATION = PHLTA,B105,219;%%"
+}
+@Article{Nielsen:1981xu,
+     author    = "Nielsen, H. B. and Ninomiya, M.",
+     title     = "Absence of neutrinos on a lattice. 2. intuitive topological
+                  proof",
+     journal   = "Nucl. Phys.",
+     volume    = "B193",
+     year      = "1981",
+     pages     = "173",
+     SLACcitation  = "%%CITATION = NUPHA,B193,173;%%"
+}
+@Article{Noaki:1998zc,
+     author    = "Noaki, J. and Izubuchi, T. and Ukawa, A.",
+     title     = "Two-dimensional Gross-Neveu model with {Wilson} fermion
+                  action at finite temperature and density",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "73",
+     year      = "1999",
+     pages     = "483-485",
+     eprint    = "hep-lat/9809071",
+     SLACcitation  = "%%CITATION = HEP-LAT 9809071;%%"
+}
+@Article{Orginos:2001xa,
+     author    = "Orginos, K.",
+ collaboration = "RBC",
+     title     = "Chiral properties of domain wall fermions with improved
+                  gauge actions",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "721-723",
+     eprint    = "hep-lat/0110074",
+     SLACcitation  = "%%CITATION = HEP-LAT 0110074;%%"
+}
+@Article{Orth:2005kq,
+     author    = "Orth, B. and Lippert, T. and Schilling, K.",
+     title     = "Finite-size effects in lattice {QCD} with dynamical {Wilson}
+                  fermions",
+     journal   = "Phys. Rev.",
+     volume    = "D72",
+     year      = "2005",
+     pages     = "014503",
+     eprint    = "hep-lat/0503016",
+     SLACcitation  = "%%CITATION = HEP-LAT 0503016;%%"
+}
+@Article{Osterwalder:1973dx,
+     author    = "Osterwalder, K. and Schrader, R.",
+     title     = "Axioms for euclidean Green's functions",
+     journal   = "Commun. Math. Phys.",
+     volume    = "31",
+     year      = "1973",
+     pages     = "83-112",
+     SLACcitation  = "%%CITATION = CMPHA,31,83;%%"
+}
+@Article{Osterwalder:1975tc,
+     author    = "Osterwalder, K. and Schrader, R.",
+     title     = "Axioms for euclidean Green's functions. 2",
+     journal   = "Commun. Math. Phys.",
+     volume    = "42",
+     year      = "1975",
+     pages     = "281",
+     SLACcitation  = "%%CITATION = CMPHA,42,281;%%"
+}
+@Article{Osterwalder:1977pc,
+     author    = "Osterwalder, K. and Seiler, E.",
+     title     = "Gauge field theories on the lattice",
+     journal   = "Ann. Phys.",
+     volume    = "110",
+     year      = "1978",
+     pages     = "440",
+     SLACcitation  = "%%CITATION = APNYA,110,440;%%"
+}
+@Article{PDBook,
+     author = "Eidelman, S. and others",
+     title = "{Review of Particle Physics}",
+     journal = "{Physics Letters B}",
+     year = "2004",
+     volume = "592",
+     pages = {1+},
+     url = {http://pdg.lbl.gov}
+}
+@Article{Peardon:2002wb,
+     author    = "Peardon, M. J. and Sexton, J.",
+ collaboration = "TrinLat",
+     title     = "Multiple molecular dynamics time-scales in hybrid Monte
+                  Carlo fermion simulations",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "119",
+     year      = "2003",
+     pages     = "985-987",
+     eprint    = "hep-lat/0209037",
+     SLACcitation  = "%%CITATION = HEP-LAT 0209037;%%"
+}
+@Book{Peskin:1995ev,
+  author = 	 {Peskin, M. E. and Schroeder, D. V.},
+  title = 	 {An Introduction to quantum field theory},
+  publisher = 	 {Westview Press},
+  year = 	 {1995},
+  OPTkey = 	 {},
+  OPTvolume = 	 {},
+  OPTnumber = 	 {},
+  OPTseries = 	 {Advanced Book Program},
+  OPTaddress = 	 {Boulder, Colorado},
+  OPTedition = 	 {},
+  OPTmonth = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+@Article{Politzer:1973fx,
+     author    = "Politzer, H. D.",
+     title     = "Reliable perturbative results for strong interactions?",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "30",
+     year      = "1973",
+     pages     = "1346-1349",
+     SLACcitation  = "%%CITATION = PRLTA,30,1346;%%"
+}
+@Article{Politzer:1974fr,
+     author    = "Politzer, H. D.",
+     title     = "Asymptotic freedom: an approach to strong interactions",
+     journal   = "Phys. Rept.",
+     volume    = "14",
+     year      = "1974",
+     pages     = "129-180",
+     SLACcitation  = "%%CITATION = PRPLC,14,129;%%"
+}
+@Manual{R:2005,
+    title = {R: A language and environment for statistical computing},
+    author = {{R Development Core Team}},
+    organization = {R Foundation for Statistical Computing},
+    address = {Vienna, Austria},
+    year = {2005},
+    note = {{ISBN} 3-900051-07-0},
+    url = {http://www.R-project.org},
+}
+
+@Book{Rothe:1992wy,
+     author    = "Rothe, H.J.",
+     title     = "Lattice gauge theories",
+     publisher = "World Scientific, Singapore",
+     year      = "1992",
+     pages     = "528",
+     edition   = "",
+}
+@Article{Rupak:2002sm,
+     author    = "Rupak, G. and Shoresh, N.",
+     title     = "Chiral perturbation theory for the {Wilson} lattice action",
+     journal   = "Phys. Rev.",
+     volume    = "D66",
+     year      = "2002",
+     pages     = "054503",
+     eprint    = "hep-lat/0201019",
+     SLACcitation  = "%%CITATION = HEP-LAT 0201019;%%"
+}
+
+@Article{Saad:1993a,
+  author  = "Saad, Y.",
+  title   = "A flexible inner-outer preconditioned GMRES algorithm",
+  journal = "SIAM J. Sci. Comput.",
+  volume  = "14 (2)",
+  year    = "1993",
+  pages   = "461-469"
+}
+@Article{Sachrajda:2004mi,
+     author    = "Sachrajda, C. T. and Villadoro, G.",
+     title     = "{Twisted boundary conditions in lattice simulations}",
+     journal   = "Phys. Lett.",
+     volume    = "B609",
+     year      = "2005",
+     pages     = "73-85",
+     eprint    = "hep-lat/0411033",
+     archivePrefix = "arXiv",
+     doi       = "10.1016/j.physletb.2005.01.033",
+     SLACcitation  = "%%CITATION = HEP-LAT/0411033;%%"
+}
+@Article{Scorzato:2004da,
+     author    = "Scorzato, L.",
+     title     = "Pion mass splitting and phase structure in twisted mass
+                  {QCD}",
+     journal   = "Eur. Phys. J.",
+     volume    = "C37",
+     year      = "2004",
+     pages     = "445-455",
+     eprint    = "hep-lat/0407023",
+     SLACcitation  = "%%CITATION = HEP-LAT 0407023;%%"
+}
+@Article{Scorzato:2005rb,
+     author    = "Scorzato, L. and others",
+     title     = "N(f) = 2 lattice {QCD} and chiral perturbation theory",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "153",
+     year      = "2006",
+     pages     = "283-290",
+     eprint    = "hep-lat/0511036",
+     SLACcitation  = "%%CITATION = HEP-LAT 0511036;%%"
+}
+
+@Article{Sexton:1992nu,
+     author    = "Sexton, J. C. and Weingarten, D. H.",
+     title     = "Hamiltonian evolution for the hybrid monte carlo
+                  algorithm",
+     journal   = "Nucl. Phys.",
+     volume    = "B380",
+     year      = "1992",
+     pages     = "665-678",
+     SLACcitation  = "%%CITATION = NUPHA,B380,665;%%"
+}
+
+@Article{Sharpe:1998xm,
+     author    = "Sharpe, S. R. and Singleton, R., Jr.",
+     title     = "Spontaneous flavor and parity breaking with {Wilson}
+                  fermions",
+     journal   = "Phys. Rev.",
+     volume    = "D58",
+     year      = "1998",
+     pages     = "074501",
+     eprint    = "hep-lat/9804028",
+     SLACcitation  = "%%CITATION = HEP-LAT 9804028;%%"
+}
+
+@Article{Sharpe:2004ny,
+     author    = "Sharpe, S. R. and Wu, Jackson M. S.",
+     title     = "Twisted mass chiral perturbation theory at next-to-leading
+                  order",
+     journal   = "Phys. Rev.",
+     volume    = "D71",
+     year      = "2005",
+     pages     = "074501",
+     eprint    = "hep-lat/0411021",
+     SLACcitation  = "%%CITATION = HEP-LAT 0411021;%%"
+}
+@Article{Sharpe:2004ps,
+     author    = "Sharpe, S. R. and Wu, J. M. S.",
+     title     = "The phase diagram of twisted mass lattice {QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "D70",
+     year      = "2004",
+     pages     = "094029",
+     eprint    = "hep-lat/0407025",
+     SLACcitation  = "%%CITATION = HEP-LAT 0407025;%%"
+}
+@Article{Sharpe:2005rq,
+     author    = "Sharpe, Stephen R.",
+     title     = "Observations on discretization errors in twisted-mass
+                  lattice QCD",
+     journal   = "Phys. Rev.",
+     volume    = "D72",
+     year      = "2005",
+     pages     = "074510",
+     eprint    = "hep-lat/0509009",
+     SLACcitation  = "%%CITATION = HEP-LAT 0509009;%%"
+}
+@Article{Sheikholeslami:1985ij,
+     author    = "Sheikholeslami, B. and Wohlert, R.",
+     title     = "Improved continuum limit lattice action for {QCD} with {Wilson}
+                  fermions",
+     journal   = "Nucl. Phys.",
+     volume    = "B259",
+     year      = "1985",
+     pages     = "572",
+     SLACcitation  = "%%CITATION = NUPHA,B259,572;%%"
+}
+@Article{Shindler:2005vj,
+     author    = "Shindler, Andrea",
+     title     = "Twisted mass lattice {QCD}: Recent developments and results",
+     journal   = "PoS",
+     volume    = "LAT2005",
+     year      = "2006",
+     pages     = "014",
+     eprint    = "hep-lat/0511002",
+     SLACcitation  = "%%CITATION = HEP-LAT 0511002;%%"
+}
+@Article{Shindler:2006tm,
+     author    = "Shindler, A.",
+ collaboration = "ETM",
+     title     = "Lattice QCD with light twisted quarks: First results",
+     year      = "2006",
+     eprint    = "hep-ph/0611264",
+     SLACcitation  = "%%CITATION = HEP-PH 0611264;%%"
+}
+@Article{Shindler:2007vp,
+     author    = "Shindler, A.",
+     title     = "{Twisted mass lattice QCD}",
+     journal   = "Phys. Rept.",
+     volume    = "461",
+     year      = "2008",
+     pages     = "37-110",
+     eprint    = "0707.4093",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     doi       = "10.1016/j.physrep.2008.03.001",
+     SLACcitation  = "%%CITATION = 0707.4093;%%"
+}
+@Article{Sleijpen:1996aa,
+     author    = "G. L. G. Sleijpen and H. A. Van der Vorst",
+     title     = "A Jacobi-Davidson iteration method for linear 
+                  eigenvalue problems",
+     journal   = "SIAM Journal on Matrix Analysis and Applications",
+     volume    = "17",
+     year      = "1996",
+     pages     = "401-425",
+}
+@Article{Sommer:1993ce,
+     author    = "Sommer, R.",
+     title     = "A New way to set the energy scale in lattice gauge theories
+                  and its applications to the static force and alpha-s in
+                  SU(2) Yang-Mills theory",
+     journal   = "Nucl. Phys.",
+     volume    = "B411",
+     year      = "1994",
+     pages     = "839-854",
+     eprint    = "hep-lat/9310022",
+     SLACcitation  = "%%CITATION = HEP-LAT 9310022;%%"
+}
+@Article{Sonneveld:1989cgs,
+ author = {Peter Sonneveld},
+ title = {CGS, a fast Lanczos-type solver for nonsymmetric linear systems},
+ journal = {SIAM J. Sci. Stat. Comput.},
+ volume = {10},
+ number = {1},
+ year = {1989},
+ issn = {0196-5204},
+ pages = {36--52},
+ publisher = {Society for Industrial and Applied Mathematics},
+ address = {Philadelphia, PA, USA},
+ }
+@Article{Sternbeck:2003gy,
+     author    = "Sternbeck, A. and Ilgenfritz, E.-M. and Kerler, W.
+                  and M{\"u}ller-Preu{\ss}ker, M. and St{\"u}ben, H.",
+     title     = "The {Aoki} phase for {N(f)} = 2 {Wilson} fermions revisited",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "129",
+     year      = "2004",
+     pages     = "898-900",
+     eprint    = "hep-lat/0309059",
+     SLACcitation  = "%%CITATION = HEP-LAT 0309059;%%"
+}
+@Article{Sternbeck:2005tk,
+     author    = "Sternbeck, A. and Ilgenfritz, E. -M. and Mueller-Preussker,
+                  M. and Schiller, A.",
+     title     = "{Going infrared in SU(3) Landau gauge gluodynamics}",
+     journal   = "Phys. Rev.",
+     volume    = "D72",
+     year      = "2005",
+     pages     = "014507",
+     eprint    = "hep-lat/0506007",
+     SLACcitation  = "%%CITATION = HEP-LAT/0506007;%%"
+}
+@Article{Symanzik:1983dc,
+     author    = "Symanzik, K.",
+     title     = "Continuum limit and improved action in lattice theories. 1.
+                  principles and phi**4 theory",
+     journal   = "Nucl. Phys.",
+     volume    = "B226",
+     year      = "1983",
+     pages     = "187",
+     SLACcitation  = "%%CITATION = NUPHA,B226,187;%%"
+}
+@Conference{Symanzik:1981hc,
+     author    = "Symanzik, K.",
+     title     = "Some topics in quantum field theory",
+     booktitle = "Mathematical problems in theoretical physics",
+     journal   = "Lecture Notes in Physics",
+     volume    = "153",
+     year      = "1981",
+     pages     = "47-58",
+     editor    = "R. Schrader et al.",
+     note      = "Presented at 6th Int. Conf. on Mathematical Physics,
+                  Berlin, West Germany"
+}
+@Article{Symanzik:1983gh,
+     author    = "Symanzik, K.",
+     title     = "Continuum limit and improved action in lattice theories. 2.
+                  O(N) nonlinear sigma model in perturbation theory",
+     journal   = "Nucl. Phys.",
+     volume    = "B226",
+     year      = "1983",
+     pages     = "205",
+     SLACcitation  = "%%CITATION = NUPHA,B226,205;%%"
+}
+@Article{Takaishi:1996xj,
+     author    = "Takaishi, T.",
+     title     = "Heavy quark potential and effective actions on blocked
+                  configurations",
+     journal   = "Phys. Rev.",
+     volume    = "D54",
+     year      = "1996",
+     pages     = "1050-1053",
+     SLACcitation  = "%%CITATION = PHRVA,D54,1050;%%"
+}
+@Article{Takaishi:2005tz,
+     author    = "Takaishi, Tetsuya and de Forcrand, Philippe",
+     title     = "{Testing and tuning new symplectic integrators for hybrid
+                  Monte Carlo  algorithm in lattice QCD}",
+     journal   = "Phys. Rev.",
+     volume    = "E73",
+     year      = "2006",
+     pages     = "036706",
+     eprint    = "hep-lat/0505020",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevE.73.036706",
+     SLACcitation  = "%%CITATION = HEP-LAT/0505020;%%"
+}
+@Article{Takeda:2004xh,
+     author    = "Takeda, S. and others",
+     title     = "A scaling study of the step scaling function in SU(3) gauge
+                  theory with  improved gauge actions",
+     journal   = "Phys. Rev.",
+     volume    = "D70",
+     year      = "2004",
+     pages     = "074510",
+     eprint    = "hep-lat/0408010",
+     SLACcitation  = "%%CITATION = HEP-LAT 0408010;%%"
+}
+@Article{Ukawa:2002pc,
+     author    = "Ukawa, A.",
+ collaboration = "CP-PACS and JL{QCD}",
+     title     = "Computational cost of full {QCD} simulations experienced by
+                  {CP-PACS and JLQCD Collaborations}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "106",
+     year      = "2002",
+     pages     = "195-196",
+     SLACcitation  = "%%CITATION = NUPHZ,106,195;%%"
+}
+@Article{Urbach:2005ji,
+     author    = "Urbach, C. and Jansen, K. and Shindler, A. and Wenger, U.",
+     title     = "{HMC} algorithm with multiple time scale integration and mass
+                  preconditioning",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "174",
+     year      = "2006",
+     pages     = "87-98",
+     eprint    = "hep-lat/0506011",
+     SLACcitation  = "%%CITATION = HEP-LAT 0506011;%%"
+}
+@Article{Urbach:2007rt,
+     author    = "Urbach, Carsten",
+ collaboration = "ETM",
+     title     = "{Lattice QCD with two light Wilson quarks and maximally
+                  twisted mass}",
+     journal   = "PoS",
+     volume    = "LAT2007",
+     year      = "2007",
+     pages     = "022",
+     eprint    = "0710.1517",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0710.1517;%%"
+}
+@Article{WalkerLoud:2005bt,
+     author    = "Walker-Loud, Andre and Wu, Jackson M. S.",
+     title     = "{Nucleon and Delta masses in twisted mass chiral
+                  perturbation theory}",
+     journal   = "Phys. Rev.",
+     volume    = "D72",
+     year      = "2005",
+     pages     = "014506",
+     eprint    = "hep-lat/0504001",
+     archivePrefix = "arXiv",
+     doi       = "10.1103/PhysRevD.72.014506",
+     SLACcitation  = "%%CITATION = HEP-LAT/0504001;%%"
+}
+@Article{Weinberg:1973un,
+     author    = "Weinberg, S.",
+     title     = "Nonabelian gauge theories of the strong interactions",
+     journal   = "Phys. Rev. Lett.",
+     volume    = "31",
+     year      = "1973",
+     pages     = "494-497",
+     SLACcitation  = "%%CITATION = PRLTA,31,494;%%"
+}
+@Article{Weinberg:1978kz,
+     author    = "Weinberg, S.",
+     title     = "Phenomenological Lagrangians",
+     journal   = "Physica",
+     volume    = "A96",
+     year      = "1979",
+     pages     = "327",
+     SLACcitation  = "%%CITATION = PHYSA,A96,327;%%"
+}
+@Book{Weinberg:1995mt,
+     author    = "Weinberg, S.",
+     title     = "The Quantum theory of fields. Vol. 1: Foundations",
+     publisher = "Cambridge University Press",
+     year      = "1995",
+     pages     = "609",
+}
+@Article{Weisz:1982zw,
+     author    = "Weisz, P.",
+     title     = "Continuum limit improved lattice action for pure {Yang-Mills}
+                  theory. 1",
+     journal   = "Nucl. Phys.",
+     volume    = "B212",
+     year      = "1983",
+     pages     = "1",
+     SLACcitation  = "%%CITATION = NUPHA,B212,1;%%"
+}
+@Article{Weisz:1983bn,
+     author    = "Weisz, P. and Wohlert, R.",
+     title     = "Continuum limit improved lattice action for pure {Yang-Mills}
+                  theory. 2",
+     journal   = "Nucl. Phys.",
+     volume    = "B236",
+     year      = 1984,
+     pages     = 397,
+     SLACcitation  = "%%CITATION = NUPHA,B236,397;%%"
+}
+@Article{Wennekers:2005wa,
+     author    = "Wennekers, J. and Wittig, H.",
+     title     = "On the renormalized scalar density in quenched QCD",
+     year      = "2005",
+     eprint    = "hep-lat/0507026",
+     SLACcitation  = "%%CITATION = HEP-LAT 0507026;%%"
+}
+@Article{Weyl:1918ib,
+     author    = "Weyl, H.",
+     title     = "Gravitation und Elektrizit{\"a}t",
+     journal   = "Sitzungsber. Preuss. Akad. Wiss. Berlin (Math. Phys. )",
+     volume    = "1918",
+     year      = "1918",
+     pages     = "465",
+     SLACcitation  = "%%CITATION = SPWPA,1918,465;%%"
+}
+@Article{Weyl:1929fm,
+     author    = "Weyl, H.",
+     title     = "Electron and gravitation",
+     journal   = "Z. Phys.",
+     volume    = "56",
+     year      = "1929",
+     pages     = "330-352",
+     SLACcitation  = "%%CITATION = ZEPYA,56,330;%%"
+}
+@Article{Wilson:1974sk,
+     author    = "Wilson, K. G.",
+     title     = "Confinement of quarks",
+     journal   = "Phys. Rev.",
+     volume    = "D10",
+     year      = "1974",
+     pages     = "2445-2459",
+     SLACcitation  = "%%CITATION = PHRVA,D10,2445;%%"
+}
+@Article{Wilson:1974sk,
+     author    = "Wilson, K. G.",
+     title     = "Confinement of quarks",
+     journal   = "Phys. Rev.",
+     volume    = "D10",
+     year      = "1974",
+     pages     = "2445-2459",
+     SLACcitation  = "%%CITATION = PHRVA,D10,2445;%%"
+}
+@Article{Wilson:1975mb,
+     author    = "Wilson, K. G.",
+     title     = "The renormalization group: Critical phenomena and the kondo
+                  problem",
+     journal   = "Rev. Mod. Phys.",
+     volume    = "47",
+     year      = "1975",
+     pages     = "773",
+     SLACcitation  = "%%CITATION = RMPHA,47,773;%%"
+}
+@Article{Wilson:1975mb,
+     author    = "Wilson, K. G.",
+     title     = "The renormalization group: Critical phenomena and the kondo
+                  problem",
+     journal   = "Rev. Mod. Phys.",
+     volume    = "47",
+     year      = "1975",
+     pages     = "773",
+     SLACcitation  = "%%CITATION = RMPHA,47,773;%%"
+}
+@Article{Wolff:2003sm,
+     author    = "Wolff, U.",
+ collaboration = "ALPHA",
+     title     = "Monte Carlo errors with less errors",
+     journal   = "Comput. Phys. Commun.",
+     volume    = "156",
+     year      = "2004",
+     pages     = "143-153",
+     eprint    = "hep-lat/0306017",
+     SLACcitation  = "%%CITATION = HEP-LAT 0306017;%%"
+}
+@Article{Yang:1954ek,
+     author    = "Yang, C.-N. and Mills, R. L.",
+     title     = "Conservation of isotopic spin and isotopic gauge
+                  invariance",
+     journal   = "Phys. Rev.",
+     volume    = "96",
+     year      = "1954",
+     pages     = "191-195",
+     SLACcitation  = "%%CITATION = PHRVA,96,191;%%"
+}
+@Article{Yoshie:2008aw,
+     author    = "Yoshie, Tomoteru",
+     title     = "{Making use of the International Lattice Data Grid}",
+     journal   = "PoS",
+     volume    = "LATTICE2008",
+     year      = "2008",
+     pages     = "019",
+     eprint    = "0812.0849",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0812.0849;%%"
+}
+@Article{Zweig:1964jf,
+     author    = "Zweig, G.",
+     title     = "An SU(3) model for strong interaction symmetry and its
+                  breaking. 2",
+     note     = "CERN-TH-412"
+}
+@Article{cln:web,
+  author = 	 {},
+  eprint =       {http://www.ginac.de/CLN/}
+}
+@Article{deForcrand:1995bs,
+     author    = "de Forcrand, P.",
+     title     = "Progress on lattice {QCD} algorithms",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "47",
+     year      = "1996",
+     pages     = "228-235",
+     eprint    = "hep-lat/9509082",
+     SLACcitation  = "%%CITATION = HEP-LAT 9509082;%%"
+}
+@Article{deForcrand:1996bx,
+     author    = "de Forcrand, P. and others",
+ collaboration = "{QCD}-TARO",
+     title     = "Search for effective lattice action of pure {QCD}",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "53",
+     year      = "1997",
+     pages     = "938-941",
+     eprint    = "hep-lat/9608094",
+     SLACcitation  = "%%CITATION = HEP-LAT 9608094;%%"
+}
+@Article{deForcrand:1996ck,
+     author    = "de Forcrand, P. and Takaishi, T.",
+     title     = "Fast fermion Monte Carlo",
+     journal   = "Nucl. Phys. Proc. Suppl.",
+     volume    = "53",
+     year      = "1997",
+     pages     = "968-970",
+     eprint    = "hep-lat/9608093",
+     SLACcitation  = "%%CITATION = HEP-LAT 9608093;%%"
+}
+@Article{etmc:asqr,
+     author    = "Frezzotti, R. and others",
+     title     = "{O(a^2) cutoff effects in Wilson fermion simulations}",
+     journal   = "PoS",
+     volume    = "LAT2007",
+     year      = "2007",
+     pages     = "277",
+     eprint    = "0710.2492",
+     archivePrefix = "arXiv",
+     primaryClass  =  "hep-lat",
+     SLACcitation  = "%%CITATION = 0710.2492;%%"
+}
+@Article{ildg:web,
+  eprint = 	 {http://cssm.sasr.edu.au/ildg/},
+  author =	 {}
+}
+@Book{kleinert:1,
+     author    = "Kleinert, H.",
+     title     = "Path integrals in quantum mechanics, statistics and polymer
+                  physics",
+     publisher = "World Scientific, Singapore",
+     year      = "1995",
+     edition   = "2nd Edition",
+}
+@Article{lapack:web,
+  author = 	 {},
+  eprint =       {http://www.netlib.org/lapack/}
+}
+@Article{lime:web,
+  author = 	 {USQCD},
+  title = 	 {c-lime library},
+  eprint =       {http://usqcd.jlab.org/usqcd-docs/c-lime/}
+}
+@Article{hmc:web,
+  author = 	 {},
+  title = 	 {tmLQCD},
+  eprint =       {http://www.carsten-urbach.eu/}
+}
+@Book{meister:1999,
+  author = 	 {Meister, Andreas},
+  title = 	 {Numerik linearer Gleichungssysteme},
+  publisher = 	 {vieweg},
+  year = 	 {1999},
+  OPTkey = 	 {},
+  OPTvolume = 	 {},
+  OPTnumber = 	 {},
+  OPTseries = 	 {},
+  OPTaddress = 	 {},
+  OPTedition = 	 {},
+  OPTmonth = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+@Manual{minuit,
+  title = 	 {MINUIT home page},
+  note= {\\seal.web.cern.ch/seal/snapshot/work-packages/mathlibs/minuit/home.html}
+}
+@Article{mpi:web,
+  author =       {},
+  title  =       {The message passing interface standard},
+  eprint =       {http://www-unix.mcs.anl.gov/mpi/}
+}
+@PhdThesis{orth:2004phd,
+  author = 	 {Orth, B.},
+  title = 	 {Finite size effects in lattice {QCD}
+                  with dynamical {Wilson} fermions},
+  school = 	 {Bergische Universit{\"a}t Wuppertal},
+  year = 	 {2004},
+  OPTkey = 	 {},
+  OPTtype = 	 {},
+  OPTaddress = 	 {},
+  OPTmonth = 	 {},
+  OPTnote = 	 {},
+  OPTannote = 	 {}
+}
+@PhdThesis{pleiter:phd,
+  author = 	 {Pleiter, D.},
+  title = 	 {XXX},
+  school = 	 {Freie {U}niversit{\"a}t {B}erlin},
+  year = 	 {2001}
+}
+@book{press:1992,
+	address = {Cambridge, UK},
+	author = {Press, William   and Teukolsky, Saul   and Vetterling, William   and Flannery, Brian  },
+	citeulike-article-id = {767703},
+	edition = {2nd},
+	keywords = {bibtex-import},
+	posted-at = {2006-07-21 00:26:35},
+	priority = {0},
+	publisher = {Cambridge University Press},
+	title = {Numerical Recipes in C},
+	year = {1992}
+}
+@Manual{root,
+  title = 	 {The ROOT system home page},
+  note = {root.cern.ch/}
+}
+
+@Book{saad:2003a,
+     author    = "Y. Saad",
+     title     = "Iterative Methods for sparse linear systems",
+     publisher = "SIAM",
+     year      = "2003",
+     edition   = "2nd",
+}
+
+@Article{scidac,
+  author = 	 {},
+  eprint =       {http://www.scidac.gov/}
+}
+@MastersThesis{urbach:2002aa,
+  author = 	 {Urbach, C.},
+  title = 	 {Untersuchung der {R}eversibilit{\"a}tsverletzung im {H}ybrid
+                  {M}onte {C}arlo {A}lgorithmus},
+  school = 	 {Freie Universit{\"a}t Berlin, Fachbereich Physik},
+  year = 	 {2002}
+}
diff --git a/doc/eo_pre.tex b/doc/eo_pre.tex
index 228ee3062..1fa2b4b9c 100644
--- a/doc/eo_pre.tex
+++ b/doc/eo_pre.tex
@@ -163,7 +163,7 @@ \subsubsection{Symmetric even/odd Preconditioning}
 to  
 \begin{equation}
   \begin{split}
-    \label{eq:eo9}
+    \label{eq:sym9}
     X &= 
     \begin{pmatrix}
       -(M_{ee}^-)^{-1}M_{eo}(M_{oo}^-)^{-1}X_o \\ X_o\\
diff --git a/doc/main.tex b/doc/main.tex
index 2643b44aa..d749171e2 100644
--- a/doc/main.tex
+++ b/doc/main.tex
@@ -39,6 +39,10 @@
   Copyright \textcopyright\ 2009 Carsten Urbach
 \end{flushright}
 
+\section{Theoretical Background}
+
+\myinput{basis}
+
 \section{Installation and Usage}
 
 \myinput{install}
diff --git a/doc/martins-trick.tex b/doc/martins-trick.tex
index 170afd0af..2ff9bed4c 100644
--- a/doc/martins-trick.tex
+++ b/doc/martins-trick.tex
@@ -171,7 +171,7 @@ \subsection{Hasenbusch trick for dynamical tmQCD}
 This can again be used also with symmetrical even/odd preconditioning
 by re-defining $Y_{1,2}$ and $X_{1,2}$
 \begin{equation}
-  \label{eq:mt7}
+  \label{eq:mt9}
   \begin{split}
     Y_1 &= 
     \begin{pmatrix}

From a22cf8e55357af769da67769fc8d6ee984c06209 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 29 May 2012 19:38:22 +0200
Subject: [PATCH 013/110] some mistakes corrected and nd-twisted-clover notes
 started

---
 doc/basis.tex  |  2 +-
 doc/eo_pre.tex | 24 ++++++++++++++++--------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/doc/basis.tex b/doc/basis.tex
index 46a21b7e8..22e2d81b8 100644
--- a/doc/basis.tex
+++ b/doc/basis.tex
@@ -53,7 +53,7 @@ \subsection{QCD on a lattice}
 \begin{equation}
   \label{eq:Dh}
   D_h(\bar\mu, \bar\epsilon)  = D_\mathrm{W}\ 1_f +
-  i\bar\mu\gamma_5\tau^3 + \bar\epsilon \tau^1 \, .
+  i\bar\mu\gamma_5\tau^3 - \bar\epsilon \tau^1 \, .
 \end{equation}
 Note that this notation is not unique. Equivalently -- as used in
 Ref.~\cite{Chiarappa:2006ae} -- one may write
diff --git a/doc/eo_pre.tex b/doc/eo_pre.tex
index 1fa2b4b9c..c48f367f1 100644
--- a/doc/eo_pre.tex
+++ b/doc/eo_pre.tex
@@ -194,37 +194,37 @@ \subsubsection{Mass non-degenerate flavour doublet}
   \begin{split}
     Q^h &=
     \begin{pmatrix}
-      (\gamma_5+i\bar\mu\tau^3 -\bar\epsilon\tau^1) & Q^h_{eo}\\
-      Q^h_{oe} & (\gamma_5+i\bar\mu\tau^3 -\bar\epsilon\tau^1)\\
+      (\gamma_5+i\bar\mu\tau^3 -\bar\epsilon\gamma_5\tau^1) & Q^h_{eo}\\
+      Q^h_{oe} & (\gamma_5+i\bar\mu\tau^3 -\bar\epsilon\gamma_5\tau^1)\\
     \end{pmatrix} \\
     &=
     \begin{pmatrix}
       Q^h_{ee} & 0 \\
-      Q^h_{oe} & 0 \\
+      Q^h_{oe} & 1 \\
     \end{pmatrix}
     \cdot
     \begin{pmatrix}
       1 & (Q^h_{ee})^{-1}Q_{eo} \\
-      0 & Q^h_{oo} \\
+      0 & \hat Q^h_{oo} \\
     \end{pmatrix} \\
   \end{split}
 \end{equation}
-where $Q^h_{oo}$ is given in flavour space by
+where $\hat Q^h_{oo}$ is given in flavour space by
 \begin{equation*}
-  Q^h_{oo} = \gamma_5
+  \hat Q^h_{oo} = \gamma_5
   \begin{pmatrix}
     1 + i\bar\mu\gamma_5 -
     \frac{M_{oe}(1-i\bar\mu\gamma_5)M_{eo}}{1+\bar\mu^2-\bar\epsilon^2} & 
     -\bar\epsilon\left(1+\frac{M_{oe}M_{eo}}{1+\bar\mu^2-\bar\epsilon^2}\right) \\
     -\bar\epsilon\left(1+\frac{M_{oe}M_{eo}}{1+\bar\mu^2-\bar\epsilon^2}\right) & 
     1 - i\bar\mu\gamma_5 -
-    \frac{M_{oe}(1-i\bar\mu\gamma_5)M_{eo}}{1+\bar\mu^2-\bar\epsilon^2}\\
+    \frac{M_{oe}(1+i\bar\mu\gamma_5)M_{eo}}{1+\bar\mu^2-\bar\epsilon^2}\\
   \end{pmatrix}
 \end{equation*}
 with the previous definitions of $M_{eo}$ etc. The inplementation for
 the HMC is very similar to the mass degenerate case.
 
-\subsubsection{Clover term and Twisted mass term}
+\subsubsection{Combining Clover and Twisted mass term}
 
 We start again with the lattice fermion action in the hopping
 parameter representation in the $\chi$-basis now including the clover
@@ -486,6 +486,14 @@ \subsubsection{Clover term and Twisted mass term}
 computed. Finally, $\delta T_{ee}$ is computed and combined with the
 insertion matrices.
 
+\subsubsection{Combining Clover and Nondegenerate Twisted mass term}
+
+Now we have
+\[
+\hat Q^h_{oo} = \gamma_5(M_{oo}^h -
+(M_{oe}^h\ (1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1}\ M_{eo}^h)
+\]
+
 \subsection{Inversion}
 
 In addition to even/odd preconditioning in the HMC algorithm as

From 048bf99cf4034f17027313f1f9eb8430b030faff Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 30 May 2012 00:19:07 +0200
Subject: [PATCH 014/110] docu for nd clover added

---
 doc/eo_pre.tex | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/doc/eo_pre.tex b/doc/eo_pre.tex
index c48f367f1..325e5e7e6 100644
--- a/doc/eo_pre.tex
+++ b/doc/eo_pre.tex
@@ -493,6 +493,13 @@ \subsubsection{Combining Clover and Nondegenerate Twisted mass term}
 \hat Q^h_{oo} = \gamma_5(M_{oo}^h -
 (M_{oe}^h\ (1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1}\ M_{eo}^h)
 \]
+where
+\[
+(1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1} =
+\frac{1}{(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}
+(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)
+\]
+if $(1+T_{ee})^2$ is symmetric.
 
 \subsection{Inversion}
 

From cc90a5377d2ddf466887c5fcd47f7680fbccb35d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 30 May 2012 13:51:14 +0200
Subject: [PATCH 015/110] moved test to c99complex

---
 tests/test_su3_algebra.c | 65 ++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/tests/test_su3_algebra.c b/tests/test_su3_algebra.c
index f6c11ccfc..d0be8020f 100644
--- a/tests/test_su3_algebra.c
+++ b/tests/test_su3_algebra.c
@@ -1,5 +1,5 @@
 #include <config.h>
-
+#include <complex.h>
 #include <cu/cu.h>
 
 #include "../su3.h"
@@ -12,18 +12,17 @@ TEST(su3_assign) {
   su3 m1,m2;
   
   int test = 0;
-
-  m1.c00.re = 1; m1.c00.im = 1;  m1.c01.re = 0; m1.c01.im = 0;  m1.c02.re=0; m1.c02.im=0;
-  m1.c10.re = 0; m1.c10.im = 0;  m1.c11.re = 1; m1.c11.im = 1;  m1.c12.re=0; m1.c12.im=0;
-  m1.c20.re = 0; m1.c20.im = 0;  m1.c21.re = 0; m1.c21.im = 0;  m1.c22.re=1; m1.c22.im=1;
+  m1.c00 = 1 + 1.*I; m1.c01 = 0.; m1.c02 = 0.;
+  m1.c10 = 0.; m1.c11 = 1 + 1.*I; m1.c12 = 0.;
+  m1.c20 = 0.; m1.c21 = 0.; m1.c22 = 1 + 1.*I;
 
   _su3_assign(m2,m1);
 
-  if( m2.c00.re == 1 && m2.c00.im == 1 && m2.c01.re == 0 && m2.c01.im == 0 && m2.c02.re == 0 && m2.c02.im == 0 &&
- m2.c10.re == 0 && m2.c10.im == 0 && m2.c11.re == 1 && m2.c11.im == 1 && m2.c12.re == 0 && m2.c12.im == 0 &&
- m2.c20.re == 0 && m2.c20.im == 0 && m2.c21.re == 0 && m2.c21.im == 0 && m2.c22.re == 1 && m2.c22.im == 1 )
+  if( creal(m2.c00) == 1 && cimag(m2.c00) == 1 && creal(m2.c01) == 0 && cimag(m2.c01) == 0 && creal(m2.c02) == 0 && cimag(m2.c02) == 0 &&
+      creal(m2.c10) == 0 && cimag(m2.c10) == 0 && creal(m2.c11) == 1 && cimag(m2.c11) == 1 && creal(m2.c12) == 0 && cimag(m2.c12) == 0 &&
+      creal(m2.c20) == 0 && cimag(m2.c20) == 0 && creal(m2.c21) == 0 && cimag(m2.c21) == 0 && creal(m2.c22) == 1 && cimag(m2.c22) == 1 )
     test = 1;
-
+  
   assertTrueM(test,"The SU3 assignment operator does not work correctly!\n");
 }
 
@@ -34,36 +33,36 @@ TEST(su3_expo_positivedet) {
   int test = 0;
 
   /* Positive determinant */
-  Q.c00.re = -0.2994; Q.c00.im = 0.0;
-  Q.c01.re =  0.5952; Q.c01.im = 1.3123;
-  Q.c02.re = -0.7943; Q.c02.im = 0.0913;
-  Q.c11.re = -1.1430; Q.c11.im = 0.0;
-  Q.c12.re = -2.0025; Q.c12.im = 0.2978;
-  Q.c22.re = +1.4424; Q.c22.im = 0.0;
-  Q.c10.re = Q.c01.re; Q.c10.im = -Q.c01.im;
-  Q.c20.re = Q.c02.re; Q.c20.im = -Q.c02.im;
-  Q.c21.re = Q.c12.re; Q.c21.im = -Q.c12.im;
+  Q.c00 = -0.2994;
+  Q.c01 =  0.5952 + 1.3123*I;
+  Q.c02 = -0.7943 + 0.0913*I;
+  Q.c11 = -1.1430;
+  Q.c12 = -2.0025 + 0.2978*I;
+  Q.c22 = +1.4424;
+  Q.c10 = conj(Q.c01);
+  Q.c20 = conj(Q.c02);
+  Q.c21 = conj(Q.c12);
   
   /* Matlab's solution for U = exp(i * Q) */
-  U.c00.re = +0.3391; U.c00.im = -0.1635;
-  U.c01.re = -0.2357; U.c01.im = +0.5203;
-  U.c02.re = +0.5609; U.c02.im = +0.4663;
-  U.c10.re = -0.0740; U.c10.im = -0.4204;
-  U.c11.re = -0.7706; U.c11.im = -0.1863;
-  U.c12.re = +0.1191; U.c12.im = -0.4185;
-  U.c20.re = +0.5351; U.c20.im = -0.6243;
-  U.c21.re = +0.1825; U.c21.im = +0.1089;
-  U.c22.re = -0.5279; U.c22.im = -0.0022;
+  U.c00 = +0.3391 -0.1635*I;
+  U.c01 = -0.2357 +0.5203*I;
+  U.c02 = +0.5609 +0.4663*I;
+  U.c10 = -0.0740 -0.4204*I;
+  U.c11 = -0.7706 -0.1863*I;
+  U.c12 = +0.1191 -0.4185*I;
+  U.c20 = +0.5351 -0.6243*I;
+  U.c21 = +0.1825 +0.1089*I;
+  U.c22 = -0.5279 -0.0022*I;
 
   _trace_lambda(T,Q);
   Q = exposu3(T);
 
-  if( Q.c00.re - U.c00.re > EPS &&  Q.c01.re - U.c01.re > EPS && Q.c02.re - U.c02.re > EPS &&
-  Q.c10.re - U.c10.re > EPS && Q.c11.re - U.c11.re > EPS && Q.c12.re - U.c12.re > EPS &&   
-  Q.c20.re - U.c20.re > EPS && Q.c21.re - U.c21.re > EPS && Q.c22.re - U.c22.re > EPS &&
-  Q.c00.im - U.c00.im > EPS && Q.c01.im - U.c01.im > EPS && Q.c02.im - U.c02.im > EPS &&
-  Q.c10.im - U.c10.im > EPS && Q.c11.im - U.c11.im > EPS && Q.c12.im - U.c12.im > EPS &&   
-  Q.c20.im - U.c20.im > EPS && Q.c21.im - U.c21.im > EPS && Q.c22.im - U.c22.im > EPS )
+  if( creal(Q.c00 - U.c00) > EPS && creal(Q.c01 - U.c01) > EPS && creal(Q.c02 - U.c02) > EPS &&
+      creal(Q.c10 - U.c10) > EPS && creal(Q.c11 - U.c11) > EPS && creal(Q.c12 - U.c12) > EPS &&   
+      creal(Q.c20 - U.c20) > EPS && creal(Q.c21 - U.c21) > EPS && creal(Q.c22 - U.c22) > EPS &&
+      cimag(Q.c00 - U.c00) > EPS && cimag(Q.c01 - U.c01) > EPS && cimag(Q.c02 - U.c02) > EPS &&
+      cimag(Q.c10 - U.c10) > EPS && cimag(Q.c11 - U.c11) > EPS && cimag(Q.c12 - U.c12) > EPS &&   
+      cimag(Q.c20 - U.c20) > EPS && cimag(Q.c21 - U.c21) > EPS && cimag(Q.c22 - U.c22) > EPS )
     test = 1;
 
   assertFalseM(test,"The exponentation of Q with a positive determinant failed.\n");

From e7dd5d6923fb03d5244758be3d474b474ca8f220 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 30 May 2012 17:01:34 +0200
Subject: [PATCH 016/110] cu test for six_invert

---
 Makefile.tests                 |  8 ++++-
 tests/test_clover.c            | 17 ++++++++++
 tests/test_clover_six_invert.c | 57 ++++++++++++++++++++++++++++++++++
 tests/test_clover_six_invert.h | 13 ++++++++
 4 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_clover.c
 create mode 100644 tests/test_clover_six_invert.c
 create mode 100644 tests/test_clover_six_invert.h

diff --git a/Makefile.tests b/Makefile.tests
index f7339cd75..65fa08a62 100644
--- a/Makefile.tests
+++ b/Makefile.tests
@@ -1,4 +1,4 @@
-TESTS = tests/test_sample tests/test_su3 tests/test_buffers
+TESTS = tests/test_sample tests/test_su3 tests/test_buffers tests/test_clover
 
 TEMP = $(patsubst %.c,%,$(wildcard $(top_srcdir)/tests/*.c))
 TESTMODULES = $(patsubst $(top_srcdir)/%,%,$(TEMP))
@@ -35,5 +35,11 @@ TEST_BUFFERS_LIBS:=$(top_builddir)/cu/libcu.a $(top_builddir)/buffers/libbuffers
 tests/test_buffers: $(TEST_BUFFERS_OBJECTS) $(TEST_BUFFERS_LIBS)
 	${LINK} $(TEST_BUFFERS_OBJECTS) $(TESTFLAGS) $(TEST_BUFFERS_FLAGS)
 
+TEST_CLOVER_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_clover*.c)) clover_leaf.o
+TEST_CLOVER_FLAGS:=-lm -lhmc -llinalg
+TEST_CLOVER_LIBS:=$(top_builddir)/cu/libcu.a
+tests/test_clover: $(TEST_CLOVER_OBJECTS) $(TEST_CLOVER_LIBS)
+	${LINK} $(TEST_CLOVER_OBJECTS) $(TESTFLAGS) $(TEST_CLOVER_FLAGS)
+
 tests: ${TESTS}
 
diff --git a/tests/test_clover.c b/tests/test_clover.c
new file mode 100644
index 000000000..e93679362
--- /dev/null
+++ b/tests/test_clover.c
@@ -0,0 +1,17 @@
+#define MAIN_PROGRAM
+// Driver for the clover unit tests; presumably MAIN_PROGRAM switches global.h from declaring to defining the globals -- confirm.
+#include "../global.h"
+#include "test_clover_six_invert.h"
+
+TEST_SUITES { // table of test suites made available to CU_RUN
+  TEST_SUITE_ADD(CLOVER),
+  TEST_SUITES_CLOSURE
+};
+
+int main(int argc,char *argv[]){
+  CU_SET_OUT_PREFIX("regressions/"); // presumably the path prefix for CU's per-suite output files -- confirm against the CU docs
+  CU_RUN(argc,argv);
+  return 0;
+}
+
+#undef MAIN_PROGRAM
diff --git a/tests/test_clover_six_invert.c b/tests/test_clover_six_invert.c
new file mode 100644
index 000000000..17fc3daa9
--- /dev/null
+++ b/tests/test_clover_six_invert.c
@@ -0,0 +1,57 @@
+#include <stdio.h>
+#include <config.h>
+#include <complex.h>
+#include <cu/cu.h>
+
+#define EPS 1e-7
+
+int six_invert(_Complex double a[6][6]);
+
+TEST(clover_six_invert) {
+  _Complex double a[6][6], b[6][6];
+  int test = 0;
+  // Regression check: six_invert(a) must reproduce the tabulated inverse b to within EPS.
+  // random matrix a  
+  a[0][0] = -0.0226172-1.0842742*I; a[0][1] = -0.4641519+0.7071808*I; a[0][2] = -0.0786318+1.4290063*I;
+  a[1][0] =  0.2165182+2.6528579*I; a[1][1] =  1.4397192-0.5239191*I; a[1][2] = -0.7269084+0.8157988*I;
+  a[2][0] = -0.0628841-0.3470563*I; a[2][1] = -1.0386082-0.2135166*I; a[2][2] = -1.3647777+0.7312646*I;
+  a[3][0] = -0.1675412-0.7309873*I; a[3][1] =  0.1120023-1.3983000*I; a[3][2] = -0.1266411+0.4298037*I;
+  a[4][0] = -0.2725515+0.1809753*I; a[4][1] = -0.1379395-0.7037811*I; a[4][2] = -0.6896344+0.1783902*I;
+  a[5][0] = -1.0980302+0.2763006*I; a[5][1] = -1.8903566-0.3511587*I; a[5][2] =  1.1886761-1.7150829*I;
+  
+  a[0][3] =  0.5028327+1.1093231*I; a[0][4] =  0.3878236-1.3375976*I; a[0][5] =  0.1203910+2.0495843*I;
+  a[1][3] = -0.5099459-0.0617545*I; a[1][4] =  1.6599072-0.1078419*I; a[1][5] =  0.5164999+1.0314383*I;
+  a[2][3] = -0.6036081+0.3900738*I; a[2][4] = -0.0447905+0.7071715*I; a[2][5] =  0.6763751+0.4613504*I;
+  a[3][3] =  1.0440726+1.4681992*I; a[3][4] = -1.3339747+0.0932149*I; a[3][5] =  0.3268227-0.4352195*I;
+  a[4][3] = -0.3226257-0.8897978*I; a[4][4] = -0.2680521+0.1304365*I; a[4][5] = -1.0114200-0.2461815*I;
+  a[5][3] = -0.1194779-0.4089390*I; a[5][4] = -0.1003558+1.6537274*I; a[5][5] = -0.6532741+0.5098912*I;
+  
+  // b = inverse of a
+  b[0][0] = -0.24037097+0.14414191*I; b[0][1] = -0.11380668-0.08118723*I; b[0][2] = -0.1589440+0.4350548*I;
+  b[1][0] = -0.10475996+0.12442873*I; b[1][1] =  0.10510192+0.23615703*I; b[1][2] = -0.0141379+0.2762152*I;
+  b[2][0] = -0.01620610+0.00456679*I; b[2][1] =  0.02483109-0.02776261*I; b[2][2] = -0.1478979-0.0784658*I;
+  b[3][0] =  0.09209149+0.00787285*I; b[3][1] =  0.01995269+0.00092068*I; b[3][2] = -0.2347910+0.1687461*I;
+  b[4][0] =  0.21497592+0.31304060*I; b[4][1] =  0.24420948-0.01908121*I; b[4][2] =  0.3385191-0.2141792*I;
+  b[5][0] = -0.01061067-0.16808488*I; b[5][1] =  0.09468236-0.08485920*I; b[5][2] =  0.4353193+0.0010994*I;
+  
+  b[0][3] = -0.0239881-0.4151801*I; b[0][4] = -0.6263347-0.5963434*I; b[0][5] = -0.45655201-0.02202738*I;
+  b[1][3] = -0.1350729-0.0418095*I; b[1][4] = -0.6033738+0.0647601*I; b[1][5] = -0.28037632+0.30025691*I;
+  b[2][3] = -0.1431319+0.0244497*I; b[2][4] = -0.2807683-0.0808173*I; b[2][5] =  0.12654249+0.21884983*I;
+  b[3][3] =  0.2140318-0.4344302*I; b[3][4] = -0.1638382+0.0162849*I; b[3][5] = -0.17682708-0.12990665*I;
+  b[4][3] = -0.4013470+0.0988086*I; b[4][4] = -0.3337646+0.9573819*I; b[4][5] =  0.28730090+0.30454484*I;
+  b[5][3] = -0.1739908+0.0800473*I; b[5][4] = -0.2584657+0.3703075*I; b[5][5] =  0.09579707+0.08151071*I;
+  
+  six_invert(a); // a is compared against the inverse below, i.e. the inversion is expected in place; return code not inspected
+  // cabs() of the difference flags a deviation of either sign in the real or the imaginary part
+
+  for(int i = 0; i < 6; i++) {
+    for(int j = 0; j < 6; j++) {
+      if(cabs(a[i][j] - b[i][j]) > EPS) {
+        printf("%d %d %e %e %e %e\n", i, j, creal(a[i][j]), cimag(a[i][j]), creal(b[i][j]), cimag(b[i][j]));
+        test = 1;
+      }
+    }
+  }
+
+  assertFalseM(test,"The six_invert function does not work correctly!\n");
+}
diff --git a/tests/test_clover_six_invert.h b/tests/test_clover_six_invert.h
new file mode 100644
index 000000000..7c1b8d379
--- /dev/null
+++ b/tests/test_clover_six_invert.h
@@ -0,0 +1,13 @@
+#ifndef _TEST_CLOVER_SIX_INVERT_H
+#define _TEST_CLOVER_SIX_INVERT_H
+
+#include <cu/cu.h>
+
+TEST(clover_six_invert);
+
+TEST_SUITE(CLOVER){
+  TEST_ADD(clover_six_invert),
+  TEST_SUITE_CLOSURE
+};
+
+#endif /* _TEST_CLOVER_SIX_INVERT_H */

From 01425bfee9006fdb3e9e58aab97d78fadf9b2c5b Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 30 May 2012 17:29:06 +0200
Subject: [PATCH 017/110] test for six_det added

---
 tests/test_clover_six_invert.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/tests/test_clover_six_invert.c b/tests/test_clover_six_invert.c
index 17fc3daa9..fa95c8ef6 100644
--- a/tests/test_clover_six_invert.c
+++ b/tests/test_clover_six_invert.c
@@ -6,6 +6,7 @@
 #define EPS 1e-7
 
 int six_invert(_Complex double a[6][6]);
+_Complex double six_det(_Complex double a[6][6]);
 
 TEST(clover_six_invert) {
   _Complex double a[6][6], b[6][6];
@@ -55,3 +56,36 @@ TEST(clover_six_invert) {
 
   assertFalseM(test,"The six_invert function does not work correctly!\n");
 }
+
+TEST(clover_six_det) {
+  _Complex double a[6][6];
+  int test = 0;
+  _Complex double d = 0.;
+  // Regression check: six_det(a) must reproduce the tabulated determinant to within EPS.
+  // random matrix a  
+  a[0][0] = -0.0226172-1.0842742*I; a[0][1] = -0.4641519+0.7071808*I; a[0][2] = -0.0786318+1.4290063*I;
+  a[1][0] =  0.2165182+2.6528579*I; a[1][1] =  1.4397192-0.5239191*I; a[1][2] = -0.7269084+0.8157988*I;
+  a[2][0] = -0.0628841-0.3470563*I; a[2][1] = -1.0386082-0.2135166*I; a[2][2] = -1.3647777+0.7312646*I;
+  a[3][0] = -0.1675412-0.7309873*I; a[3][1] =  0.1120023-1.3983000*I; a[3][2] = -0.1266411+0.4298037*I;
+  a[4][0] = -0.2725515+0.1809753*I; a[4][1] = -0.1379395-0.7037811*I; a[4][2] = -0.6896344+0.1783902*I;
+  a[5][0] = -1.0980302+0.2763006*I; a[5][1] = -1.8903566-0.3511587*I; a[5][2] =  1.1886761-1.7150829*I;
+  
+  a[0][3] =  0.5028327+1.1093231*I; a[0][4] =  0.3878236-1.3375976*I; a[0][5] =  0.1203910+2.0495843*I;
+  a[1][3] = -0.5099459-0.0617545*I; a[1][4] =  1.6599072-0.1078419*I; a[1][5] =  0.5164999+1.0314383*I;
+  a[2][3] = -0.6036081+0.3900738*I; a[2][4] = -0.0447905+0.7071715*I; a[2][5] =  0.6763751+0.4613504*I;
+  a[3][3] =  1.0440726+1.4681992*I; a[3][4] = -1.3339747+0.0932149*I; a[3][5] =  0.3268227-0.4352195*I;
+  a[4][3] = -0.3226257-0.8897978*I; a[4][4] = -0.2680521+0.1304365*I; a[4][5] = -1.0114200-0.2461815*I;
+  a[5][3] = -0.1194779-0.4089390*I; a[5][4] = -0.1003558+1.6537274*I; a[5][5] = -0.6532741+0.5098912*I;
+  
+  d = six_det(a);
+  // expected value is -44.9277673 + 84.4696631 i; cabs() catches deviations of either sign in both parts
+
+  if(cabs(d - (-44.9277673 + 84.4696631*I)) > EPS) {
+    printf("%.10e %.10e\n", creal(d), cimag(d));
+    test = 1;
+  }
+
+  assertFalseM(test,"The six_det function does not work correctly!\n");
+}
+
+

From d1e12428642fa7f238661f5c67877e225f76422d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 30 May 2012 18:46:54 +0200
Subject: [PATCH 018/110] some first clover functions for nd case

---
 clover_leaf.c                   | 119 ++++++++++++++++++++++++++++++++
 clover_leaf.h                   |   2 +
 clover_trlog_monomial.c         |   2 +-
 doc/Phmc-report.pdf             | Bin 103665 -> 103666 bytes
 doc/Polynomial-constr-notes.pdf | Bin 70315 -> 70317 bytes
 doc/Status-Phmc.pdf             | Bin 189431 -> 189433 bytes
 doc/basis.tex                   |   4 ++
 doc/eo_pre.tex                  |  33 ++++++---
 tests/test_clover_six_invert.h  |   4 +-
 9 files changed, 153 insertions(+), 11 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index 4e5a71e8d..06440ce78 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -439,6 +439,54 @@ double sw_trace(const int ieo, const double mu) {
 
 }
 
+double sw_trace_nd(const int ieo, const double mu, const double eps) {
+  int i,x,icx,ioff;
+  static su3 v;
+  static _Complex double a[6][6];
+  static double tra;
+  static double ks,kc,tr,ts,tt;
+  static _Complex double det[2];
+  // Returns the sum over VOLUME/2 sites of log(|det[0]|^2 |det[1]|^2 - eps^2), reduced over all ranks in the MPI build.
+  ks=0.0; // running sum
+  kc=0.0; // carried rounding correction
+
+  if(ieo==0) { // ieo==0 sums indices [0, VOLUME/2); otherwise the range starts at (VOLUME+RAND)/2 (presumably the odd sites)
+    ioff=0;
+  } 
+  else {
+    ioff=(VOLUME+RAND)/2;
+  }
+  for(icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+    x = g_eo2lexic[icx];
+    for(i=0;i<2;i++) { // i labels the two 6x6 blocks kept per site in sw[x][.][i]
+      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
+      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
+      _su3_dagger(v, sw[x][1][i]); 
+      populate_6x6_matrix(a, &v, 3, 0);
+      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
+      // we add the twisted mass term
+      if(i == 0) add_tm(a, mu);
+      else add_tm(a, -mu);
+      // and compute the tr log (or log det)
+      det[i] = six_det(a);
+    }
+    tra = log(conj(det[0])*det[0]*conj(det[1])*det[1] - eps*eps); // NOTE(review): argument is complex-typed; presumably only its real part is intended -- confirm
+
+    tr=tra+kc; // compensated (Kahan-style) accumulation of tra into ks, with kc carrying the rounding error
+    ts=tr+ks;
+    tt=ts-ks;
+    ks=ts;
+    kc=tr-tt;
+  }
+  kc=ks+kc; // fold the last correction into the local total
+#ifdef MPI
+  MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  return(ks);
+#else
+  return(kc);
+#endif
+}
+
 
 void mult_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]) {
 
@@ -453,6 +501,21 @@ void mult_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double
   return;
 }
 
+void copy_6x6(_Complex double a[6][6], _Complex double b[6][6]) { // copies the 6x6 matrix b into a
+  for(int i = 0; i < 6; i++) {
+    for(int j = 0; j < 6; j++) {
+      a[i][j] = b[i][j]; // a is the destination, b the source
+    }
+  }
+  return;
+}
+
+// This function computes the inverse of
+// (1 + T_ee \pm I\mu\gamma_5)
+//
+// + is stored in sw_inv[0-(VOLUME/2-1)] 
+// - is stored in sw_inv[VOLUME/2-(VOLUME-1)]
+
 void sw_invert(const int ieo, const double mu) {
   int ioff, err=0;
   int i, x;
@@ -523,6 +586,62 @@ void sw_invert(const int ieo, const double mu) {
   return;
 }
 
+static inline void add_shift(_Complex double a[6][6], const double mshift) { // in place: a += mshift * identity
+  // 'static': a plain C99 'inline' definition emits no external symbol, so a call that is not inlined would fail to link
+  for(int i = 0; i < 6; i++) {
+    a[i][i] += mshift;
+  }
+}
+
+// This function computes
+//
+// ((1+T)^2 + mshift)^{-1}, where the caller is expected to pass mshift = barmu^2 - bareps^2
+//
+// for all even x,
+// which is stored in sw_inv[0-(VOLUME/2-1)]
+//
+// it is the complement of sw_invert for the
+// non-degenerate case
+// multiplication with
+// (1+T - i\bar\mu\gamma_5\tau^3 + \bar\epsilon\tau^1)
+// must be done elsewhere because of flavour structure
+
+void sw_invert_nd(const double mshift) {
+  int err=0;
+  int i, x;
+  static su3 v;
+  static _Complex double a[6][6], b[6][6];
+
+  for(int icx = 0; icx < (VOLUME/2); icx++) {
+    x = g_eo2lexic[icx];
+
+    for(i = 0; i < 2; i++) {
+      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
+      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
+      _su3_dagger(v, sw[x][1][i]); 
+      populate_6x6_matrix(a, &v, 3, 0);
+      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
+
+      mult_6x6(b, a, a);
+      // we add the mass shift term
+      add_shift(b, mshift);
+      // so b = (1+T)^2 + shift
+      err = six_invert(b); 
+      // here we need to catch the error! (so far it is only reported on process 0 and then cleared)
+      if(err > 0 && g_proc_id == 0) {
+        printf("# inversion failed in sw_invert_nd code %d\n", err);
+        err = 0;
+      }
+
+      /* copy the four 3x3 blocks of the inverted matrix b into sw_inv */
+      get_3x3_block_matrix(&sw_inv[icx][0][i], b, 0, 0);
+      get_3x3_block_matrix(&sw_inv[icx][1][i], b, 0, 3);
+      get_3x3_block_matrix(&sw_inv[icx][2][i], b, 3, 3);
+      get_3x3_block_matrix(&sw_inv[icx][3][i], b, 3, 0); // NOTE(review): needs a fourth block slot in sw_inv (sw has only three) -- confirm allocation
+    }
+  }
+  return;
+}
 
 // this is (-tr(1+T_ee(+mu)) -tr(1+T_ee(-mu)))      
 // (or T_oo of course)
diff --git a/clover_leaf.h b/clover_leaf.h
index 6b8d056c0..b9157660d 100644
--- a/clover_leaf.h
+++ b/clover_leaf.h
@@ -28,7 +28,9 @@ extern su3 ** swm, ** swp;
 
 void sw_term(su3 ** const gf, const double kappa, const double c_sw);
 double sw_trace(const int ieo, const double mu);
+double sw_trace_nd(const int ieo, const double mu, const double eps);
 void sw_invert(const int ieo, const double mu);
+void sw_invert_nd(const double mshift);
 void sw_deriv(const int ieo, const double mu);
 void sw_spinor(const int ieo, spinor * const kk, spinor * const ll);
 void sw_all(hamiltonian_field_t * const hf, const double kappa, const double c_sw);
diff --git a/clover_trlog_monomial.c b/clover_trlog_monomial.c
index 8b36ac036..c91e5168d 100644
--- a/clover_trlog_monomial.c
+++ b/clover_trlog_monomial.c
@@ -53,7 +53,7 @@ void clover_trlog_heatbath(const int id, hamiltonian_field_t * const hf) {
   init_sw_fields();
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
   /*compute the contribution from the clover trlog term */
-  mnl->energy0 = -sw_trace(EO, mnl->mu);
+  mnl->energy0 = -sw_trace_nd(EO, mnl->mu,0);
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called clover_trlog_heatbath for id %d E = %e\n", id, mnl->energy0);
   }
diff --git a/doc/Phmc-report.pdf b/doc/Phmc-report.pdf
index e2db92bf0af4711b10c3d90f062b308dac5cee58..57d041efaf07e9ad2e097d11a91c0a6845466d64 100644
GIT binary patch
delta 2227
zcmW+%eLR(S8-IRB9Vhu6?}t7IOB^F4uOshMBvYKjiNcl&<t1;OXmln<N@csBPj@e?
zEG<uH*}2DEX?b}nOJyEgQBlq|Z8b}%Y}N8ySO0v!pX>el{qE~`-4myUlc$9>lZ1XV
zWlRX=gn}Ohgfe$$Z8xWd!e=7w%2cDui<MRKDyJKd_8exLD3>uinCQ`}a*qc_zj$>~
zu0{AYMs5%tKzcQ26ccFm&jNl%SAj}P8BlRs4Fkb}`PS$%V3BbHpq<G&FjSa?qM^rh
z0|>5WG8Ez@Uje?7+y%UC9tpU>;(ai0v@iiPS2RzCRgO#LfRoZ<3H93XwCIAmTbUc$
z=bt~buO#wy*D1=CTkgSZhSuE>G-%BeO8<Guo~^Xd6+}@=nJ?x;o7@LBX6Ti&pb=1P
zYXvH2+cGTBEOV67t?wTV^Zl(?Q6Fn?W8~m=MM!#&_Ln^${kN8K&twv8XYRWcz2`Z$
z1Kx3b7mzI644AoaAt1X@1GvJeNKES+wD#jVi#6tYLp7B7pHm-1pK#uSNsJdMkv6&h
zgv|uHF@XJU&p_4fz8h6?Pc5LQ*L_f#co#!OuEu8*T3-4p45>Qly;1jT9R>AmBGGqY
zluKOVCD5ar?(H}8AuU%abHqOwT!;MA^{MKwL$_3Nqkom&piFeYZb)=5pb!(q24TPI
zAa_t{f^Z_3E5SB^zXgW?I)-F|;XufB7=R`l+uv88OO?SuggOg!s6Ay4FS`Uz31L^j
z>7DRojNY+)956c~$$%DHb-K8|)^<&3)iWnx#3Ee5O>ad8x~U>R0bx3_aW)8bG0q_L
z#f+n+Ls6nfL-f`t6C>OzDH9&+BBDBhmCbV(Q>Hk!3LJ%TJJGR983j0?YyjN3x)Tgb
z<FDaY5wAT3*s``CRJ#)6(D!~4&W&5LHK?MKjfoB2l$j<!ZW^F6i55%>MqIX6r|nJi
z;V-Bx$DTfvJYGV%vJZRl!sRLRApw`aUWroA#<!zil!o)e?N2|5<goeQSmV14>?A#N
zE7r?A2gqh&)yG*di;O0FIpC}8)exZA;)(%fIZl9QbCLjQF5CyRFE<s;&vU;;b3;D9
zi0Fb;J*qy%nJ8?dk}20w;D@%?Tcv;&ssg~hs_!94xef1?>nf~6O5JX#1GKH=NA!+V
zSBUA(kNaYkC4aBGc=_WR$~{m^@H{U{ccHtwOdqMb`~##rpT6A;V-*<8b?oXD1MO$6
zKw{1u^aJ71!7db<4j-FK&ksrJJ}jCe9fvo31lngV9<c)#?U8U|M3>LfPq8c9(k+$5
ztZ^9aqnz*2w^!DGOpN8VwHtt#u(}Gsn{{b`UMD{T>^^A==08rV(JQ0A77oMcH#(rb
z(H$_o5%<iw#xQJSrU_^1lQZ)%*OzDW08N_{K$YDbj`r4XN&$T|FcsVbjREEwZ^8Y*
zw_Zb`^uhrHTK}ca-I1gm!-PLigceZFzO4sM6PH#9U~2@ldxKEWj&r`GLkcs?DX(Db
zTw8Y@R$bJi0o3-u95dqU>%owB-4pHgH)nJ~SAT0vOlQhcr0hJ)Hg8GwB53rs3mAU5
z-2_-XkPG-^APX>Y@Tws_);baxWRqEL-;n#zeG%o{@ASe+IC8HX4^%O<572B_E~f2g
zR_eAgGajP8WoEE(htH#%>w|1~pV5@5)k;9*wYZ157eB8@TJx(8Qu63|?9bx&9VoGn
z?BTfFrLicG82u+@Ak;ib#iLC>Ed?x^_!}^P5>^NEW)g41YibQTO=&Pb^tmYp#=jT?
zVcGOzOs0DIJyOt{lv#xGCj?|Ki0on^ISq7INMMjyi7r_J6r)S}!N*QS!UZBKnwl6;
zy`X%)9!Y=>_?!CVDP-p_i3vVt-qetU3$5tj=_}T)bZYln`40~-IAt_7zf-QsbMT8V
z>03wnLJ3*+HpZMtpgem^N?`G`pIVa7fh?>DObNfoig=(?yEPdDW}D`dlkmBDxeaN>
zUgyY2D^Q1wz<-1-v?nXUx!a!L-;1B?K>onY)6N983NLXXvxOj9dXOi^G}!S^?Z`O~
zZ+}_d)kL?nYS4R<FTi}>lN18Edy!5(TK|>N<skb7|3tE#F2s~K@+Nk0Ue?=(1OOHI
z5Eyv&s1M<QVth$4l<D;)`0ro?{m4?FGC#5x$jG0-hvpspp;6$G0J0G|EQlBo9qLZ`
z_+a9zGly=g%BlVdu9iI<N}kW7QBgE$ILqYmXnWi1h?n$o*7M=4;iS(g|DUDA70))f
zoCHE1^9WLjAL4i<QQ*Bytt2X-&8tWz(7-Cf2<f!Rz#!#wqDd!Kc9)a2M4!G%I7Ej^
zYHxV;Q}&R8z|P`Z6eJ4|U>iq{g4h{HEHU7|lHglrW$^@FE~}0Qk@8pLiIe~T0cR}j
Ao&W#<

delta 2266
zcmW+%dpwod8lJV)#_LWtj>{pJa?33u_falMWp8(>Nks-pAxx4uL~7J`ezQ#`qbMVp
zjBm0<lbE8^$ka@!IN8n7X&Oazo2JHj-~M@?-}BzqyVm!v^-VVkpEn7srWt%Uqs<ub
zLcuKo1Mc3Od_Bk&zEF~{O42GjUsf(Ix4r)R{v*=Uz^BgjV*uqPE)TSRbL|EGtngdP
z+@$mc(O=5rR6wqJ2Jw*EYBb5!I5hFunm!o7eXn&1F;DvxqJ_=|bd>4@kYixpCKL|3
zmLwuYClLP<-9x;s7m7Gv|6_D-*4IHdpJ!l6t9)%JMw~X>B?8yoQ+b_*{ZW|#LtZ;(
z4;F;J>pcm4iP3(lrfJfTNy8>GrUvidO3#$(D+<B@fcwf6+vHc8=~7hU{ANUTb7M5w
znHSRnt(Mk?kp1z|K#%_oN*W_Hu8&pcOZ>MC$bZ}a$&YowzpxZhJ1Ps*DB8ff8}Y97
z0z_u>DPo$94WiVh4RM8So;oz1l3PtGIJW7*NEL8D+YVv&ak~sEqP-}JD8=CyYR1Qj
zL%iwq0!{tS6=V`GmLo29{STUS+;-uLe3g3&dER<RG@vkH$++{?i&>3bN=iQj@(D{_
z1pq1cKi73<XZvugUijI#QLinkkn`rtUvtD-9~E5(F5J5U>x_8kQk_U&YIl#XGn#l`
z8VIh>&kXUgpFg6Fe;PWZ{@3vPpEfgp9Nm#*34B_Boj?JsfU8-40fXWKFJq8t&^pR4
z44y>H2uaX@UB*534)5f>&*g=)$AP;a;(%c)D^e+JXXqXjZ$g{rqG$-WL-BL?Bzbxy
z1pxjK7qvFTsSLQ_2zw<^@F{CmaRja)q8tMiBll8ZMN}B#aMUTp;^-c9ERVTLA4Mbn
zBw~B~O*ECSjilHI31U;|4{6NPFx$6jqICQt=b<@kUsd1j5~=*d-JoN_);<yV@O9eE
z3{ujP#b3|!&e2&Tm==uL=T{~lNOqTX<`_*(JrX@xiW9bekUoSY(G)B5SsSBBUEBQO
zO5`WgT=7*~s)?*W{WmRsA(dK8Nz0~2(mD{U(`kzTNXHrF_^rW+@3ux`LPUlGC6sKl
zMQq)cfT*5{2gQA!nS}0FnctJUc?aE;@T??&!jrs?#5`;r@I6_c<a?iOh-i?Lg?KRM
z3MNJGqyy)F%&jL%&ev2x$}PA>(JKo})nV^vLlIF0e{MK`>9Z=}#|lNXpVvkED11+`
zD$#+Gl|=Xc_2I>MT1v@$&%Qx*q$;T~D(>5APZUq8dr7n$sa1h<Bcl5CdCG>9z(v(y
z``iyT78vrdCWvW4@Ep}di`<NULs59#!to*Cmmd9aWVdS7Y59kBn~=Dm`clNf`ea16
zhC_%~8qCo>(NIWHX^nNb8BVQvA^DqehPjkxdVkI}2VxtCTWH3LT1=_b@iW^Ibz5W6
zl+hYQUCO>ILiA|EY2nA(G^o_nS$Z})+OLw>+xdkCG#*p9H<qwXGxq86fGpq_bPbSe
z`oaplYMf8E8;b01nt#uWhB(W7^kwRtKi|KdR&^L?Lwq=Z^UY~o+lY=G*A|oio1bPC
z(e}-+6Y4NqoM<RDFzRv>RW8CtZ`-3|==N#Ef;*XrlXub)6NdXVp|*X@$JZ>a#OhS$
zBj-iHyWAbbo$!MDCA6Wkk%Ne$QL#F7x2#fZ=Vo1$hL-CBpEG)n!W<uL#e2I3xMOk=
z$__a_&;09$8;Root3b4V{2cXX@OU?=>ffz!yL|UV7%EQfSuu)3&yr|sZ>EY6^Pm5T
zm^F=03O74Vr{OjeM?o`fl)vodJW7mxHGv}VwIdbV`SuEt-`|OI7|7}c>;OuslaMtb
z-4`;PBdMMuTZ$xBWH&L!Qi%l#lt5|b`JJ3sAj<+4iyg=YRoN6)mvyT%y5urlO%^0H
zhT*2mChag&5ih>+sM9v}bn8D$+O{wBj42q}0J1z0TmB(OkBM-3>1{)X{{v~M5j%`z
zV9anzWR=Fug@P`cunA;oiz#cs&n=Ufv3BYeELl5Jk0p~K<yo;+7<|Qw(N9gLx{&=r
zmEYJg6O?-PY_1Tcu?u^q4Sv>7<zpQ#ZeEt#`_?)oSE79}`wHEii&-v`i!18^Xgr~H
z$=7QB`%vjIEjCZ4?Zzx{ztSb{%o{1&o#DJoYuy=-B=%staG4t(jD9InUr)9Ssl<~V
z(1#^s=Gmit8+Tv!aa6Wyyb;%2p&Wmr7G&05*f=K1z1e2sAYZ1z6u=o|v3|@$K^b<I
zZ>#(Tk6Kz2z+M_aSQsRXrt3Tz@9ugR@)j<ozZ^{;O*jm)re(~5_Ifv%`CuLW5LQMX
zrY@98=$u}xVmU~ut63V-@M^{h;kC}4VUT66VLi06i<reTRrovhOBgAryY6}uq(>wS
spD<awgr(DlEF#%al)aJ6h!P$|F}m7Ps~CniS-K|%CCK_?n620U0i78B+yDRo

diff --git a/doc/Polynomial-constr-notes.pdf b/doc/Polynomial-constr-notes.pdf
index bb6b693d19a7b7663bc9a257c0506b4a7a17f8c4..952faa951a04483a1306bd463ada977cbb642f21 100644
GIT binary patch
delta 1168
zcmW-ge^AV69LHz#^<CCJ*NjZWZg%Mh*>)7hGD&xG4I32-yPIu_>xXmEt#tI|#$L6~
zkJ2|0@?)k_AwQlmA+2f(;Sd@p*H!1bZpM$H8##Al-1~F?yq<Z#Kfj*m`F@|%mBQId
z;e541!Q>i6l7exSM<<EiVPAn!0?6xxWn?MDy<nke0I!MVphf8b?<>oYls>}81B+9&
ziE`hK(t+OUQ?T6X@v!()wt^O;XNZ7Mk{6YBdQAmCdF3LIJBCx`bnk%StTM{XxYheP
zWzL^a0zRG)3HoS$1_w2M;8?8_JfbzDc0ha8V-$-!Tu|^V+r8!rlh5d)5&WQA2io<M
zk>b<~)ZR5|B1HA1DZ?N-{97PO0y1f1!j!3W0AtWX%5zL>0%sZi293c#fIQfZaF%f&
zWhp{(sAg-Z27UO!SubG9PSZ^Y^PE`feKafr><)VZ>(aa&iW?)G5Emo5L=}5KRQSg8
zQT7nyrBQDw-(~&-%#3bVGFw9b4UzHrOK6mQFD4Gu#g=-qk2{4g_v`(euX<g1x0lIn
zvGwRD#dT8s^7tRYU*iX0-AG8KGZ<G5g8J1OSmCSvX~SzRN{W1KT?#f>U%{%enJDsG
zVje}(>{f_Y`{#5@F?lhC;!^5q!pPKp;GxuyU};*DFT33qY}SuX9`9odD!Jflwjb_G
zd#moge>r)j)XumgJ)DYpMm~!9+;1L|VjE*<O5&z_pdl*;%*_f!pdu@R0{!1gV2Xpo
z8oOCT*39e$U`}?|2zG9;&s0*&xMj-@YSxw0N*8o|+Xm{K>r2@NX_tmQs{br~Vn%9r
zk!pQMpy8KA?tUrdg|;Cya239}<U*Qddkbo`J4EW^_k9&@@wPLJlrrykYV<N+3!y5w
zM54)+3^94vYZ9Y()6@8F_c3tXo;e6C+w&7O&=rXu0$WkW<e=hW1RIO<;YP}#beCHW
zET*O9;8y(9eBMzj%_=L`Db%d&UncUWNq^4vWW4RzWty@5#9t$s(z^7C?*<covy<D<
zUmmJ>55BJa0+k_Ep_I4wR5KV*ja%at)eU4N)y{@ERC^qPo6baHsw~yv-SC8SHuCs+
zjS|a)`su?FQZyEV`y1oIfkwO<KEwTgTG90$+UdqtHO-c8RzdjPnoAw*?f=ju%bice
zyLqsB?>guT;vZztDVrWjw1jtN(ulgQa){_>HuRHgdhogB4kG{1<Dzyu`w}U;_xT;;
Ee++Pi7XSbN

delta 1164
zcmW-geNfYN9LI0t$8Y#$%3bU5&<!^g@L}w97bkFpvs6w;Jfw)SAqWu<W#n44114S*
zn2*?ggo2`V=Md49`sSe`QxG-6G$hW1h`>niJS*vx((~;7@t@as@Av2F`>@|vUg3mS
zXl<6sm~>p0En}R?2T1(p_b!+qfc%v3Hd!)p4_G7W!3$y|n4z$PHx>0rDhvpbV{x%9
zl>0bP3kEBX!SX3*z~XnNuBF8gv{*nWTScV<s`=mxRXGCXuX3tf6ddl)ya{H;Ex~sw
zbLq@l@TZx}!4UP&;DmZEI9=lc4{FS)y{9=Z4`itg)zw2KzK!RYbW)p$;D6dYFiSTF
zDK4Er?cK9yK{U^v=LgXj)&)@;UQ8Pk=FO)A7#6IcJo~~+V3ht}&=9#D<dHsvZHB#+
zC0p#Eno^@0efa*U2QVd9^fd@`ObYcr6lVg5<8H$`^SXoL29pco8`Gf3*yNL%zk~Wq
zo-kgQ@QCu==0CvV#2y8+t{cB9GQRXJ8YSIGP6f3obwTX;r^3q5bYY$6Rp%b>VbYb9
zHuQ6*4p9A?w4LCuY4>1VU6)U1Fsz>dbsN;M;x~lRhTq9hP~?3}64-8e2<x~tnj&MF
zl@uw+vOsiaEu&M4Ijbp@n%hPbrsVGh59B`w>k2M~vhS}%nsu+_%m}eAsQudAnbj~_
z@JKm)^K4GKvxjkeVLTP{qAC>g*bi=zn)+cfP08GH1Jv7+!E##!0!M8o3XFf`1as{i
z*7T3nWW84MCg>;`3}B}xMx$$67|-~mf|?CFy6J+Blx?QY<)M_VckWQL{<dj_vx@SE
z_p$f-BJ{tk@{K!lA86W*5$o~hPB+prwsxULQz25Hx!aojc&{steg{<kPHi4mX&?ft
z&yeVJ=TO#;KS`)|(T7pG>o6FyI|hL@yMLe)>h_6pfvv4)(n601L7%4z?h46BH|h9%
zH7%F-m*OwY;}2PARzss!reyWcn8@#B-(M1hhdX?hW>kFf&lIMxB;5|(9L;#ymt}PQ
zlSe1Pzr1gt(%59Ayu4$bU_>+SjC-5g$=cMi1fss>2n3Hgxg1lat*7wS@buGG^1f|V
zE3ovnE%HZ5-cbX3I?}-V9f>^3_cF<IafJ41>sHd_ifc^};orql{idF$G{@Tef_S|W
z*4TABT|oMeMRdllTTWU`1I09;b+8d4X~>Fx(uol~wbW1K&qmzTZu@8^WslwMHT)0v
C+=EL1

diff --git a/doc/Status-Phmc.pdf b/doc/Status-Phmc.pdf
index 8fafd8b630d59c0e2ea0cc461bca4ed0905fc3b4..abb00b29f25e1e22163c124529454050b3536d12 100644
GIT binary patch
delta 4665
zcmX9?d0dX$6Tfp(DIKJnNQn?q5+d!3l4z4sS-MEVtxH8+qzx_Hwii!iDWSTd^u~iq
z%2L<9ODUCIC_l*!<u~)*f9Cu7&Y77r=gc{Cp7;G))33XxU)pWpr|!&D04`Nm6c}*H
z2_-F**OW9-zEtYKfk(>HJ_yMJN&$7&swa_rRMnS5GE{v$N_+JwsESaJqw#Npn~^kV
zY$W16v`Lc}jf!_nc9I{sI!U9k<zry<^egc*>>aY4xWrXc2W>+%-@p)_R9~W8YVeC=
zdV*W=+8dXyc??g9Nr}Z#Ex<_9wc79GH2BDY9g6=v7FN7gZ_4vZTAn`3Fz~4nm#iE4
z6(?Jzy95nub(?7!1!Gnq8K9?6X_$?@hD3f`7LlCs)kp&LmB=i_;4MlYLtm67hB-Je
zf8r=wS){Qmk{V+pP~t44B;aG?crN`pc>?7!Vd@|PD-$J(=QQm%Bn8tdiL5eRgXE!U
z4rub%N9R8d9u?eBow`I=6RrNpHKJMb(plG5E=e<+jJB7|=A#^9ehB4R^RXzK%{QQ2
zVDSga8VgD@*;2NT^49LC^0qDjUSjiuChD=Bjl;9-Mo=a}_98AuI>K=a73z?)6<IXR
z(MIxPj*OVQ(01gw_=7U9i)lz0<lCsvrOIwWWEtkZ76)IrPbZ=^uM?%J=R}k?o-}Q`
zr!~sEo>+HD%>3yn&(5d)8sue!($340^j@(Tf_b}A9v(h)lB93_USdG$?M168uGx}2
zl&woLRk>wVo}uJ7KMRkR`qg1Wy}4wEzd0J#_|wiy+m^MWx8&A}1t|3bdr^*A>5l{3
zR`vxo{=M8h*D5I@VvD9@NK(0nRzEJy4yvVzR;`X8XjyZYKpJdK@Hxbmz+~-g1WEL|
zeJC|UFQDp4Xc*1ovfdeG=6ZXSPuHJ8DcP_X<);l_<#-sEv_+U88yR^NU1}n`$)#%3
zeU!U58=}0l86!%UL=B@^Hg54DxF3B46!{3NpQ1UJY>CCoAt{NyJCYasl^wA-(309B
zUkwT+=BuiMToSiE9=-Z^rrT1YAI7Ce;-YEeMsHg}5TCFLtEHOep)%9cdNP-ECH{?(
za+2_rmn5&JL%QKF+WXSKZlZNuN*K-El{$l9QaY_(vNpp5Np;38+Sv3xoOFi-I`T=G
zn7vfY%psNUzHvz6_HkNW+Wtx;?g#MRacS_uYJy4G_B3C3?kbc`x#6htJW@_01CJF@
zg!6e)lp*;jrMd;tG;;XwUui@vv_RQeco|jaj*pX5=KZ6cmh3&WzY<GbWI&cfPF*Hw
zEICFFTT26wye$o-Jy;|<(!{yv12nmx__)_=^`gWK+wV47zb&<EE({SrRR-&0b;^R!
zHo44~TwYWxqA<3Vxkz4CX49IsSH~k!zI2`Dd2~6L3}#;a9m%PyM&#5|t3`Ib*S4UH
zx}Jzq;YN)*H|V(&WAx`4C|qTmQCZ&alg0)IJj6G?;dtXsycSaJTWJ*R<ZWD~wC#2>
zO|apv9O*XRvqf2cuN39F``8sF^BzRu@SO+LNTu5zDbUh(KADQ7>d6{%F>Dxv(z780
z<*SC%DEB}0MA`9lA<DUpttgF}>QMH5R)%hupWULgC9NM&#y-b8D=mI5FwR~0LbDc&
zg{fB!b+M(r(xwSRU+2>t!`@<xk|exsLsi&2SqqD5i$qmdTOq0rzqh22%^x=-S=_Ec
zp6feg*|mJaqbIfbUkZh}{Tc79q+e$X$`75?z0|+Zy$SoGkMiJ`rzkgd(RncX>PZpf
zyUkEu=pK*qO}7sY*nV@T=o#Oa5q$X(MRCJ=Gm)tLOh>Z&AoKzk4gdsc@RNV->$S=^
z%XdE&x-%s9QpC{4M~%n1fOW%I!g>bLI=q*8IOobwJPgh|9-YZ@2rfSUt}HSsVC|uZ
ztf_Cfs3r&97*^o&kbtmT9u6azD!?}2;sphuQi^VUVH_DnDnSwgQ-+D0D=ONk!JS_Q
zuXJ9)MN4I%1}8pLfz^nD%>bB(R5Ac2lDb+I)>8meH4MbXrE1_zR>#$0JJKnGL4iii
zG++Wkpaz(t_K*f7ldrxe7?AM-Ex_FpGqgaL=;`5romf!NfjJ2Cbf6dq<whVw>NXPo
zK^UqFc+!Mhx-hIiZ*_vS@cWTB-Dfu&aPg}yP-77y#(*t4o*VO@qx@KiKyB1mFh*_B
zSjYl(zG8D}`yl%nbDLd<zRR4MvM}+y8W;S>19nOA@A06DGm7Q<xHX8525<<^u=vUd
zJcyeb!%5`A>!~oW5C2@k1xphM#(_c;P@x3QPKS{Q_0!=6YAa2_4c&wRX0rWTW(NL5
ztIcWcLXSC2L!+q{Fq5=@T0$VwW=oh))WjN=A_&%S3$=kZ;7IXL*nt5;y&Wt=u(XF7
z9F(64M(84%IDj9yra6Ku(r1o9{YKdC1bD^7dMB8PAw-=yu!Gcvb7k`la)BB6>WnhH
zD9#@6L9QfRw_{aD{}?MSEOCV@v>V|Dc#{Mbcfhw>jCThge1pZO^C4Lq)hk0G1)(bx
z(ovfj2F`No{NeY)VJ#L9OjkGtB&daKGcLN4%f;6lAQlGmRL5y`d)$Nj9NfD2)D+Ea
zV@H^m_T}Q1NT5eebl(U&DY?c?P>yupW>5iDo>=~<w=-zon!#!b#<3mOxF|-!Rx-1Q
zrfG#+(YS?NP>TV4QiU}!FbaKBWBv=dG!8cs`K5R$pe+pE4%G;g5@a=rPk<ey9gqy(
z2!Y91F)p4-hUvIU;bRK?i|{&CR;l4>pev`sD>6G{s-hDoTUm2)t^_twBDK2!`=cPg
z8(h%cb2ls|?WsL*5W!u5|Do1C6Wpi(l6^3MMsu@359zfmtR+&z1E7o4{QxK;6@3n3
zT~M8I2=*d1$cA(}c^7h_1Yz9~P)BXT5m-mY-A6%=R?(9utH#cJ*#WD`r^N^w1$ghd
zm|p;#BAhu6*ha;d$FZ#-wK@qlNTW~wckCV%!Ca&xi(v}|&OQaN$tqC_vk>k`0lTK)
zQVPj9SXT-X1!eACA5ro#Qp25#>&rk&*5fOna5xXF@H%#9O@N={5FP*h&Q{_L?{{^r
z5<8Px8|6?g{I3GA!HXv^fF-%SuY^hJ-10}u*BkBDJ^9arv#lz+%nnXA=c4gN*o`OD
z@7?U6PkSx&W_kXxyrMxpq>hVEs<7bbY<vmYDe22F8L2q`3e2F5PN~8BrpQM})b%!S
zv9tES1-o3syNnI#8s3f>e82Y0IU#{Xg;7DHR4dwooGY?V9r3)N?Ru%nk_X>1X*_D)
z8!xF+n3Zs1_quRXb1m(_%EFp?E4i@c23DSn-)`VBLUg$W4wQ=IHsFVpIO7idBs$|B
z92m~k4jVaK%{FF>i$2b)f4xF+%Cr4$=W7@5=s0e7jtial=#+?(`!GfxUCz}3KK6p{
z6WK9+`Gjt|=u{8%@hZeL0DhSY^$mcZO5(PsU`}PPYLXSL|1)?>xwJI{ecK3qTj3Pa
z^R3WCbngq<EopjzJI+O|m*7ao%U?kdZPxWS@QC=~cd`~Jl(x|=66)IEC2B9c2Pgau
zB`SV~FJ#rz2?6+<W^UZ(V?{60{!E>s5)!%FHuZY8V#+x?F8<jCd&q3yzYvco?COSM
zJ-$3{_S2+wpB~l~JIvLvUTNPHBfK|_cWDogy;;k(YHsS4Jwcz<-(W5IG<=7Agsnd)
zU7`91<RS$20DhZ^4LuNrXIb<Gc9d*?GB#9`tHth0Z?8DEQGV0nJwwJMlxu8^{_y;z
z=LjxLmSYrJr4N&(l`GHQ<IkR()AA$6o$SxWOhrZ?(L#M+_6p6al$aq_LTvBPVrgsE
zsj|;FEUr*v%1UUqR*O+h?rSkAX!Fli{xyBP?k_kUYV8|g@*yuH`l5rvC-p(spMAqS
zKActSa4pww{@x*a5$^5F(&Tu=MXd+>=MT^JAF$)%6U%U1q7HM!u-A2%ABNQ%!M;;6
zy1KHQf9T>Ys4gDO8VGmlvE_svV_7|!T_4XjA~@<ZJk>&`KJ)9(TQ2is<JI-$dy6+E
z@G}#bIT>0QuptOu1`J<1A=`kRKyWo=Z-M8TPGoXwTtiw^wO-+S%Y@_8k59Q0WrY20
zjxighh<1ONvKWL8Q&x>2oxuVWxuJ=npu)xBW=!__xSF%)l-OiTcAIvM*|39Dqa1r?
zL-V)JWYnv}9hfeH;DDvz!bC?l3BliyZCB;Wc@as2;*a`QRW5y{-T8tGUmO{}kb=Q1
zwgNwWyr(ZV87+NjmRzQ6wlqWe--)-s-#)G0b#L?KBZIh5Jd5EgC-iY*pK#JHC+3Nh
z3(j-dFoeLltf@bD(f)S7*y(!Co{W}u{K#-&rHXvD((gZya*?~TEK0t>jR};jtp}qo
zo?RZSnA8rQxNgM0UaW}ltT*!_v;ICT6CuxsjX}8S!!!}z_%J%EJAK)11YJM26+QC&
zm<7F?FP5-=<k{gbEB2h<WK>+RRCd&aC(D^0=L18Bx)eH$>^axeh!1Z7^Ht#;1q(-b
z?fJEPu>m&!0EW+<cqV`i@1w|n`Q^Z@$8EyhQ1+|X9>mIU?}frO%or^i*RWSaFNd(l
zNcaE2`r_++dgFpCRe64m`HgMKs+#7}ZNHb}CsQb+-<V=tICEnPJkjrObuK>MfXhSw
zwUJn2d<-I4HC1-`CMIhFzEMmhAJu3UNX;Q~8=f%4^zF<J^mrdLr%h%5229yE@6nZq
z{0rBT{}%Ul4A1-L^TyhK%X@Z)M_=8=h3*8FgHvQBGJKyzha@(RG7i}xJJoM@uux=+
zb~5@J82T5>rgYAyFziV}b}H3YEKkLqM5>X_PW0iWZCtEPXCEmKQNr9QSl>M?6(MsE
iMG?FNhOe>Ul*#H4M($<!?}iY!m*I~>(QY3*J^TM%S}KYF

delta 4683
zcmX9?d0dX$6Tfp(aYWKhq%7G}5~5YnDq2WMC`%>Wx|Gx}Qb}6S_TGB3B?@&z%Nq|W
zsgO&SYuBa_mn?N<EApFp??3bXeCN!}nRDixInVoktL^`#wtvcP;Kg^W6@W|C6a@xc
za;l#u%Ip0!P`35!z=4Nc+6N&~xfD=mp>i6@dlelyB*WAup|nz)j;c_#9W>rOqy<TX
zdN>jHVa*!6a7>JCqP_f}^$F@tt?&IJXI+Ugwz4^3&m~S8BhfZc;~yBx6Y5KNsqQbf
zsd3Il8|!MWFC38+l@yJmnt+j{8@1lZsq@i;Iu!qUB>28kYd+zYuqM?`&#zIROE!)E
zf|IS&_CmvJ+AXw<{BgcWl*j8(8YUC2BaxqYfXI<a)ku7F`jJ_n?rW4DdP`B3=pDg<
z#Z$)6%EAnskklIJLqE;}OMKopP2$p@(<W0clV=Pjm~GgP;@Qt!i6nn!C6RST8<0FO
zIszK})v?8o0>%V1R40208zR*nI)%1q)SPpA#w95x)6llYWHHL2riW3UGo664#dHhG
zC1$^)tTm%F6U}A&C~x6{DtC)~;3bydX`&x<=Ham2+)<Q?zm<rKk&d$6PK7$`Fq<rz
z=W8MPK3_&m9cep`IC=x;kE0sm1}|;W;ZpAGPnN+h8*%W3%Pb<Ai@H#%xK2T7=}Ob4
zx>}(8+ZF3BiCR1h<+;VQUxVHBQO<QUC%s!VhG6bal*d93I!V&ko^2RVdVBeLifiI^
z0_8KWOcickbwW?F($j2VYyJA*VeVYA+uIZkYrSdbrR}Sqp||9g?-G>b{d!T3TZ?&1
zW7qZtRo+u>nln2gG<3U$ZD2zALe2hMn(cp$CR(>Xl%RFP-vrVC3xbb<a|jGK&O?wy
zZrYDhBj_Tko&*KcJdPm_C^JK>P&S5~MJd^`0%hlxFLFGXOWH#Xk&O;JhAy>X-^itE
z>phg|+w@S@Y{Q6BuZZC^OZavVf_ss<pvXtf{wbPr$@XZx9Fmggzen>T&$3*zL#@fJ
z^3|YFV!E!{-!VS+(b)CBG40k8onS7_-4RI}H#XLbASP}dR!b#ip^~+$#WXJY9RDXq
zI+B2=yd*J%4(XP?wD+ZZZ=&^%q+pu+bFwkP)Kpr%WMkSwB-LrRXk$||29WNsKu10y
z6SJ3!nMX*qbpJ#oJN6Ht)ukM)MB;J??;V#0WK|PP&9<WXHs@eQlIEPvsB+CMr;$O&
z^C`lG6H=6cc_^jY`H?ho^v_>uL@Y2v*;Q~ERp(Dml;eD0q`lX^!v`y|)P=faIkfmP
zK~u?ba@bkwgXDE-5beQo(UvC8x!|M0J;g`8ZtIuFr_JfH)Lgl0Zc9O+*jX8%gVibX
zN87}*rR4IWVmXDGQ<;OLtumX|w5vJ>39q?9^E|v9KnB)Ve?wAyRiB(%uW6E9@Ad5{
zBW}c_RH&;}<GMfYMCt$UEEKGplU7;Yzf*mS4IbjUZX9pAiPu7^bt{E}oxY8$l(yea
zqzShCEl0ZWyK_*M-z`PC=^l1P$)fuaIDF?mHBxEpLj_veo+mSqR6W^1E_w~)P`Wmx
zp?ukJ2Iav<SCk!%%TO+8dWKTJ`7z4APs`Bl^3z+Cw&dA6l+n-e&Pt1(3v2*)<jYK2
z&6Xu!)zik7_EL)`40@GEa}0ltElLvix*b))Z)7bjqCE^%pW6#ib@Z({g=~4h4atfR
z>f{;HA<M3{6OW$M^4}y1bNeISSxNt{R+R6$sC%h>qI(nkNe5-tr$&@pKht^8|Kds!
zW4@W7y!dSr%74Cj;J}=27mA+Nvzp-3_Xvs`+?$C+>1QgE^epHFE*t{zSLdhq?(dzQ
zXOibq9JD7ex+Zj3)5E5dT)?_vEFpwJ<VfDj7CPj}PdN$>yC0rCU=vVu@=aM-g3rdo
zp$BIC!$nm&_=aJH0rC)skS-5L5sVZd7Pxp(0jQLsb6=Q9Mq&LR0fBLtGJq>8TB^dG
zUxuu8@a3X82WoKQ10`6GC|D}PBBT;!m_q7m6$qgKMyeQyi>p+@fvird!7ilJhkyc&
znyABM1V42!LhWI7NF-k!4bUayC7OV{C8lYDHqkR906VduFcRh?EE)+#I4Cy?8B*ub
z@E5``ZNQTz+|q{O1Nk$1NZEWZ?54}S7F{lW(FSTPLg+Y{gO2CN{pTn@0YXt5F#!xv
zTQ~s@fExGRR{CMEmGOcWr(th0t&^6;Ur^<O_awkBDgHSLRB%SITnD!X(N-4@;~5rT
z>cc|fMh0*ix$tTREb7BQmT<w`5CU+Zzz~!ufpfE9G(!C>c!AnVBXCAHLD@vMf2&Qv
zn`pHutzGzG3Nz7Yh8b9s_J8K!hqT2U785nJfK>>B1>8cdpC#B*{8Mv57omPGtVS@m
zf?6Duw+4N55e;p?lU!45!3pV8TcCa;?6L>EVq(2LSYrrr<b2po>VgHb`35_JF}^xu
z^e&0>l;6pfY}W2r*D)|^HW$2{pbG6qIRoA#LCFR1?G|HPzysf4v2igZYN2{<5F{ad
z4uVwF#s`CgoEm@7Q!u>M?7oq&txue4V5~vml^ia<+5*uqgeTk1e4ODD&?jr>isI=S
zu@gp_miFc1_AsDFO>_x|J(OJ2Rwzfhe;X))3Xd;;*xTj5Xu}ZIID_bp>s%BgU?-WG
zMbfmwtw`KLE~rKUKB>ZnC>Vpj$x;7>T(tu?5_wGw<kJ=g?1E~9sd2KJ#KggF(kdr{
zJAz*#R*Z{h6JZvvQh1*PUlCp<%PKV@1+?XqctvJcR8?gBwAmJ1Tp)q1l*qL|0Q;jL
zpAL@b?wSrONL!o%SqLry{EJ$vOmL<GNcMv=jpiJH@kp;9z*-{JI|SNDT@HaFQqdy|
z>w@aE!>|vjZZ@RS$-9^XB?y~xK@GKWxv+_hza0ZPTE&kOvTE$flO3?yJX(yPo{#sQ
zi+TAlfFhhd3D`!(wv*Uakj_30mPjK{|99-}7s3Lhql;iW1<o#pS7a401v`X0Qoyb$
zIF>>p4n8ggi2~>D^`Ry2!_-~47*YmOvYu1{1tYj$h1>Bv8+<%%hmQ0<=rCKXbAR*r
znPjfvHA^1Eg?}pm8@zb>BAAoQ+e(<K#?8OCeyRIl@gwhfK=$m4&n8*ZOu1-q3DWU|
zdcK+G-?`6hyq)XsYbqMl0v~hnNfj0xoegT>10{VKrXdv<Ujbv<=%iY_Z;E_u=;Pi7
zE_Pk}Z^4e&@h)RSx{kNQm><-#o*(E}SP<bqMx~<N-=QMAIM?-}mQzi$IS;sH*mTUa
zH%3yWU>A2PebZ(mQ%x<u%7WTOYq_w!4lB>a?m9e1h>o|whEkE-2K<l`jqkuuqQ-aO
z&<L)2RNv-mwgFpF_<m9SD__OwPY*g@xVB<<$H}?pxzKf&PKhYF2jk??<@{s7$6nBW
zB0HvSPw1wL_Vqv?uR>G<;FqaT-vIcjB*r#^DV4pdSyr@xPoa@=X>S4gwh{V1gJPr?
zo<TFweJ^CUr1=HzI2SeBz?O{HyaazH*8KwyiRHYJH9+A^JKZ7SNjtQmw(>35<1Z+&
z???DVR=r)|gTH7N?AUg^@I}i1lBX*LhNaI*zLBk%bbc-u!#_g?nW=n*7(^lc8x)P_
zYj(_QOxV=<;Bk@70(FbER?ShuTca4q51XTJUgMgzH^-MfL7xrXu#tQkdms-X_B$mj
z)O?2=gtb2azs$tO9}s~jS@Z;U3~e*y*f0&Q8vRG=hl=Cj@>^GA3>_C&t{xuw?s=Q*
zC@xHwV-$HnA12ExSDwAa-#j;G=7ml?J&=q06d8R%3k`kQOEjzQ$MmoaV#h!hO@}B*
zg?+?f(NC4}erUEylTktLYceTl@sCyBwSC;~Ejbfpu{6~1-HEiwOEwCfYJ+b)?dC%6
z2v$9kYdUp%y9bUBb@{M5MUIDF(!76g@rZ11<=vN_m~X}vj%2nNwr(Wz#IO@au^vih
zjJ7Q2AKEwzs*A_62Ex7L*&4#m39O#X>L#&p1UnsuM_SmY!#oG_*310(BsCrR-lDB>
zT!hKYlq}74*-!*`U54+Ra734#LU7h&uYsR1p2Fl*xw^ElDny~Db@ItsC#PSD(5LQZ
zz{V(|-9{r8h0tllsu4<!nU5mZGt?85@HH`EvbV?Cls%`+rkk_dv~6;hEQ@M%)QVZs
z{Lif!b?Oisrj3wkgSFs-zAc-Iu+o<88pKb8CJc@_=3P~}>ZMlK3od-IW%xV_Q|y>8
ze)qV~T46X=+Gdhi#!XhG@vl>E_uM|C_WAC%%ejNOP-4gMi4zp;Stri<)t<TH+`@td
zY&e4d0@gf`J8E^`E3&_FBqOc$0~Z!MvQkC9zTa;@k8x4ii5;Na3!Irixmqn`^tqF<
zkQI^I&J~x9$lX{W;RSc*MrMONSSCWg2OEcQ+k<H!yzyXkQj?albcC^<Y$tl;donY6
zG+%nL{^a?ux2)KXD`iw%x=MD`goZV2`~W^EXqaPx&FCNJo13^W(1$Hm;vM<RM!9AD
z`fY_S7lM5lzIEa`A2y<oBLC%=Lw1kag;ZbmtJvw!%5dw2QyZ88T0GsrUJ|_;$Q~ih
z`knQ~r}<3yk}FjwJe%^G+7neYOe5QWE9YWa5ThTM;?B*?nJMsi&p*|$xo*Mbp?^>q
zRu~Hz#;U2XzFV2B2P}(VBKfFBGC%4K;jwtY5GA|VTo}*$nAmSE`|2}&|DuOi9`H|G
zQ@(r0>v8<VUmwG-^<VR2&*sRhe{i8Ejvc`%4#l&4bhS%h6Di}3yJe@keK!k2=Cy~>
zr@-*NESu6PPhxFayma}$!P~mmNYn!dDQQ3WYTUXet7n4zOO5rN*nci2<MyKIuvB)c
z4=-)UH#U{Mr{vB{m<!FoGgvah{tOB&ED;#K&w@iHdyFt<AH)AUgq`~s{!A3D_p>we
F{txJWE{^~J

diff --git a/doc/basis.tex b/doc/basis.tex
index 22e2d81b8..87df2e0a9 100644
--- a/doc/basis.tex
+++ b/doc/basis.tex
@@ -55,6 +55,10 @@ \subsection{QCD on a lattice}
   D_h(\bar\mu, \bar\epsilon)  = D_\mathrm{W}\ 1_f +
   i\bar\mu\gamma_5\tau^3 - \bar\epsilon \tau^1 \, .
 \end{equation}
+It has the property
+\[
+D_h^\dagger = \tau^1\gamma_5 D_h \gamma_5 \tau^1\,.
+\]
 Note that this notation is not unique. Equivalently -- as used in
 Ref.~\cite{Chiarappa:2006ae} -- one may write
 \begin{equation}
diff --git a/doc/eo_pre.tex b/doc/eo_pre.tex
index 325e5e7e6..cf53ca63d 100644
--- a/doc/eo_pre.tex
+++ b/doc/eo_pre.tex
@@ -222,7 +222,11 @@ \subsubsection{Mass non-degenerate flavour doublet}
   \end{pmatrix}
 \end{equation*}
 with the previous definitions of $M_{eo}$ etc. The inplementation for
-the HMC is very similar to the mass degenerate case.
+the HMC is very similar to the mass degenerate case. $\hat Q^h$ has
+again a hermitian conjugate given by
+\[
+(\hat Q^h)^\dagger = \tau^1\ \hat Q^h\ \tau^1
+\]
 
 \subsubsection{Combining Clover and Twisted mass term}
 
@@ -404,7 +408,7 @@ \subsubsection{Combining Clover and Twisted mass term}
 For the implementation it is useful to compute the term
 \begin{equation}
   \label{eq:Tee}
-  1 + \frac{i}{2} c_\mathrm{sw}
+  1+T_{a\alpha,b\beta} = 1 + \frac{i}{2} c_\mathrm{sw}
   \kappa\sigma_{\mu\nu}^{\alpha\beta}F_{\mu\nu}^{\alpha\beta}(x)
 \end{equation}  
 once for all $x$. This is implemented in {\ttfamily clover\_leaf.c} in
@@ -414,9 +418,10 @@ \subsubsection{Combining Clover and Twisted mass term}
 mass term in later on. 
 
 The term in eq.~(\ref{eq:Tee}) represents two complex $6\times6$ matrices
-per site. As the off-diagonal $3\times3$ matrices are just inverse to
-each other, we get away with storing two times three $3\times3$
-complex matrices. These are stored in the array {\ttfamily
+per site. Since $1+T$ is hermitian, the off-diagonal $3\times3$
+matrices of each $6\times6$ matrix are hermitian conjugates of each
+other, so we get away with storing two times three
+$3\times3$ complex matrices. These are stored in the array {\ttfamily
   sw[VOLUME][3][2]} of type {\ttfamily su3}. Here, {\ttfamily
   sw[x][0][0]} is the upper diagonal $3\times3$ matrix, {\ttfamily
   sw[x][1][0]} the upper off-diagnoal $3\times3$ matrix and {\ttfamily
@@ -491,15 +496,25 @@ \subsubsection{Combining Clover and Nondegenerate Twisted mass term}
 Now we have
 \[
 \hat Q^h_{oo} = \gamma_5(M_{oo}^h -
-(M_{oe}^h\ (1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1}\ M_{eo}^h)
+M_{oe}^h\ (M_{ee}^h)^{-1}\ M_{eo}^h)\,,
+\]
+with
+\[
+M_{oo|ee}^h = 1+T_{oo|ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1\,.
 \]
-where
+Because $1+T_{oo|ee}$ is hermitian, we can invert $M_{ee}^h$ by
 \[
 (1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1} =
 \frac{1}{(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}
-(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)
+(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)\,.
+\]
+{\bf check!}\\
+Therefore, the determinant we have to compute is
+\[
+\det(Q^h) =
+\det[\gamma_5(1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)]\
+\det[\hat Q^h_{oo}]
 \]
-if $(1+T_{ee})^2$ is symmetric.
 
 \subsection{Inversion}
 
diff --git a/tests/test_clover_six_invert.h b/tests/test_clover_six_invert.h
index 7c1b8d379..35c6ee237 100644
--- a/tests/test_clover_six_invert.h
+++ b/tests/test_clover_six_invert.h
@@ -4,10 +4,12 @@
 #include <cu/cu.h>
 
 TEST(clover_six_invert);
+TEST(clover_six_det);
 
 TEST_SUITE(CLOVER){
   TEST_ADD(clover_six_invert),
-  TEST_SUITE_CLOSURE
+    TEST_ADD(clover_six_det),
+    TEST_SUITE_CLOSURE
 };
 
 #endif /* _TEST_CLOVER_SIX_INVERT_H */

From fecfe604d9f82d819098392cf8a612c7b2465fd7 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 30 May 2012 19:09:46 +0200
Subject: [PATCH 019/110] typo re-introduced by merging corrected

---
 cloverdet_monomial.c      |  4 ++--
 cloverdetratio_monomial.c | 12 ++++++------
 det_monomial.c            |  6 +++---
 detratio_monomial.c       | 12 ++++++------
 ndpoly_monomial.c         | 24 ++++++++++++------------
 poly_monomial.c           | 28 ++++++++++++++--------------
 6 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/cloverdet_monomial.c b/cloverdet_monomial.c
index 7daa14c02..5022d6b0a 100644
--- a/cloverdet_monomial.c
+++ b/cloverdet_monomial.c
@@ -96,13 +96,13 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
   // to get the even sites of X_e
   H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   // \delta Q sandwitched by Y_o^\dagger and X_e
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
   
   // to get the even sites of Y_e
   H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   // \delta Q sandwitched by Y_e^\dagger and X_o
   // uses the gauge field in hf and changes the derivative fields in hf
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor);
   
   // here comes the clover term...
   // computes the insertion matrices for S_eff
diff --git a/cloverdetratio_monomial.c b/cloverdetratio_monomial.c
index cb269b3f1..d55ae3463 100644
--- a/cloverdetratio_monomial.c
+++ b/cloverdetratio_monomial.c
@@ -99,12 +99,12 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   /* to get the even sites of X */
   H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
   
   /* to get the even sites of Y */
   H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor); 
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor); 
 
   // here comes the clover term...
   // computes the insertion matrices for S_eff
@@ -127,12 +127,12 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   /* to get the even sites of X */
   H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
   
   /* to get the even sites of Y */
   H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor);
 
   // here comes the clover term...
   // computes the insertion matrices for S_eff
@@ -210,12 +210,12 @@ void cloverdetratio_derivative(const int no, hamiltonian_field_t * const hf) {
   /* to get the even sites of X */
   H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
-  deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+  deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
   
   /* to get the even sites of Y */
   H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
-  deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor); 
+  deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor); 
 
   // here comes the clover term...
   // computes the insertion matrices for S_eff
diff --git a/det_monomial.c b/det_monomial.c
index 18a6bacd9..4e8c62ff4 100644
--- a/det_monomial.c
+++ b/det_monomial.c
@@ -90,12 +90,12 @@ void det_derivative(const int id, hamiltonian_field_t * const hf) {
     /* to get the even sites of X_e */
     H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -1.);
     /* \delta Q sandwitched by Y_o^\dagger and X_e */
-    deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+    deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
     
     /* to get the even sites of Y_e */
     H_eo_tm_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, +1);
     /* \delta Q sandwitched by Y_e^\dagger and X_o */
-    deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);
+    deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor);
   } 
   else {
     /*********************************************************************
@@ -146,7 +146,7 @@ void det_derivative(const int id, hamiltonian_field_t * const hf) {
     }
     
     /* \delta Q sandwitched by Y^\dagger and X */
-    deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);
+    deriv_Sb_D_psi(mnl->w_fields[0], mnl->w_fields[1], hf, mnl->forcefactor);
   }
   g_mu = g_mu1;
   boundary(g_kappa);
diff --git a/detratio_monomial.c b/detratio_monomial.c
index fa34ad28b..aa93f013f 100644
--- a/detratio_monomial.c
+++ b/detratio_monomial.c
@@ -99,12 +99,12 @@ void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
     /* to get the even sites of X */
     H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -1.);
     /* \delta Q sandwitched by Y_o^\dagger and X_e */
-    deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+    deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
     
     /* to get the even sites of Y */
     H_eo_tm_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, +1);
     /* \delta Q sandwitched by Y_e^\dagger and X_o */
-    deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor); 
+    deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor); 
 
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);
@@ -117,12 +117,12 @@ void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
     /* to get the even sites of X */
     H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -1.);
     /* \delta Q sandwitched by Y_o^\dagger and X_e */
-    deriv_Sb(OE, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+    deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
     
     /* to get the even sites of Y */
     H_eo_tm_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, +1);
     /* \delta Q sandwitched by Y_e^\dagger and X_o */
-    deriv_Sb(EO, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);
+    deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor);
   } 
   else { /* no even/odd preconditioning */
     /*********************************************************************
@@ -182,7 +182,7 @@ void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
     }
 
     /* \delta Q sandwitched by Y^\dagger and X */
-    deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor); 
+    deriv_Sb_D_psi(mnl->w_fields[0], mnl->w_fields[1], hf, mnl->forcefactor); 
     
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);
@@ -192,7 +192,7 @@ void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
     mul_r(mnl->w_fields[0], -1., mnl->pf, VOLUME);
     
     /* \delta Q sandwitched by Y^\dagger and X */
-    deriv_Sb_D_psi(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);
+    deriv_Sb_D_psi(mnl->w_fields[0], mnl->w_fields[1], hf, mnl->forcefactor);
   }
   g_mu = g_mu1;
   boundary(g_kappa);
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 21ac13352..9b2036540 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -101,16 +101,16 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 	      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
       
       /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
-      deriv_Sb(EO, g_spinor_field[DUM_DERI], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);      /* UP */
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+1], g_chi_dn_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);    /* DN */
+      deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);      /* UP */
+      deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);    /* DN */
       
       /* Get the even parts of the  (2N-j)-th  chi_spinors */
       H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
 	      g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
       
       /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
-      deriv_Sb(OE, g_chi_up_spinor_field[j-1], g_spinor_field[DUM_DERI], hf, mnl->forcefactor);
-      deriv_Sb(OE, g_chi_dn_spinor_field[j-1], g_spinor_field[DUM_DERI+1], hf, mnl->forcefactor);
+      deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf, mnl->forcefactor);
+      deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[1], hf, mnl->forcefactor);
     }
   } 
   else if(g_epsbar == 0.0) {
@@ -135,19 +135,19 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
       Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[j-1]); 
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_chi_up_spinor_field[phmc_dop_n_cheby], EO, -1.);
-      deriv_Sb(OE, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+      H_eo_tm_inv_psi(mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], EO, -1.);
+      deriv_Sb(OE, mnl->w_fields[3], mnl->w_fields[2], hf, mnl->forcefactor); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], EO, 1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);
+      H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[3], EO, 1.); 
+      deriv_Sb(EO, mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);
 
       Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[mnl->MDPolyDegree]); 
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2],g_spinor_field[DUM_DERI+3], EO, +1.);
-      deriv_Sb(OE, g_chi_up_spinor_field[j-1] , g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+      H_eo_tm_inv_psi(mnl->w_fields[2],mnl->w_fields[3], EO, +1.);
+      deriv_Sb(OE, g_chi_up_spinor_field[j-1] , mnl->w_fields[2], hf, mnl->forcefactor); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_chi_up_spinor_field[j-1], EO, -1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], hf, mnl->forcefactor);
+      H_eo_tm_inv_psi(mnl->w_fields[2], g_chi_up_spinor_field[j-1], EO, -1.); 
+      deriv_Sb(EO, mnl->w_fields[2], mnl->w_fields[3], hf, mnl->forcefactor);
     }
   }
   /*
diff --git a/poly_monomial.c b/poly_monomial.c
index 499e5be8a..354645fce 100644
--- a/poly_monomial.c
+++ b/poly_monomial.c
@@ -120,20 +120,20 @@ void poly_derivative(const int id, hamiltonian_field_t * const hf){
       
 
       Qtm_minus_psi(mnl->w_fields[1],chi_spinor_field[j-1]); 
+
+      H_eo_tm_inv_psi(mnl->w_fields[0], chi_spinor_field[degreehalf+1], EO, -1.);
+      deriv_Sb(OE, mnl->w_fields[1], mnl->w_fields[0], hf, mnl->forcefactor); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], chi_spinor_field[degreehalf+1], EO, -1.);
-      deriv_Sb(OE, g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
-      
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], EO, 1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], chi_spinor_field[degreehalf+1], hf, mnl->forcefactor);
+      H_eo_tm_inv_psi(mnl->w_fields[0], mnl->w_fields[1], EO, 1.); 
+      deriv_Sb(EO, mnl->w_fields[0], chi_spinor_field[degreehalf+1], hf, mnl->forcefactor);
     
       Qtm_minus_psi(mnl->w_fields[1],chi_spinor_field[degreehalf+1]); 
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2],g_spinor_field[DUM_DERI+3], EO, +1.);
-      deriv_Sb(OE, chi_spinor_field[j-1] , g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor); 
+      H_eo_tm_inv_psi(mnl->w_fields[0],mnl->w_fields[1], EO, +1.);
+      deriv_Sb(OE, chi_spinor_field[j-1] , mnl->w_fields[0], hf, mnl->forcefactor); 
       
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], chi_spinor_field[j-1], EO, -1.); 
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3], hf, mnl->forcefactor);
+      H_eo_tm_inv_psi(mnl->w_fields[0], chi_spinor_field[j-1], EO, -1.); 
+      deriv_Sb(EO, mnl->w_fields[0], mnl->w_fields[1], hf, mnl->forcefactor);
     }
 
 
@@ -156,11 +156,11 @@ void poly_derivative(const int id, hamiltonian_field_t * const hf){
       g_mu=mnl->mu2;
       boundary(mnl->kappa2);
 
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2],chi_spinor_field[degreehalf], EO, -1.);
-      deriv_Sb(OE, mnl->pf , g_spinor_field[DUM_DERI+2], hf, mnl->forcefactor);
-      
-      H_eo_tm_inv_psi(g_spinor_field[DUM_DERI+2], mnl->pf, EO, +1.);
-      deriv_Sb(EO, g_spinor_field[DUM_DERI+2], chi_spinor_field[degreehalf], hf, mnl->forcefactor);
+      H_eo_tm_inv_psi(mnl->w_fields[0],chi_spinor_field[degreehalf], EO, -1.);
+      deriv_Sb(OE, mnl->pf , mnl->w_fields[0], hf, mnl->forcefactor);
+
+      H_eo_tm_inv_psi(mnl->w_fields[0], mnl->pf, EO, +1.);
+      deriv_Sb(EO, mnl->w_fields[0], chi_spinor_field[degreehalf], hf, mnl->forcefactor);
     }
   } 
   else {

From 8714a909ea5a6f098d16a2333a8b6848d50ca5e1 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Thu, 31 May 2012 10:30:31 +0200
Subject: [PATCH 020/110] sw_trace_nd tested for eps=0

---
 clover_leaf.c           | 24 ++++++++++++++++++------
 clover_trlog_monomial.c |  6 +++---
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index 06440ce78..296e5aee2 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -439,6 +439,16 @@ double sw_trace(const int ieo, const double mu) {
 
 }
 
+
+// This function computes the trace-log part of the clover term
+// in case of even/odd preconditioning in the nd case
+//
+// it is expected that sw_term is called beforehand such that
+// the array sw is populated properly
+//
+// it is tested to deliver bit-identical results to sw_trace
+// if eps is set to zero
+
 double sw_trace_nd(const int ieo, const double mu, const double eps) {
   int i,x,icx,ioff;
   static su3 v;
@@ -446,7 +456,7 @@ double sw_trace_nd(const int ieo, const double mu, const double eps) {
   static double tra;
   static double ks,kc,tr,ts,tt;
   static _Complex double det[2];
-  
+  double se = (eps*eps)*(eps*eps)*(eps*eps);
   ks=0.0;
   kc=0.0;
 
@@ -464,13 +474,15 @@ double sw_trace_nd(const int ieo, const double mu, const double eps) {
       _su3_dagger(v, sw[x][1][i]); 
       populate_6x6_matrix(a, &v, 3, 0);
       populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
-      // we add the twisted mass term
+      // we add the twisted mass term prop to tau^3
       if(i == 0) add_tm(a, mu);
       else add_tm(a, -mu);
-      // and compute the tr log (or log det)
       det[i] = six_det(a);
     }
-    tra = log(conj(det[0])*det[0]*conj(det[1])*det[1] - eps*eps);
+    // and compute the tr log (or log det)
+    // for the 2x2 matrix in flavour space
+    // with eps*tau^1 in the off diagonal
+    tra = log(conj(det[0])*det[0]*conj(det[1])*det[1] - se*se);
 
     tr=tra+kc;
     ts=tr+ks;
@@ -586,7 +598,7 @@ void sw_invert(const int ieo, const double mu) {
   return;
 }
 
-inline void add_shift(_Complex double a[6][6], const double mshift) {
+inline void add_shift_6x6(_Complex double a[6][6], const double mshift) {
   for(int i = 0; i < 6; i++) {
     a[i][i] += mshift;
   }
@@ -624,7 +636,7 @@ void sw_invert_nd(const double mshift) {
 
       mult_6x6(b, a, a);
       // we add the mass shift term
-      add_shift(b, mshift);
+      add_shift_6x6(b, mshift);
       // so b = (1+T)^2 + shift
       err = six_invert(b); 
       // here we need to catch the error! 
diff --git a/clover_trlog_monomial.c b/clover_trlog_monomial.c
index c91e5168d..be8fc84c7 100644
--- a/clover_trlog_monomial.c
+++ b/clover_trlog_monomial.c
@@ -36,7 +36,7 @@
 #include "clover_trlog_monomial.h"
 
 void clover_trlog_derivative(const int id, hamiltonian_field_t * const hf) {
-  monomial * mnl = &monomial_list[id];
+  //monomial * mnl = &monomial_list[id];
   /* this term has no derivative */
   /* so a dummy function         */
   if(g_proc_id == 0 && g_debug_level > 4) {
@@ -53,7 +53,7 @@ void clover_trlog_heatbath(const int id, hamiltonian_field_t * const hf) {
   init_sw_fields();
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
   /*compute the contribution from the clover trlog term */
-  mnl->energy0 = -sw_trace_nd(EO, mnl->mu,0);
+  mnl->energy0 = -sw_trace(EO, mnl->mu);
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called clover_trlog_heatbath for id %d E = %e\n", id, mnl->energy0);
   }
@@ -65,7 +65,7 @@ double clover_trlog_acc(const int id, hamiltonian_field_t * const hf) {
   mnl->energy1 = 0.;
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
   /*compute the contribution from the clover trlog term */
-  mnl->energy1 = -sw_trace(EO, mnl->mu);   
+  mnl->energy1 = -sw_trace(EO, mnl->mu);
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called clover_trlog_acc for id %d dH = %1.4e\n", 
 	   id, mnl->energy1 - mnl->energy0);

From 2afa91a8fe62c470783f94e8f9e370155e70b726 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Thu, 31 May 2012 13:56:34 +0200
Subject: [PATCH 021/110] renamed clover to clovertm_operators and
 Nondegenerate_Matrix to tm_operators_nd

---
 Makefile.in                                 | 4 ++--
 Ptilde_nd.c                                 | 2 +-
 chebyshev_polynomial.c                      | 2 +-
 chebyshev_polynomial_nd.c                   | 2 +-
 clover_leaf.c                               | 4 ++--
 clover_trlog_monomial.c                     | 2 +-
 cloverdet_monomial.c                        | 2 +-
 cloverdetratio_monomial.c                   | 2 +-
 cloverndpoly_monomial.c                     | 4 ++--
 clover.c => clovertm_operators.c            | 2 +-
 clover.h => clovertm_operators.h            | 5 +++--
 eigenvalues_bi.c                            | 2 +-
 invert_clover_eo.c                          | 2 +-
 invert_doublet_eo.c                         | 2 +-
 io/utils_write_first_message.c              | 8 ++++----
 max_eigenvalues_bi.c                        | 2 +-
 monomial.c                                  | 2 +-
 nddetratio_monomial.c                       | 4 ++--
 ndpoly_monomial.c                           | 4 ++--
 operator.c                                  | 4 ++--
 poly_monomial.c                             | 2 +-
 prepare_source.c                            | 2 +-
 Nondegenerate_Matrix.c => tm_operators_nd.c | 2 +-
 Nondegenerate_Matrix.h => tm_operators_nd.h | 4 ++--
 24 files changed, 36 insertions(+), 35 deletions(-)
 rename clover.c => clovertm_operators.c (99%)
 rename clover.h => clovertm_operators.h (95%)
 rename Nondegenerate_Matrix.c => tm_operators_nd.c (99%)
 rename Nondegenerate_Matrix.h => tm_operators_nd.h (97%)

diff --git a/Makefile.in b/Makefile.in
index 2ae2d6433..ab83325d8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -46,7 +46,7 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	expo get_staples update_backward_gauge \
 	measure_rectangles get_rectangle_staples  \
 	test/check_geometry test/check_xchange \
-	test/overlaptests clover clover_leaf \
+	test/overlaptests clovertm_operators clover_leaf \
 	invert_eo invert_doublet_eo update_gauge \
 	polyakov_loop getopt sighandler reweighting_factor \
 	source_generation boundary update_tm ranlxd  \
@@ -56,7 +56,7 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	xchange_field xchange_gauge prepare_source \
 	init_gauge_field init_geometry_indices init_spinor_field \
 	init_dirac_halfspinor xchange_halffield \
-	Nondegenerate_Matrix nddetratio_monomial \
+	tm_operators_nd nddetratio_monomial \
 	chebyshev_polynomial_nd Ptilde_nd  \
 	init_chi_spinor_field reweighting_factor_nd \
 	init_bispinor_field eigenvalues_bi D_psi \
diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index c9bba2e8c..9ec08e7c4 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -32,7 +32,7 @@
 #include "linalg_eo.h"
 #include "start.h"
 #include "tm_operators.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "phmc.h"
 #include "Ptilde_nd.h"
diff --git a/chebyshev_polynomial.c b/chebyshev_polynomial.c
index aad75ba68..f81a53d9a 100644
--- a/chebyshev_polynomial.c
+++ b/chebyshev_polynomial.c
@@ -31,7 +31,7 @@
 #include "start.h"
 #include "tm_operators.h"
 #include "chebyshev_polynomial.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 
 
 #define PI 3.141592653589793
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 21f3e9323..2fce78dca 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -29,7 +29,7 @@
 #include "linalg_eo.h"
 #include "start.h"
 #include "tm_operators.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "phmc.h"
 #include "chebyshev_polynomial_nd.h"
 
diff --git a/clover_leaf.c b/clover_leaf.c
index 296e5aee2..c4d35cd7b 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -2,7 +2,7 @@
  *
  * Copyright (C) 1995 Ulli Wolff, Stefan Sint
  *               2001,2005 Martin Hasenbusch
- *               2011 Carsten Urbach
+ *               2011,2012 Carsten Urbach
  *
  * This file is part of tmLQCD.
  *
@@ -46,7 +46,7 @@
 #include "su3.h"
 #include "sse.h"
 #include "su3adj.h"
-#include "clover.h"
+#include "clovertm_operators.h"
 #include "clover_leaf.h"
 
 const double tiny_t = 1.0e-20;
diff --git a/clover_trlog_monomial.c b/clover_trlog_monomial.c
index be8fc84c7..240bf9f32 100644
--- a/clover_trlog_monomial.c
+++ b/clover_trlog_monomial.c
@@ -29,8 +29,8 @@
 #include "su3.h"
 #include "su3adj.h"
 #include "su3spinor.h"
+#include "clovertm_operators.h"
 #include "clover_leaf.h"
-#include "clover.h"
 #include "monomial.h"
 #include "Hopping_Matrix.h"
 #include "clover_trlog_monomial.h"
diff --git a/cloverdet_monomial.c b/cloverdet_monomial.c
index 5022d6b0a..a80376576 100644
--- a/cloverdet_monomial.c
+++ b/cloverdet_monomial.c
@@ -43,7 +43,7 @@
 #include "hamiltonian_field.h"
 #include "boundary.h"
 #include "monomial.h"
-#include "clover.h"
+#include "clovertm_operators.h"
 #include "cloverdet_monomial.h"
 
 /* think about chronological solver ! */
diff --git a/cloverdetratio_monomial.c b/cloverdetratio_monomial.c
index d55ae3463..ece7b6f5b 100644
--- a/cloverdetratio_monomial.c
+++ b/cloverdetratio_monomial.c
@@ -43,7 +43,7 @@
 #include "solver/solver.h"
 #include "read_input.h"
 #include "smearing/stout.h"
-#include "clover.h"
+#include "clovertm_operators.h"
 #include "clover_leaf.h"
 
 #include "monomial.h"
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 921f4c3f7..092409e18 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -35,10 +35,10 @@
 #include "deriv_Sb.h"
 #include "tm_operators.h"
 #include "chebyshev_polynomial.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "Hopping_Matrix.h"
 #include "phmc.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "reweighting_factor_nd.h"
diff --git a/clover.c b/clovertm_operators.c
similarity index 99%
rename from clover.c
rename to clovertm_operators.c
index 742848550..444375ecf 100644
--- a/clover.c
+++ b/clovertm_operators.c
@@ -37,7 +37,7 @@
 #include "linalg_eo.h"
 #include "Hopping_Matrix.h"
 #include "tm_operators.h"
-#include "clover.h"
+#include "clovertm_operators.h"
 
 
 su3 *** sw;
diff --git a/clover.h b/clovertm_operators.h
similarity index 95%
rename from clover.h
rename to clovertm_operators.h
index ccbdc27f0..3fafb7480 100644
--- a/clover.h
+++ b/clovertm_operators.h
@@ -2,6 +2,7 @@
  *
  * Copyright (C) 2005 Martin Hasenbusch
  *               2009 Carsten Urbach
+ *               2012 Carsten Urbach
  *
  * This file is part of tmLQCD.
  *
@@ -19,8 +20,8 @@
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
  ***********************************************************************/
 
-#ifndef _CLOVER_H
-#define _CLOVER_H
+#ifndef _CLOVERTM_OPERATORS_H
+#define _CLOVERTM_OPERATORS_H
 
 #include "su3.h"
 
diff --git a/eigenvalues_bi.c b/eigenvalues_bi.c
index 95d62442c..5d6e9767b 100644
--- a/eigenvalues_bi.c
+++ b/eigenvalues_bi.c
@@ -51,7 +51,7 @@
 #include "solver/solver.h"
 #include "solver/jdher_bi.h"
 #include "eigenvalues_bi.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 
 
 double eigenvalues_bi(int * nr_of_eigenvalues,  
diff --git a/invert_clover_eo.c b/invert_clover_eo.c
index a62f3da47..074d20ba8 100644
--- a/invert_clover_eo.c
+++ b/invert_clover_eo.c
@@ -40,7 +40,7 @@
 #include"linalg_eo.h"
 #include"tm_operators.h"
 #include"Hopping_Matrix.h"
-#include"clover.h"
+#include"clovertm_operators.h"
 #include"D_psi.h"
 #include"linsolve.h"
 #include"gamma.h"
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index e8ba6a44e..c1571f7a6 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -45,7 +45,7 @@
 #include"solver/solver.h"
 #include"read_input.h"
 #include"xchange.h"
-#include"Nondegenerate_Matrix.h"
+#include"tm_operators_nd.h"
 #include"invert_doublet_eo.h"
 
 
diff --git a/io/utils_write_first_message.c b/io/utils_write_first_message.c
index e16357350..b92b61464 100644
--- a/io/utils_write_first_message.c
+++ b/io/utils_write_first_message.c
@@ -129,10 +129,10 @@ int write_first_messages(FILE * parameterfile, const int inv) {
   if(inv != 1) {
     printf("# mu = %f\n", g_mu/2./g_kappa);
     printf("# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1);
-    printf("# SFBC parameters (gauge):\n");
-    printf("# g_Ct = %f, g_Cs = %f\n", g_Ct, g_Cs);
-    printf("# g_C1ss = %f, g_C1tss = %f, g_C1tts = %f\n", g_C1ss, g_C1tss, g_C1tts);
-    printf("# g_eta = %f\n", g_eta);
+/*     printf("# SFBC parameters (gauge):\n"); */
+/*     printf("# g_Ct = %f, g_Cs = %f\n", g_Ct, g_Cs); */
+/*     printf("# g_C1ss = %f, g_C1tss = %f, g_C1tts = %f\n", g_C1ss, g_C1tss, g_C1tts); */
+/*     printf("# g_eta = %f\n", g_eta); */
     printf("# Using %s precision for the inversions!\n", 
 	   g_relative_precision_flag ? "relative" : "absolute");
   }
diff --git a/max_eigenvalues_bi.c b/max_eigenvalues_bi.c
index 05a6903c4..09815652f 100644
--- a/max_eigenvalues_bi.c
+++ b/max_eigenvalues_bi.c
@@ -54,7 +54,7 @@
 #include "solver/pjdher_bi.h"
 #endif
 #include "max_eigenvalues_bi.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 
 /* Needed only if you want to create an EV-file
 #include "rw_ev.h"
diff --git a/monomial.c b/monomial.c
index 45e7b6140..eadb1a482 100644
--- a/monomial.c
+++ b/monomial.c
@@ -32,8 +32,8 @@
 #include "su3adj.h"
 #include "su3spinor.h"
 #include "tm_operators.h"
+#include "clovertm_operators.h"
 #include "clover_leaf.h"
-#include "clover.h"
 #include "ranlxd.h"
 #include "sse.h"
 #include "linalg_eo.h"
diff --git a/nddetratio_monomial.c b/nddetratio_monomial.c
index ffb8df4d3..6ba80a1a5 100644
--- a/nddetratio_monomial.c
+++ b/nddetratio_monomial.c
@@ -35,12 +35,12 @@
 #include "deriv_Sb.h"
 #include "tm_operators.h"
 #include "chebyshev_polynomial.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "Hopping_Matrix.h"
 #include "phmc.h"
 #include "boundary.h"
 #include "gamma.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "reweighting_factor_nd.h"
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 9b2036540..0dced7b56 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -35,10 +35,10 @@
 #include "deriv_Sb.h"
 #include "tm_operators.h"
 #include "chebyshev_polynomial.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "Hopping_Matrix.h"
 #include "phmc.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "reweighting_factor_nd.h"
diff --git a/operator.c b/operator.c
index aaf51c025..726e4a7d0 100644
--- a/operator.c
+++ b/operator.c
@@ -38,7 +38,7 @@
 #include "linalg_eo.h"
 #include "D_psi.h"
 #include "Dov_psi.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "Hopping_Matrix.h"
 #include "invert_eo.h"
 #include "invert_doublet_eo.h"
@@ -55,7 +55,7 @@
 #include <io/utils.h>
 #include "test/overlaptests.h"
 #include "solver/index_jd.h"
-#include "clover.h"
+#include "clovertm_operators.h"
 #include "clover_leaf.h"
 #include "operator.h"
 
diff --git a/poly_monomial.c b/poly_monomial.c
index 354645fce..4b151fceb 100644
--- a/poly_monomial.c
+++ b/poly_monomial.c
@@ -47,7 +47,7 @@
 #include "solver/solver.h"
 #include "solver/chrono_guess.h"
 #include "solver/eigenvalues.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "Hopping_Matrix.h"
 #include "hamiltonian_field.h"
 #include "phmc.h"
diff --git a/prepare_source.c b/prepare_source.c
index 9fe62bc82..3a34349dc 100644
--- a/prepare_source.c
+++ b/prepare_source.c
@@ -40,7 +40,7 @@
 #include "su3.h"
 #include "operator.h"
 #include "linalg_eo.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 #include "source_generation.h"
 #include "prepare_source.h"
 
diff --git a/Nondegenerate_Matrix.c b/tm_operators_nd.c
similarity index 99%
rename from Nondegenerate_Matrix.c
rename to tm_operators_nd.c
index a6157de48..f3bd6270d 100644
--- a/Nondegenerate_Matrix.c
+++ b/tm_operators_nd.c
@@ -39,7 +39,7 @@
 #include "linsolve.h"
 #include "linalg_eo.h"
 #include "tm_operators.h"
-#include "Nondegenerate_Matrix.h"
+#include "tm_operators_nd.h"
 
 
 void mul_one_minus_iconst(spinor * const l, spinor * const k, const double mu);
diff --git a/Nondegenerate_Matrix.h b/tm_operators_nd.h
similarity index 97%
rename from Nondegenerate_Matrix.h
rename to tm_operators_nd.h
index 7176f7009..4625e2ab0 100644
--- a/Nondegenerate_Matrix.h
+++ b/tm_operators_nd.h
@@ -19,8 +19,8 @@
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
  ***********************************************************************/
 
-#ifndef _NONDEGENRATE_MATRIX_H
-#define _NONDEGENRATE_MATRIX_H
+#ifndef _TM_OPERATTORS_ND_H
+#define _TM_OPERATTORS_ND_H
 
 void mul_one_pm_itau2(spinor * const p, spinor * const q,
 		      spinor * const r, spinor * const s,

From f3084b83f7fe6b0e99828611c53d549d79616341 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 5 Sep 2012 01:04:52 +0200
Subject: [PATCH 022/110] online_measurement: use separate MPI_Gather send buffers; this should fix issue 138

---
 online_measurement.c | 46 ++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/online_measurement.c b/online_measurement.c
index e37e09ad4..fcffa4c79 100644
--- a/online_measurement.c
+++ b/online_measurement.c
@@ -54,12 +54,14 @@
 
 void online_measurement(const int traj, const int id, const int ieo) {
   int i, j, t, tt, t0;
-  double *Cpp, *Cpa, *Cp4;
+  double *Cpp = NULL, *Cpa = NULL, *Cp4 = NULL;
   double res = 0., respa = 0., resp4 = 0.;
   double atime, etime;
   float tmp;
 #ifdef MPI
   double mpi_res = 0., mpi_respa = 0., mpi_resp4 = 0.;
+  // send buffer for MPI_Gather
+  double *sCpp = NULL, *sCpa = NULL, *sCp4 = NULL;
 #endif
   FILE *ofs;
   char *filename;
@@ -83,10 +85,20 @@ void online_measurement(const int traj, const int id, const int ieo) {
   }
   atime = gettime();
 
-  Cpp = (double*) calloc(g_nproc_t*T, sizeof(double));
-  Cpa = (double*) calloc(g_nproc_t*T, sizeof(double));
-  Cp4 = (double*) calloc(g_nproc_t*T, sizeof(double));
-
+#ifdef MPI
+  sCpp = (double*) calloc(T, sizeof(double));
+  sCpa = (double*) calloc(T, sizeof(double));
+  sCp4 = (double*) calloc(T, sizeof(double));
+  if(g_mpi_time_rank == 0) {
+    Cpp = (double*) calloc(g_nproc_t*T, sizeof(double));
+    Cpa = (double*) calloc(g_nproc_t*T, sizeof(double));
+    Cp4 = (double*) calloc(g_nproc_t*T, sizeof(double));
+  }
+#else
+  Cpp = (double*) calloc(T, sizeof(double));
+  Cpa = (double*) calloc(T, sizeof(double));
+  Cp4 = (double*) calloc(T, sizeof(double));
+#endif
   source_generation_pion_only(g_spinor_field[0], g_spinor_field[1], 
 			      t0, 0, traj);
 
@@ -119,18 +131,22 @@ void online_measurement(const int traj, const int id, const int ieo) {
     respa = mpi_respa;
     MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
     resp4 = mpi_resp4;
+    sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
+    sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
+    sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
+#else
+    Cpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
+    Cpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
+    Cp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
 #endif
-    Cpp[t+g_proc_coords[0]*T] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
-    Cpa[t+g_proc_coords[0]*T] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
-    Cp4[t+g_proc_coords[0]*T] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
   }
 
 #ifdef MPI
   /* some gymnastics needed in case of parallelisation */
   if(g_mpi_time_rank == 0) {
-    MPI_Gather(&Cpp[g_proc_coords[0]*T], T, MPI_DOUBLE, Cpp, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-    MPI_Gather(&Cpa[g_proc_coords[0]*T], T, MPI_DOUBLE, Cpa, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
-    MPI_Gather(&Cp4[g_proc_coords[0]*T], T, MPI_DOUBLE, Cp4, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+    MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+    MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
+    MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
   }
 #endif
 
@@ -168,8 +184,14 @@ void online_measurement(const int traj, const int id, const int ieo) {
     fprintf( ofs, "6  1  %d  %e  %e\n", t, Cp4[tt], 0.);
     fclose(ofs);
   }
+#ifdef MPI
+  if(g_mpi_time_rank == 0) {
+    free(Cpp); free(Cpa); free(Cp4);
+  }
+  free(sCpp); free(sCpa); free(sCp4);
+#else
   free(Cpp); free(Cpa); free(Cp4);
-  
+#endif
   etime = gettime();
   
   if(g_proc_id == 0 && g_debug_level > 0) {

From 0766746aa9f479fa2f132113ae6e4d0598515234 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 5 Oct 2012 10:16:10 +0200
Subject: [PATCH 023/110] sw_trace_nd and sw_invert_nd with OpenMP

---
 clover_leaf.c | 51 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index 853d53a91..e3f439ec7 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -498,12 +498,23 @@ double sw_trace(const int ieo, const double mu) {
 // if eps is set to zero
 
 double sw_trace_nd(const int ieo, const double mu, const double eps) {
-  int i,x,icx,ioff;
-  static su3 v;
-  static _Complex double a[6][6];
-  static double tra;
-  static double ks,kc,tr,ts,tt;
-  static _Complex double det[2];
+  double ALIGN res = 0.0;
+#ifdef MPI
+  double ALIGN mres;
+#endif
+
+#ifdef OMP
+#pragma omp parallel
+  {
+  int thread_num = omp_get_thread_num();
+#endif
+
+  int x,ioff;
+  su3 ALIGN v;
+  _Complex double ALIGN a[6][6];
+  double ALIGN tra;
+  double ALIGN ks,kc,tr,ts,tt;
+  _Complex double ALIGN det[2];
   double se = (eps*eps)*(eps*eps)*(eps*eps);
   ks=0.0;
   kc=0.0;
@@ -514,9 +525,13 @@ double sw_trace_nd(const int ieo, const double mu, const double eps) {
   else {
     ioff=(VOLUME+RAND)/2;
   }
-  for(icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
     x = g_eo2lexic[icx];
-    for(i=0;i<2;i++) {
+    for(unsigned int i = 0; i < 2; i++) {
       populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
       populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
       _su3_dagger(v, sw[x][1][i]); 
@@ -539,11 +554,23 @@ double sw_trace_nd(const int ieo, const double mu, const double eps) {
     kc=tr-tt;
   }
   kc=ks+kc;
+  
+#ifdef OMP
+  g_omp_acc_re[thread_num] = kc;
+  } /* OpenMP parallel closing brace */
+
+  for(int i = 0; i < omp_num_threads; ++i) {
+    res += g_omp_acc_re[i];
+  }
+#else
+  res=kc;
+#endif
+
 #ifdef MPI
-  MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return(ks);
+  MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  return(mres);
 #else
-  return(kc);
+  return(res);
 #endif
 }
 
@@ -561,7 +588,7 @@ void mult_6x6(_Complex double a[6][6], const _Complex double b[6][6], const _Com
   return;
 }
 
-void copy_6x6(_Complex double a[6][6], _Complex double b[6][6]) {
+void copy_6x6(_Complex double a[6][6], const _Complex double b[6][6]) {
   for(int i = 0; i < 6; i++) {
     for(int j = 0; j < 6; j++) {
       a[i][j] = b[i][j];

From 69fcff5e505ebce633d1b0da9ccdb2e2ec03290d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 5 Oct 2012 10:51:23 +0200
Subject: [PATCH 024/110] sw_trace_nd and sw_invert_nd with OpenMP and cleaning
 in tm_operators_nd started

---
 clover_leaf.c     |  24 +++++++---
 tm_operators_nd.c | 117 ++++++++++++++++++----------------------------
 2 files changed, 63 insertions(+), 78 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index e3f439ec7..ab30a6245 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -257,7 +257,7 @@ void six_invert(int* ifail ,_Complex double a[6][6])
     p[k] = conj(sigma) * a[k][k];
     q = conj(sigma) * sigma;
     if (q < tiny_t)
-      *ifail++;
+      (*ifail)++;
     d[k] = -conj(sigma) / q;
 
     /* reflect all columns to the right */
@@ -274,7 +274,7 @@ void six_invert(int* ifail ,_Complex double a[6][6])
   sigma = a[nm1][nm1];
   q = conj(sigma) * sigma;
   if (q < tiny_t)
-    *ifail++;
+    (*ifail)++;
   d[nm1] = conj(sigma) / q;
 
   /*  inversion of upper triangular matrix in place
@@ -540,7 +540,7 @@ double sw_trace_nd(const int ieo, const double mu, const double eps) {
       // we add the twisted mass term prop to tau^3
       if(i == 0) add_tm(a, mu);
       else add_tm(a, -mu);
-      det[i] = six_det(a);
+      six_det(&det[i], a);
     }
     // and compute the tr log (or log det)
     // for the 2x2 matrix in flavour space
@@ -575,7 +575,7 @@ double sw_trace_nd(const int ieo, const double mu, const double eps) {
 }
 
 
-void mult_6x6(_Complex double a[6][6], const _Complex double b[6][6], const _Complex double d[6][6]) {
+void mult_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]) {
 
   for(int i = 0; i < 6; i++) {
     for(int j = 0; j < 6; j++) {
@@ -716,11 +716,18 @@ inline void add_shift_6x6(_Complex double a[6][6], const double mshift) {
 // must be done elsewhere because of flavour structure
 
 void sw_invert_nd(const double mshift) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
   int err=0;
   int i, x;
-  static su3 v;
-  static _Complex double a[6][6], b[6][6];
+  su3 ALIGN v;
+  _Complex double ALIGN a[6][6], b[6][6];
 
+#ifdef OMP
+#pragma omp for
+#endif
   for(int icx = 0; icx < (VOLUME/2); icx++) {
     x = g_eo2lexic[icx];
 
@@ -735,7 +742,7 @@ void sw_invert_nd(const double mshift) {
       // we add the mass shift term
       add_shift_6x6(b, mshift);
       // so b = (1+T)^2 + shift
-      err = six_invert(b); 
+      six_invert(&err, b); 
       // here we need to catch the error! 
       if(err > 0 && g_proc_id == 0) {
 	printf("# inversion failed in six_invert_nd code %d\n", err);
@@ -749,6 +756,9 @@ void sw_invert_nd(const double mshift) {
       get_3x3_block_matrix(&sw_inv[icx][3][i], b, 3, 0);
     }
   }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
   return;
 }
 
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 358021c79..4bdf6e2f6 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -42,7 +42,9 @@
 #include "tm_operators_nd.h"
 
 
-void mul_one_minus_iconst(spinor * const l, spinor * const k, const double mu);
+void mul_one_pm_iconst(spinor * const l, spinor * const k, 
+		       const double mu_, const int sign_);
+
 
 /* external functions */
 
@@ -68,8 +70,8 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -83,8 +85,8 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], k_strange, -g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], k_charm, g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], k_strange, g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], k_charm, g_mubar, -1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], k_charm, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], k_strange, -g_epsbar, VOLUME/2);
@@ -127,8 +129,8 @@ void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -142,8 +144,8 @@ void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, -g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar, -1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
@@ -188,8 +190,8 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -201,8 +203,8 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, -g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar, -1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
@@ -232,8 +234,8 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
 
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -245,8 +247,8 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], -g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar, -1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+6], -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+7], -g_epsbar, VOLUME/2);
@@ -305,8 +307,8 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -321,8 +323,8 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], k_charm, -g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], k_strange, g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], k_charm, g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], k_strange, g_mubar, -1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], k_charm, -g_epsbar, VOLUME/2);
@@ -442,8 +444,8 @@ void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -457,8 +459,8 @@ void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, -g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar, -1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
@@ -488,8 +490,8 @@ void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
 
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], -g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
@@ -503,8 +505,8 @@ void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], -g_mubar);
-  mul_one_minus_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar, -1);
 
   assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+6], -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+7], -g_epsbar, VOLUME/2);
@@ -551,8 +553,8 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_minus_iconst(l_strange, g_spinor_field[DUM_MATRIX+1], g_mubar);
-  mul_one_minus_iconst(l_charm, g_spinor_field[DUM_MATRIX], -g_mubar);
+  mul_one_pm_iconst(l_strange, g_spinor_field[DUM_MATRIX+1], g_mubar, -1);
+  mul_one_pm_iconst(l_charm, g_spinor_field[DUM_MATRIX], g_mubar, +1);
 
   assign_add_mul_r(l_strange, g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
   assign_add_mul_r(l_charm, g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
@@ -570,8 +572,8 @@ void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm,
 
   /* recall:   strange <-> up    while    charm <-> dn   */
 
-  mul_one_minus_iconst(l_strange, k_strange, g_mubar);
-  mul_one_minus_iconst(l_charm, k_charm, -g_mubar);
+  mul_one_pm_iconst(l_strange, k_strange, g_mubar, -1);
+  mul_one_pm_iconst(l_charm, k_charm, g_mubar, +1);
 
   assign_add_mul_r(l_strange, k_charm, g_epsbar, VOLUME/2);
   assign_add_mul_r(l_charm, k_strange, g_epsbar, VOLUME/2);
@@ -629,40 +631,8 @@ void mul_one_pm_itau2(spinor * const p, spinor * const q,
   mul_r(q, fac, q, N);
 }
 
-void mul_one_minus_imubar(spinor * const l, spinor * const k, const double mu) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-      
-  spinor *r, *s;
-  su3_vector ALIGN phi1;
-
-  /************ loop over all lattice sites ************/
-#ifdef OMP
-#pragma omp for
-#endif
-  for(int ix = 0; ix < (VOLUME/2); ++ix){
-    r=l + ix;
-    s=k + ix;
-    /* Multiply the spinorfield with the inverse of 1+imu\gamma_5 */
-    _complex_times_vector(phi1, (1. - mu * I), s->s0);
-    _vector_assign(r->s0, phi1);
-    _complex_times_vector(phi1, (1. - mu * I), s->s1);
-    _vector_assign(r->s1, phi1);
-    _complex_times_vector(phi1, (1. + mu * I), s->s2);
-    _vector_assign(r->s2, phi1);
-    _complex_times_vector(phi1, (1. + mu * I), s->s3);
-    _vector_assign(r->s3, phi1);
-  }
-
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
-
-
-void mul_one_plus_imubar(spinor * const l, spinor * const k){
+void mul_one_pm_iconst(spinor * const l, spinor * const k, 
+		       const double mu_, const int sign_) {
 #ifdef OMP
 #pragma omp parallel
   {
@@ -670,6 +640,10 @@ void mul_one_plus_imubar(spinor * const l, spinor * const k){
 
   spinor *r, *s;
   su3_vector ALIGN phi1;
+  double mu = mu_;
+  if(sign_ < 0) {
+    mu = -mu_;
+  }
 
   /************ loop over all lattice sites ************/
 #ifdef OMP
@@ -679,13 +653,13 @@ void mul_one_plus_imubar(spinor * const l, spinor * const k){
     r=l + ix;
     s=k + ix;
     /* Multiply the spinorfield with the inverse of 1+imu\gamma_5 */
-    _complex_times_vector(phi1, (1. + g_mubar * I), s->s0);
+    _complex_times_vector(phi1, (1. + mu * I), s->s0);
     _vector_assign(r->s0, phi1);
-    _complex_times_vector(phi1, (1. + g_mubar * I), s->s1);
+    _complex_times_vector(phi1, (1. + mu * I), s->s1);
     _vector_assign(r->s1, phi1);
-    _complex_times_vector(phi1, (1. - g_mubar * I), s->s2);
+    _complex_times_vector(phi1, (1. - mu * I), s->s2);
     _vector_assign(r->s2, phi1);
-    _complex_times_vector(phi1, (1. - g_mubar * I), s->s3);
+    _complex_times_vector(phi1, (1. - mu * I), s->s3);
     _vector_assign(r->s3, phi1);
   }
 
@@ -696,6 +670,7 @@ void mul_one_plus_imubar(spinor * const l, spinor * const k){
   return;
 }
 
+
 /*  calculates P(Q Q^dagger) for the nondegenerate case */
 
 void P_ndpsi(spinor * const l_strange, spinor * const l_charm,

From 73ea5fc43c624b1e2c22e626ef674cc48c9d22d4 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 5 Oct 2012 11:50:52 +0200
Subject: [PATCH 025/110] removed all _bi linalg functions as they are
 superfluous

---
 linalg/Makefile.in                         |   5 +-
 linalg/assign_add_mul_add_mul_bi.c         | 102 -------------
 linalg/assign_add_mul_add_mul_bi.h         |  29 ----
 linalg/assign_add_mul_r_bi.c               | 149 -------------------
 linalg/assign_add_mul_r_bi.h               |  27 ----
 linalg/assign_bi.c                         |  97 ------------
 linalg/assign_bi.h                         |  28 ----
 linalg/assign_diff_mul_bi.c                |  99 -------------
 linalg/assign_diff_mul_bi.h                |  27 ----
 linalg/assign_mul_add_r_bi.c               | 162 ---------------------
 linalg/assign_mul_add_r_bi.h               |  27 ----
 linalg/assign_mul_bra_add_mul_ket_add_bi.c | 103 -------------
 linalg/assign_mul_bra_add_mul_ket_add_bi.h |  28 ----
 linalg/diff_bi.c                           | 100 -------------
 linalg/diff_bi.h                           |  29 ----
 linalg/mul_r_bi.c                          |  96 ------------
 linalg/mul_r_bi.h                          |  28 ----
 linalg/scalar_prod_bi.c                    |  86 -----------
 linalg/scalar_prod_bi.h                    |  27 ----
 linalg/scalar_prod_r_bi.c                  |  90 ------------
 linalg/scalar_prod_r_bi.h                  |  28 ----
 linalg/square_norm_bi.c                    |  98 -------------
 linalg/square_norm_bi.h                    |  34 -----
 solver/Makefile.in                         |   2 +-
 solver/bicgstab_complex_bi.c               |  32 ++--
 solver/cg_her_bi.c                         |  45 +++---
 solver/gram-schmidt_bi.c                   | 102 -------------
 solver/gram-schmidt_bi.h                   |  28 ----
 solver/jdher_bi.c                          |  23 +--
 tm_operators_nd.c                          |   7 +-
 30 files changed, 51 insertions(+), 1687 deletions(-)
 delete mode 100644 linalg/assign_add_mul_add_mul_bi.c
 delete mode 100644 linalg/assign_add_mul_add_mul_bi.h
 delete mode 100644 linalg/assign_add_mul_r_bi.c
 delete mode 100644 linalg/assign_add_mul_r_bi.h
 delete mode 100644 linalg/assign_bi.c
 delete mode 100644 linalg/assign_bi.h
 delete mode 100644 linalg/assign_diff_mul_bi.c
 delete mode 100644 linalg/assign_diff_mul_bi.h
 delete mode 100644 linalg/assign_mul_add_r_bi.c
 delete mode 100644 linalg/assign_mul_add_r_bi.h
 delete mode 100644 linalg/assign_mul_bra_add_mul_ket_add_bi.c
 delete mode 100644 linalg/assign_mul_bra_add_mul_ket_add_bi.h
 delete mode 100644 linalg/diff_bi.c
 delete mode 100644 linalg/diff_bi.h
 delete mode 100644 linalg/mul_r_bi.c
 delete mode 100644 linalg/mul_r_bi.h
 delete mode 100644 linalg/scalar_prod_bi.c
 delete mode 100644 linalg/scalar_prod_bi.h
 delete mode 100644 linalg/scalar_prod_r_bi.c
 delete mode 100644 linalg/scalar_prod_r_bi.h
 delete mode 100644 linalg/square_norm_bi.c
 delete mode 100644 linalg/square_norm_bi.h
 delete mode 100644 solver/gram-schmidt_bi.c
 delete mode 100644 solver/gram-schmidt_bi.h

diff --git a/linalg/Makefile.in b/linalg/Makefile.in
index 3422732ec..2385cf7da 100644
--- a/linalg/Makefile.in
+++ b/linalg/Makefile.in
@@ -39,10 +39,7 @@ liblinalg_TARGETS = assign_add_mul_r_add_mul \
 	assign_diff_mul mul_add_mul mul assign_add_mul_add_mul \
 	assign_mul_bra_add_mul_ket_add assign_mul_add_mul_add_mul_add_mul_r \
 	mul_diff_mul_r assign_add_mul_add_mul_r \
-        comp_decomp square_norm_bi assign_bi scalar_prod_bi diff_bi \
-        assign_diff_mul_bi assign_add_mul_add_mul_bi \
-        assign_mul_bra_add_mul_ket_add_bi mul_r_bi \
-        scalar_prod_r_bi assign_add_mul_r_bi assign_mul_add_r_bi \
+        comp_decomp \
 	convert_eo_to_lexic assign_mul_add_mul_r mul_add_mul_r \
 	assign_mul_add_mul_add_mul_r mattimesvec \
 	scalar_prod_su3spinor \
diff --git a/linalg/assign_add_mul_add_mul_bi.c b/linalg/assign_add_mul_add_mul_bi.c
deleted file mode 100644
index b7e84ba30..000000000
--- a/linalg/assign_add_mul_add_mul_bi.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*******************************************************************************
- *
- * File assign_add_mul_add_mul.c
- *
- *   void assign_add_mul_add_mul(spinor * const R,spinor * const S,spinor * const U,const complex c1,const complex c2)
- *     (*R) = (*R) + c1 * (*S) + c2 * (*U) with c1 and c2 complex variables
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- * 
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#ifdef OMP
-# include <omp.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include "su3.h"
-#include "assign_add_mul_add_mul_bi.h"
-
-
-/* S,U input, R inoutput, c1,c2 input */
-void assign_add_mul_add_mul_bi(bispinor * const R, bispinor * const S, bispinor * const U, const _Complex double c1, const _Complex double c2, const int N){
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  spinor *r,*s,*u;
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for (int ix = 0; ix < N; ++ix)
-  {
-    r=(spinor *) &R[ix].sp_up;
-    s=(spinor *) &S[ix].sp_up;
-    u=(spinor *) &U[ix].sp_up;
-    
-    r->s0.c0 += c1 * s->s0.c0 + c2 * u->s0.c0;
-    r->s0.c1 += c1 * s->s0.c1 + c2 * u->s0.c1;
-    r->s0.c2 += c1 * s->s0.c2 + c2 * u->s0.c2;
-
-    r->s1.c0 += c1 * s->s1.c0 + c2 * u->s1.c0;
-    r->s1.c1 += c1 * s->s1.c1 + c2 * u->s1.c1;
-    r->s1.c2 += c1 * s->s1.c2 + c2 * u->s1.c2;
-
-    r->s2.c0 += c1 * s->s2.c0 + c2 * u->s2.c0;
-    r->s2.c1 += c1 * s->s2.c1 + c2 * u->s2.c1;
-    r->s2.c2 += c1 * s->s2.c2 + c2 * u->s2.c2;
-
-    r->s3.c0 += c1 * s->s3.c0 + c2 * u->s3.c0;
-    r->s3.c1 += c1 * s->s3.c1 + c2 * u->s3.c1;
-    r->s3.c2 += c1 * s->s3.c2 + c2 * u->s3.c2;
-
-    r=(spinor *) &R[ix].sp_dn;
-    s=(spinor *) &S[ix].sp_dn;
-    u=(spinor *) &U[ix].sp_dn;
-    
-    r->s0.c0 += c1 * s->s0.c0 + c2 * u->s0.c0;
-    r->s0.c1 += c1 * s->s0.c1 + c2 * u->s0.c1;
-    r->s0.c2 += c1 * s->s0.c2 + c2 * u->s0.c2;
-
-    r->s1.c0 += c1 * s->s1.c0 + c2 * u->s1.c0;
-    r->s1.c1 += c1 * s->s1.c1 + c2 * u->s1.c1;
-    r->s1.c2 += c1 * s->s1.c2 + c2 * u->s1.c2;
-
-    r->s2.c0 += c1 * s->s2.c0 + c2 * u->s2.c0;
-    r->s2.c1 += c1 * s->s2.c1 + c2 * u->s2.c1;
-    r->s2.c2 += c1 * s->s2.c2 + c2 * u->s2.c2;
-
-    r->s3.c0 += c1 * s->s3.c0 + c2 * u->s3.c0;
-    r->s3.c1 += c1 * s->s3.c1 + c2 * u->s3.c1;
-    r->s3.c2 += c1 * s->s3.c2 + c2 * u->s3.c2;
-  }
-
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
diff --git a/linalg/assign_add_mul_add_mul_bi.h b/linalg/assign_add_mul_add_mul_bi.h
deleted file mode 100644
index 3ae06fdf1..000000000
--- a/linalg/assign_add_mul_add_mul_bi.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _ASSIGN_ADD_MUL_ADD_MUL_BI_H
-#define _ASSIGN_ADD_MUL_ADD_MUL_BI_H
-
-#include "su3.h"
-
-/* (*R) = (*R) + c1*(*S) + c2*(*U) */
-void assign_add_mul_add_mul_bi(bispinor * const R, bispinor * const S, bispinor * const U, const _Complex double c1, const _Complex double c2, const int N);
-
-
-#endif
diff --git a/linalg/assign_add_mul_r_bi.c b/linalg/assign_add_mul_r_bi.c
deleted file mode 100644
index 9a6e0d157..000000000
--- a/linalg/assign_add_mul_r_bi.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- *
- *      Adpated routine evaluating the P=P+c*Q where P,Q are bispinors
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- *
- ************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#ifdef MPI
-#include <mpi.h>
-#endif
-#ifdef OMP
-# include <omp.h>
-#endif
-#include "su3.h"
-#include "sse.h"
-#include "assign_add_mul_r_bi.h"
-
-
-#if ( defined SSE2 || defined SSE3 )
-/*  k input, l output */
-void assign_add_mul_r_bi(bispinor * const P, bispinor * const Q, const double c, const int N) {
-  
-  int ix;
-  su3_vector *s,*r;
-  __asm__ __volatile__ ("movsd %0, %%xmm7 \n\t"
-			"unpcklpd %%xmm7, %%xmm7"
-			:
-			:
-			"m" (c));
-  s=(su3_vector *) &P[0].sp_up.s0;
-  r=(su3_vector *) &Q[0].sp_up.s0;
-/*  for (ix = 0;ix < 4*N; ix++) { */
-  for (ix = 0;ix < 2*4*N; ix++) {
-    _sse_load_up(*r);
-    __asm__ __volatile__ ("mulpd %%xmm7, %%xmm3 \n\t"
-			  "mulpd %%xmm7, %%xmm4 \n\t"
-			  "mulpd %%xmm7, %%xmm5"
-			  :
-			  :);
-    _sse_load(*s);
-    __asm__ __volatile__ ("addpd %%xmm3, %%xmm0 \n\t"
-			  "addpd %%xmm4, %%xmm1 \n\t"
-			  "addpd %%xmm5, %%xmm2"
-			  :
-			  :);
-    _sse_store(*s);
-    s++; r++;
-  }
-
-}
-
-#else
-/*  k input, l output */
-void assign_add_mul_r_bi(bispinor * const P, bispinor * const Q, const double c, const int N)
-{
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-  spinor *r,*s;
-
-  /* Change due to even-odd preconditioning : VOLUME   to VOLUME/2 */   
-#ifdef OMP
-#pragma omp for
-#endif
-  for (int ix = 0; ix < N; ix++)
-  {
-    r=(spinor *) &P[ix].sp_up;
-    s=(spinor *) &Q[ix].sp_up;
-    
-    r->s0.c0 += c * s->s0.c0;
-    r->s0.c1 += c * s->s0.c1;
-    r->s0.c2 += c * s->s0.c2;
-    
-    r->s1.c0 += c * s->s1.c0;
-    r->s1.c1 += c * s->s1.c1;
-    r->s1.c2 += c * s->s1.c2;
-    
-    r->s2.c0 += c * s->s2.c0;
-    r->s2.c1 += c * s->s2.c1;
-    r->s2.c2 += c * s->s2.c2;       
-    
-    r->s3.c0 += c * s->s3.c0;
-    r->s3.c1 += c * s->s3.c1;
-    r->s3.c2 += c * s->s3.c2;
-
-    r=(spinor *) &P[ix].sp_dn;
-    s=(spinor *) &Q[ix].sp_dn;
-    
-    r->s0.c0 += c * s->s0.c0;
-    r->s0.c1 += c * s->s0.c1;
-    r->s0.c2 += c * s->s0.c2;
-    
-    r->s1.c0 += c * s->s1.c0;
-    r->s1.c1 += c * s->s1.c1; 
-    r->s1.c2 += c * s->s1.c2;
-    
-    r->s2.c0 += c * s->s2.c0;
-    r->s2.c1 += c * s->s2.c1;
-    r->s2.c2 += c * s->s2.c2;       
-    
-    r->s3.c0 += c * s->s3.c0;
-    r->s3.c1 += c * s->s3.c1;
-    r->s3.c2 += c * s->s3.c2;
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
-#endif
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/linalg/assign_add_mul_r_bi.h b/linalg/assign_add_mul_r_bi.h
deleted file mode 100644
index b4e636b82..000000000
--- a/linalg/assign_add_mul_r_bi.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _ASSIGN_ADD_MUL_R_BI_H
-#define _ASSIGN_ADD_MUL_R_BI_H
-
-#include "su3.h"
-
-void assign_add_mul_r_bi(bispinor * const P, bispinor * const Q, const double c, const int N);
-
-#endif
diff --git a/linalg/assign_bi.c b/linalg/assign_bi.c
deleted file mode 100644
index bff7bf9db..000000000
--- a/linalg/assign_bi.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*******************************************************************************
- *
- * File assign_bi.c
- *
- *   void assign_bi(bispinor * const R, bispinor * const S)
- *     Assign (*R) = (*S)
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- *
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#ifdef OMP
-#include <omp.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include "su3.h"
-#include "assign_bi.h"
-
-/* S input, R output */
-void assign_bi(bispinor * const R, bispinor * const S, const int N){
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-  spinor *r,*s;
-  
-#ifdef OMP
-#pragma omp for
-#endif
-  for (int ix = 0; ix < N; ++ix)
-  {
-    r=(spinor *) &R[ix].sp_up;
-    s=(spinor *) &S[ix].sp_up;
-   
-    r->s0.c0 = s->s0.c0;
-    r->s0.c1 = s->s0.c1;
-    r->s0.c2 = s->s0.c2;
-    
-    r->s1.c0 = s->s1.c0;
-    r->s1.c1 = s->s1.c1;
-    r->s1.c2 = s->s1.c2;
-    
-    r->s2.c0 = s->s2.c0;
-    r->s2.c1 = s->s2.c1;
-    r->s2.c2 = s->s2.c2;
-    
-    r->s3.c0 = s->s3.c0;
-    r->s3.c1 = s->s3.c1;
-    r->s3.c2 = s->s3.c2;
-
-    r=(spinor *) &R[ix].sp_dn;
-    s=(spinor *) &S[ix].sp_dn;
-   
-    r->s0.c0 = s->s0.c0;
-    r->s0.c1 = s->s0.c1;
-    r->s0.c2 = s->s0.c2;
-    
-    r->s1.c0 = s->s1.c0;
-    r->s1.c1 = s->s1.c1;
-    r->s1.c2 = s->s1.c2;
-    
-    r->s2.c0 = s->s2.c0;
-    r->s2.c1 = s->s2.c1;
-    r->s2.c2 = s->s2.c2;
-    
-    r->s3.c0 = s->s3.c0;
-    r->s3.c1 = s->s3.c1;
-    r->s3.c2 = s->s3.c2;
-  }
-#ifdef OMP
- }  /* OpenMP closing brace */
-#endif
-}
diff --git a/linalg/assign_bi.h b/linalg/assign_bi.h
deleted file mode 100644
index 97942528f..000000000
--- a/linalg/assign_bi.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _ASSIGN_BI_H
-#define _ASSIGN_BI_H
-
-#include "su3.h"
-
-/* Assign (*R) = (*S) */
-void assign_bi(bispinor * const R, bispinor * const S, const int N);
-
-#endif
diff --git a/linalg/assign_diff_mul_bi.c b/linalg/assign_diff_mul_bi.c
deleted file mode 100644
index d9839fbc3..000000000
--- a/linalg/assign_diff_mul_bi.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is e softw: you candistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the e Softw Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for m details.
- * 
- * You should haveceived a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-/************************************************************************
- * 
- *      Adapted routine evaluating the S=S-c*Q wh S,Q  bispinors
- * 
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- *
- ************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#ifdef OMP
-# include <omp.h>
-#endif
-#include <stdlib.h>
-#include "su3.h"
-#include "assign_diff_mul_bi.h"
-
-
-/* S=S-c*Q */
-void assign_diff_mul_bi(bispinor * const S, bispinor * const R, const _Complex double c, const int N)
-{
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-  spinor *r, *s;
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for (int ix = 0; ix < N; ++ix)
-  {
-    s = (spinor *) &S[ix].sp_up;
-    r = (spinor *) &R[ix].sp_up;
-
-    s->s0.c0 -= c * r->s0.c0;
-    s->s0.c1 -= c * r->s0.c1;
-    s->s0.c2 -= c * r->s0.c2;
-
-    s->s1.c0 -= c * r->s1.c0;
-    s->s1.c1 -= c * r->s1.c1;
-    s->s1.c2 -= c * r->s1.c2;
-
-    s->s2.c0 -= c * r->s2.c0;
-    s->s2.c1 -= c * r->s2.c1;
-    s->s2.c2 -= c * r->s2.c2;
-     
-    s->s3.c0 -= c * r->s3.c0;
-    s->s3.c1 -= c * r->s3.c1;
-    s->s3.c2 -= c * r->s3.c2;
-
-
-    s = (spinor *) &S[ix].sp_dn;
-    r = (spinor *) &R[ix].sp_dn;
-
-    s->s0.c0 -= c * r->s0.c0;
-    s->s0.c1 -= c * r->s0.c1;
-    s->s0.c2 -= c * r->s0.c2;
-
-    s->s1.c0 -= c * r->s1.c0;
-    s->s1.c1 -= c * r->s1.c1;
-    s->s1.c2 -= c * r->s1.c2;
-
-    s->s2.c0 -= c * r->s2.c0;
-    s->s2.c1 -= c * r->s2.c1;
-    s->s2.c2 -= c * r->s2.c2;
-     
-    s->s3.c0 -= c * r->s3.c0;
-    s->s3.c1 -= c * r->s3.c1;
-    s->s3.c2 -= c * r->s3.c2;
-  }
-
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
-
-
diff --git a/linalg/assign_diff_mul_bi.h b/linalg/assign_diff_mul_bi.h
deleted file mode 100644
index 76edbea4a..000000000
--- a/linalg/assign_diff_mul_bi.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _ASSIGN_DIFF_MUL_BI_H
-#define _ASSIGN_DIFF_MUL_BI_H
-
-#include "su3.h"
-
-void assign_diff_mul_bi(bispinor * const S, bispinor * const R, const _Complex double c, const int N);
-
-#endif
diff --git a/linalg/assign_mul_add_r_bi.c b/linalg/assign_mul_add_r_bi.c
deleted file mode 100644
index 5b514a01c..000000000
--- a/linalg/assign_mul_add_r_bi.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-/************************************************************************
- *
- *      Adpated routine evaluating the P=c*P+Q where P,Q are bispinors
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- *
- ************************************************************************/
-
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#ifdef OMP
-# include <omp.h>
-#endif
-#include <stdlib.h>
-#include "su3.h"
-#include "sse.h"
-#include "assign_mul_add_r_bi.h"
-
-
-#if ( defined SSE2 || defined SSE3 )
-/* k input , l output*/
-void assign_mul_add_r_bi(bispinor * const S, const double c, bispinor * const R, const int N) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  int ix;
-  su3_vector *s,*r;
-  /*
-  su3_vector *t,*u;
-  */
-
-  __asm__ __volatile__ ("movsd %0, %%xmm7 \n\t"
-			"unpcklpd %%xmm7, %%xmm7"
-			:
-			:
-			"m" (c));
-  
-#ifndef OMP
-  s=(su3_vector *) &S[0].sp_up.s0;
-  r=(su3_vector *) &R[0].sp_up.s0;
-#endif
-
-/*  for (ix=0;ix<4*N;ix++) { */
-#ifdef OMP
-#pragma omp for
-#endif
-  for (ix=0;ix<2*4*N;ix++) {
-#ifdef OMP
-    s=((su3_vector *) &S[0].sp_up.s0) +  ix;
-    r=((su3_vector *) &R[0].sp_up.s0)  + ix;
-#endif
-
-    _sse_load(*s);
-    __asm__ __volatile__ ("mulpd %%xmm7, %%xmm0 \n\t"
-			  "mulpd %%xmm7, %%xmm1 \n\t"
-			  "mulpd %%xmm7, %%xmm2"
-			  :
-			  :);
-    _sse_load_up(*r);
-    __asm__ __volatile__ ("addpd %%xmm3, %%xmm0 \n\t"
-			  "addpd %%xmm4, %%xmm1 \n\t"
-			  "addpd %%xmm5, %%xmm2"
-			  :
-			  :);
-    _sse_store(*s);
-#ifndef OMP
-    s++; r++;
-#endif
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
-
-#else
-/* k input , l output*/
-void assign_mul_add_r_bi(bispinor * const R, const double c, bispinor * const S, const int N)
-{
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  spinor *r,*s;
-  
-  /* Change due to even-odd preconditioning : VOLUME   to VOLUME/2 */   
-#ifdef OMP
-#pragma omp for
-#endif
-  for (int ix = 0; ix < N; ++ix)
-  {
-    r = (spinor *) &R[ix].sp_up;
-    s = (spinor *) &S[ix].sp_up;
-    
-    r->s0.c0 = c * r->s0.c0 + s->s0.c0;
-    r->s0.c1 = c * r->s0.c1 + s->s0.c1;
-    r->s0.c2 = c * r->s0.c2 + s->s0.c2;    
-
-    r->s1.c0 = c * r->s1.c0 + s->s1.c0;
-    r->s1.c1 = c * r->s1.c1 + s->s1.c1;
-    r->s1.c2 = c * r->s1.c2 + s->s1.c2;    
-
-    r->s2.c0 = c * r->s2.c0 + s->s2.c0;
-    r->s2.c1 = c * r->s2.c1 + s->s2.c1;
-    r->s2.c2 = c * r->s2.c2 + s->s2.c2;    
-
-    r->s3.c0 = c * r->s3.c0 + s->s3.c0;
-    r->s3.c1 = c * r->s3.c1 + s->s3.c1;
-    r->s3.c2 = c * r->s3.c2 + s->s3.c2;    
-
-    r = (spinor *) &R[ix].sp_dn;
-    s = (spinor *) &S[ix].sp_dn;
-    
-    r->s0.c0 = c * r->s0.c0 + s->s0.c0;
-    r->s0.c1 = c * r->s0.c1 + s->s0.c1;
-    r->s0.c2 = c * r->s0.c2 + s->s0.c2;    
-
-    r->s1.c0 = c * r->s1.c0 + s->s1.c0;
-    r->s1.c1 = c * r->s1.c1 + s->s1.c1;
-    r->s1.c2 = c * r->s1.c2 + s->s1.c2;    
-
-    r->s2.c0 = c * r->s2.c0 + s->s2.c0;
-    r->s2.c1 = c * r->s2.c1 + s->s2.c1;
-    r->s2.c2 = c * r->s2.c2 + s->s2.c2;    
-
-    r->s3.c0 = c * r->s3.c0 + s->s3.c0;
-    r->s3.c1 = c * r->s3.c1 + s->s3.c1;
-    r->s3.c2 = c * r->s3.c2 + s->s3.c2;    
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
-
-#endif
-
-
-
diff --git a/linalg/assign_mul_add_r_bi.h b/linalg/assign_mul_add_r_bi.h
deleted file mode 100644
index 922423dff..000000000
--- a/linalg/assign_mul_add_r_bi.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef ASSIGN_MUL_ADD_R_BI_H
-#define ASSIGN_MUL_ADD_R_BI_H
-
-#include "su3.h"
-
-void assign_mul_add_r_bi(bispinor * const S, const double c, bispinor * const R, const int N);
-
-#endif
diff --git a/linalg/assign_mul_bra_add_mul_ket_add_bi.c b/linalg/assign_mul_bra_add_mul_ket_add_bi.c
deleted file mode 100644
index 0b1a35c87..000000000
--- a/linalg/assign_mul_bra_add_mul_ket_add_bi.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- *
- * File assign_mul_bra_add_mul_ket_add.c
- *
- *   void assign_mul_bra_add_mul_ket_add
- *   (spinor * const R,spinor * const S,spinor * const U,const double c1,const double c2)
- *     (*R) = c2*(*R + c1*(*S)) + (*U)  with c1 and c2 complex variables
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- *
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#ifdef OMP
-# include <omp.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include "su3.h"
-#include "sse.h"
-#include "assign_mul_bra_add_mul_ket_add_bi.h"
-
-/* R inoutput, S input, U input, c1 input, c2 input */
-void assign_mul_bra_add_mul_ket_add_bi(bispinor * const R, bispinor * const S, bispinor * const U, const _Complex double c1, const _Complex double c2, const int N)
-{ 
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  spinor *r,*s,*u;
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for (int ix = 0; ix < N; ++ix)
-  {
-    r=(spinor *) &R[ix].sp_up;
-    s=(spinor *) &S[ix].sp_up;
-    u=(spinor *) &U[ix].sp_up;
-    
-    r->s0.c0 = u->s0.c0 + c2 * (r->s0.c0 + c1 * s->s0.c0);
-    r->s0.c1 = u->s0.c1 + c2 * (r->s0.c1 + c1 * s->s0.c1);
-    r->s0.c2 = u->s0.c2 + c2 * (r->s0.c2 + c1 * s->s0.c2);
-    
-    r->s1.c0 = u->s1.c0 + c2 * (r->s1.c0 + c1 * s->s1.c0);
-    r->s1.c1 = u->s1.c1 + c2 * (r->s1.c1 + c1 * s->s1.c1);
-    r->s1.c2 = u->s1.c2 + c2 * (r->s1.c2 + c1 * s->s1.c2);
-
-    r->s2.c0 = u->s2.c0 + c2 * (r->s2.c0 + c1 * s->s2.c0);
-    r->s2.c1 = u->s2.c1 + c2 * (r->s2.c1 + c1 * s->s2.c1);
-    r->s2.c2 = u->s2.c2 + c2 * (r->s2.c2 + c1 * s->s2.c2);
-
-    r->s3.c0 = u->s3.c0 + c2 * (r->s3.c0 + c1 * s->s3.c0);
-    r->s3.c1 = u->s3.c1 + c2 * (r->s3.c1 + c1 * s->s3.c1);
-    r->s3.c2 = u->s3.c2 + c2 * (r->s3.c2 + c1 * s->s3.c2);
-
-    r=(spinor *) &R[ix].sp_dn;
-    s=(spinor *) &S[ix].sp_dn;
-    u=(spinor *) &U[ix].sp_dn;
-
-    r->s0.c0 = u->s0.c0 + c2 * (r->s0.c0 + c1 * s->s0.c0);
-    r->s0.c1 = u->s0.c1 + c2 * (r->s0.c1 + c1 * s->s0.c1);
-    r->s0.c2 = u->s0.c2 + c2 * (r->s0.c2 + c1 * s->s0.c2);
-    
-    r->s1.c0 = u->s1.c0 + c2 * (r->s1.c0 + c1 * s->s1.c0);
-    r->s1.c1 = u->s1.c1 + c2 * (r->s1.c1 + c1 * s->s1.c1);
-    r->s1.c2 = u->s1.c2 + c2 * (r->s1.c2 + c1 * s->s1.c2);
-
-    r->s2.c0 = u->s2.c0 + c2 * (r->s2.c0 + c1 * s->s2.c0);
-    r->s2.c1 = u->s2.c1 + c2 * (r->s2.c1 + c1 * s->s2.c1);
-    r->s2.c2 = u->s2.c2 + c2 * (r->s2.c2 + c1 * s->s2.c2);
-
-    r->s3.c0 = u->s3.c0 + c2 * (r->s3.c0 + c1 * s->s3.c0);
-    r->s3.c1 = u->s3.c1 + c2 * (r->s3.c1 + c1 * s->s3.c1);
-    r->s3.c2 = u->s3.c2 + c2 * (r->s3.c2 + c1 * s->s3.c2);
-  }
-
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  
-}
diff --git a/linalg/assign_mul_bra_add_mul_ket_add_bi.h b/linalg/assign_mul_bra_add_mul_ket_add_bi.h
deleted file mode 100644
index 3be4373b0..000000000
--- a/linalg/assign_mul_bra_add_mul_ket_add_bi.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _ASSIGN_MUL_BRA_ADD_MUL_KET_ADD_BI_H
-#define _ASSIGN_MUL_BRA_ADD_MUL_KET_ADD_BI_H
-
-#include "su3.h"
-
-/* (*R) =  c2*(*R + c1*(*S)) + (*U) */
-void assign_mul_bra_add_mul_ket_add_bi(bispinor * const R, bispinor * const S, bispinor * const U, const _Complex double c1, const _Complex double c2, const int N);
-
-#endif
diff --git a/linalg/diff_bi.c b/linalg/diff_bi.c
deleted file mode 100644
index b0ad41476..000000000
--- a/linalg/diff_bi.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*******************************************************************************
- *
- *   void diff(spinor * const Q,spinor * const R,spinor * const S)
- *     Makes the difference (*Q) = (*R) - (*S)
- * 
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- * 
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#ifdef OMP
-# include <omp.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include "su3.h"
-#include "diff_bi.h"
-
-void diff_bi(bispinor * const Q, bispinor * const R, bispinor * const S, const int N){
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-   int ix;
-   spinor *q,*r,*s;
-
-/* Change due to even-odd preconditioning : VOLUME   to VOLUME/2 */   
-#ifdef OMP
-#pragma omp for
-#endif
-   for (ix = 0; ix < N; ix++) {
-
-     q = (spinor *) &Q[ix].sp_up;
-     r = (spinor *) &R[ix].sp_up;
-     s = (spinor *) &S[ix].sp_up;
-     
-     q->s0.c0 = r->s0.c0 - s->s0.c0;
-     q->s0.c1 = r->s0.c1 - s->s0.c1;
-     q->s0.c2 = r->s0.c2 - s->s0.c2;
-     
-     q->s1.c0 = r->s1.c0 - s->s1.c0;
-     q->s1.c1 = r->s1.c1 - s->s1.c1;
-     q->s1.c2 = r->s1.c2 - s->s1.c2;
-     
-     q->s2.c0 = r->s2.c0 - s->s2.c0;
-     q->s2.c1 = r->s2.c1 - s->s2.c1;
-     q->s2.c2 = r->s2.c2 - s->s2.c2;
-     
-     q->s3.c0 = r->s3.c0 - s->s3.c0;
-     q->s3.c1 = r->s3.c1 - s->s3.c1;
-     q->s3.c2 = r->s3.c2 - s->s3.c2;
-
-     q = (spinor *) &Q[ix].sp_dn;
-     r = (spinor *) &R[ix].sp_dn;
-     s = (spinor *) &S[ix].sp_dn;
-     
-     q->s0.c0 = r->s0.c0 - s->s0.c0;
-     q->s0.c1 = r->s0.c1 - s->s0.c1;
-     q->s0.c2 = r->s0.c2 - s->s0.c2;
-     
-     q->s1.c0 = r->s1.c0 - s->s1.c0;
-     q->s1.c1 = r->s1.c1 - s->s1.c1;
-     q->s1.c2 = r->s1.c2 - s->s1.c2;
-     
-     q->s2.c0 = r->s2.c0 - s->s2.c0;
-     q->s2.c1 = r->s2.c1 - s->s2.c1;
-     q->s2.c2 = r->s2.c2 - s->s2.c2;
-     
-     q->s3.c0 = r->s3.c0 - s->s3.c0;
-     q->s3.c1 = r->s3.c1 - s->s3.c1;
-     q->s3.c2 = r->s3.c2 - s->s3.c2;
-   }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
-
diff --git a/linalg/diff_bi.h b/linalg/diff_bi.h
deleted file mode 100644
index 3f94c70d7..000000000
--- a/linalg/diff_bi.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _DIFF_BI_H
-#define _DIFF_BI_H
-
-#include "su3.h"
-
-/* Makes the difference (*Q) = (*R) - (*S) */
-void diff_bi(bispinor * const Q, bispinor * const R, bispinor * const S, const int N);
-
-
-#endif
diff --git a/linalg/mul_r_bi.c b/linalg/mul_r_bi.c
deleted file mode 100644
index 070358661..000000000
--- a/linalg/mul_r_bi.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*******************************************************************************
- *
- * File mul_r.c
- *
- *   void mul_r(spinor * const R, const double c, spinor * const S){
- *     Makes (*R)  =  c*(*S)        c is a real constant
- *       
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#ifdef OMP
-# include <omp.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include "su3.h"
-#include "mul_r.h"
-#include "linalg_eo.h"
-
-void mul_r_bi(bispinor * const R, const double cup, const double cdn, bispinor * const S, const int N)
-{
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  spinor *r,*s;
-  
-#ifdef OMP
-#pragma omp for
-#endif
-  for (int ix  =  0; ix < N; ++ix)
-  {
-    r = (spinor *) &R[ix].sp_up;
-    s = (spinor *) &S[ix].sp_up;
-    
-    r->s0.c0 = cup*s->s0.c0;
-    r->s0.c1 = cup*s->s0.c1;
-    r->s0.c2 = cup*s->s0.c2;
-    
-    r->s1.c0 = cup*s->s1.c0;
-    r->s1.c1 = cup*s->s1.c1;
-    r->s1.c2 = cup*s->s1.c2;
-    
-    r->s2.c0 = cup*s->s2.c0;
-    r->s2.c1 = cup*s->s2.c1;
-    r->s2.c2 = cup*s->s2.c2;
-    
-    r->s3.c0 = cup*s->s3.c0;
-    r->s3.c1 = cup*s->s3.c1;
-    r->s3.c2 = cup*s->s3.c2;
-
-    r = (spinor *) &R[ix].sp_dn;
-    s = (spinor *) &S[ix].sp_dn;
-    
-    r->s0.c0 = cdn*s->s0.c0;
-    r->s0.c1 = cdn*s->s0.c1;
-    r->s0.c2 = cdn*s->s0.c2;
-    
-    r->s1.c0 = cdn*s->s1.c0;
-    r->s1.c1 = cdn*s->s1.c1;
-    r->s1.c2 = cdn*s->s1.c2;
-    
-    r->s2.c0 = cdn*s->s2.c0;
-    r->s2.c1 = cdn*s->s2.c1;
-    r->s2.c2 = cdn*s->s2.c2;
-    
-    r->s3.c0 = cdn*s->s3.c0;
-    r->s3.c1 = cdn*s->s3.c1;
-    r->s3.c2 = cdn*s->s3.c2;
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-}
diff --git a/linalg/mul_r_bi.h b/linalg/mul_r_bi.h
deleted file mode 100644
index 58b245ebf..000000000
--- a/linalg/mul_r_bi.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _MUL_R_BI_H
-#define _MUL_R_BI_H
-
-#include "su3.h"
-
-/*   Makes (*R) = c*(*S)   c is a real constant*/
-void mul_r_bi(bispinor * const R, const double cup, const double cdn, bispinor * const S, const int N);
-
-#endif
diff --git a/linalg/scalar_prod_bi.c b/linalg/scalar_prod_bi.c
deleted file mode 100644
index 9d2aca4b8..000000000
--- a/linalg/scalar_prod_bi.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-/****************************************************************************
- *
- *     Scalar product routine adapted for the bispinor case
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- *
- ***************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <stdlib.h>
-#ifdef MPI
-#include <mpi.h>
-#endif
-#include "su3.h"
-#include "scalar_prod_bi.h"
-
-
-/*  <S,R>=S^* times R */
-_Complex double scalar_prod_bi(bispinor * const S, bispinor * const R, const int N){
-
-  _Complex double ks,kc,ds,tr,ts,tt;
-  spinor *s,*r, *t, *u;
-  _Complex double c = 0.0;
-  
-  ks=0.0;
-  kc=0.0;
-  
-  for (int ix = 0; ix < N; ++ix)
-  {
-
-    s=(spinor *) &S[ix].sp_up;
-    r=(spinor *) &R[ix].sp_up;
-    t=(spinor *) &S[ix].sp_dn;
-    u=(spinor *) &R[ix].sp_dn;
-
-    
-    ds = conj(s->s0.c0) * r->s0.c0 + conj(s->s0.c1) * r->s0.c1 + conj(s->s0.c2) * r->s0.c2 +
-         conj(s->s1.c0) * r->s1.c0 + conj(s->s1.c1) * r->s1.c1 + conj(s->s1.c2) * r->s1.c2 +
-         conj(s->s2.c0) * r->s2.c0 + conj(s->s2.c1) * r->s2.c1 + conj(s->s2.c2) * r->s2.c2 +
-         conj(s->s3.c0) * r->s3.c0 + conj(s->s3.c1) * r->s3.c1 + conj(s->s3.c2) * r->s3.c2 +
-         conj(t->s0.c0) * u->s0.c0 + conj(t->s0.c1) * u->s0.c1 + conj(t->s0.c2) * u->s0.c2 +
-         conj(t->s1.c0) * u->s1.c0 + conj(t->s1.c1) * u->s1.c1 + conj(t->s1.c2) * u->s1.c2 +
-         conj(t->s2.c0) * u->s2.c0 + conj(t->s2.c1) * u->s2.c1 + conj(t->s2.c2) * u->s2.c2 +
-         conj(t->s3.c0) * u->s3.c0 + conj(t->s3.c1) * u->s3.c1 + conj(t->s3.c2) * u->s3.c2;
-
-    /* Kahan Summation */    
-    tr = ds+kc;
-    ts = tr+ks;
-    tt = ts-ks;
-    ks = ts;
-    kc = tr-tt;
-  }
-  kc = ks + kc;
-
-#if defined MPI
-  MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD);
-  kc = ks;
-#endif
-
-  c += kc;
-
-  return(c);
-
-}
diff --git a/linalg/scalar_prod_bi.h b/linalg/scalar_prod_bi.h
deleted file mode 100644
index c9addd8bd..000000000
--- a/linalg/scalar_prod_bi.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _SCALAR_PROD_BI_H
-#define _SCALAR_PROD_BI_H
-
-#include "su3.h"
-/*  <S,R>=SxR^* */
-_Complex double scalar_prod_bi(bispinor * const S, bispinor * const R, const int N); 
-
-#endif
diff --git a/linalg/scalar_prod_r_bi.c b/linalg/scalar_prod_r_bi.c
deleted file mode 100644
index f8506da05..000000000
--- a/linalg/scalar_prod_r_bi.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*******************************************************************************
- *
- * File scalar_prod_r_bi.c
- *
- *   double scalar_prod_r_bi(bispinor * const S,bispinor * const R, const int N)
- *     Returns the real part of the scalar product (*R,*S)
- *
- * 
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- * 
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#ifdef MPI
-# include <mpi.h>
-#endif
-#include "su3.h"
-#include "scalar_prod_r_bi.h"
-
-
-/*  R input, S input */
-double scalar_prod_r_bi(bispinor * const S,bispinor * const R, const int N){
-
-
-  int ix;
-  static double ks,kc,ds,tr,ts,tt;
-  spinor *s,*r,*t,*u;
-  
-  ks=0.0;
-  kc=0.0;
-  
-  for (ix=0;ix<N;ix++){
-
-    s = (spinor *) &S[ix].sp_up;
-    r = (spinor *) &R[ix].sp_up;
-    t = (spinor *) &S[ix].sp_dn;
-    u = (spinor *) &R[ix].sp_dn;
-    
-    ds = r->s0.c0 * conj(s->s0.c0) + r->s0.c1 * conj(s->s0.c1) + r->s0.c2 * conj(s->s0.c2) +
-    r->s1.c0 * conj(s->s1.c0) + r->s1.c1 * conj(s->s1.c1) + r->s1.c2 * conj(s->s1.c2) +
-    r->s2.c0 * conj(s->s2.c0) + r->s2.c1 * conj(s->s2.c1) + r->s2.c2 * conj(s->s2.c2) +
-    r->s3.c0 * conj(s->s3.c0) + r->s3.c1 * conj(s->s3.c1) + r->s3.c2 * conj(s->s3.c2) +
-    u->s0.c0 * conj(t->s0.c0) + u->s0.c1 * conj(t->s0.c1) + u->s0.c2 * conj(t->s0.c2) +
-    u->s1.c0 * conj(t->s1.c0) + u->s1.c1 * conj(t->s1.c1) + u->s1.c2 * conj(t->s1.c2) +
-    u->s2.c0 * conj(t->s2.c0) + u->s2.c1 * conj(t->s2.c1) + u->s2.c2 * conj(t->s2.c2) +
-    u->s3.c0 * conj(t->s3.c0) + u->s3.c1 * conj(t->s3.c1) + u->s3.c2 * conj(t->s3.c2);
-    
-    tr = ds + kc;
-    ts = tr + ks;
-    tt = ts-ks;
-    ks = ts;
-    kc = tr-tt;
-  }
-  kc = ks + kc;
-
-#if defined MPI
-  
-  MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return ks;
-
-#endif
-  
-  return kc;
-}
- 
-
diff --git a/linalg/scalar_prod_r_bi.h b/linalg/scalar_prod_r_bi.h
deleted file mode 100644
index dfb95d243..000000000
--- a/linalg/scalar_prod_r_bi.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _SCALAR_PROD_R_BI_H
-#define _SCALAR_PROD_R_BI_H
-
-#include "su3.h"
-
-/* Returns the real part of the scalar product (*R,*S) */
-double scalar_prod_r_bi(bispinor * const S,bispinor * const R, const int N);
-
-#endif
diff --git a/linalg/square_norm_bi.c b/linalg/square_norm_bi.c
deleted file mode 100644
index c5ed05cfc..000000000
--- a/linalg/square_norm_bi.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*******************************************************************************
- *
- * File square_norm_bi.c
- *
- *   double square_norm_bi(bispinor * const P )
- *     Returns the square norm of *P
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- * 
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#ifdef MPI
-# include <mpi.h>
-#endif
-#include "su3.h"
-#include "sse.h"
-#include "square_norm_bi.h"
-
-double square_norm_bi(bispinor  *  const P, const int N) {
-  int ix;
-  static double ks,kc,ds,tr,ts,tt;
-  spinor  * s,  * t;
-  
-  ks = 0.0;
-  kc = 0.0;
-  
-  /*  Change due to even-odd preconditioning : VOLUME   to VOLUME/2  */   
-  for (ix  =  0; ix < N; ix++)
-  {
-    s = &P[ix].sp_up;
-    t = &P[ix].sp_dn;
-
-    
-    ds = creal(s->s0.c0) * creal(s->s0.c0) + cimag(s->s0.c0) * cimag(s->s0.c0) + 
-         creal(s->s0.c1) * creal(s->s0.c1) + cimag(s->s0.c1) * cimag(s->s0.c1) + 
-         creal(s->s0.c2) * creal(s->s0.c2) + cimag(s->s0.c2) * cimag(s->s0.c2) + 
-         creal(s->s1.c0) * creal(s->s1.c0) + cimag(s->s1.c0) * cimag(s->s1.c0) + 
-         creal(s->s1.c1) * creal(s->s1.c1) + cimag(s->s1.c1) * cimag(s->s1.c1) + 
-         creal(s->s1.c2) * creal(s->s1.c2) + cimag(s->s1.c2) * cimag(s->s1.c2) + 
-         creal(s->s2.c0) * creal(s->s2.c0) + cimag(s->s2.c0) * cimag(s->s2.c0) + 
-         creal(s->s2.c1) * creal(s->s2.c1) + cimag(s->s2.c1) * cimag(s->s2.c1) + 
-         creal(s->s2.c2) * creal(s->s2.c2) + cimag(s->s2.c2) * cimag(s->s2.c2) + 
-         creal(s->s3.c0) * creal(s->s3.c0) + cimag(s->s3.c0) * cimag(s->s3.c0) + 
-         creal(s->s3.c1) * creal(s->s3.c1) + cimag(s->s3.c1) * cimag(s->s3.c1) + 
-         creal(s->s3.c2) * creal(s->s3.c2) + cimag(s->s3.c2) * cimag(s->s3.c2) +
-         creal(t->s0.c0) * creal(t->s0.c0) + cimag(t->s0.c0) * cimag(t->s0.c0) + 
-         creal(t->s0.c1) * creal(t->s0.c1) + cimag(t->s0.c1) * cimag(t->s0.c1) + 
-         creal(t->s0.c2) * creal(t->s0.c2) + cimag(t->s0.c2) * cimag(t->s0.c2) + 
-         creal(t->s1.c0) * creal(t->s1.c0) + cimag(t->s1.c0) * cimag(t->s1.c0) + 
-         creal(t->s1.c1) * creal(t->s1.c1) + cimag(t->s1.c1) * cimag(t->s1.c1) + 
-         creal(t->s1.c2) * creal(t->s1.c2) + cimag(t->s1.c2) * cimag(t->s1.c2) + 
-         creal(t->s2.c0) * creal(t->s2.c0) + cimag(t->s2.c0) * cimag(t->s2.c0) + 
-         creal(t->s2.c1) * creal(t->s2.c1) + cimag(t->s2.c1) * cimag(t->s2.c1) + 
-         creal(t->s2.c2) * creal(t->s2.c2) + cimag(t->s2.c2) * cimag(t->s2.c2) + 
-         creal(t->s3.c0) * creal(t->s3.c0) + cimag(t->s3.c0) * cimag(t->s3.c0) + 
-         creal(t->s3.c1) * creal(t->s3.c1) + cimag(t->s3.c1) * cimag(t->s3.c1) + 
-         creal(t->s3.c2) * creal(t->s3.c2) + cimag(t->s3.c2) * cimag(t->s3.c2);
-    
-    tr = ds + kc;
-    ts = tr + ks;
-    tt = ts-ks;
-    ks = ts;
-    kc = tr-tt;
-  }
-  kc = ks + kc;
-#ifdef MPI
-  MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return ks;
-#else
-  return kc;
-#endif
- 
-}
diff --git a/linalg/square_norm_bi.h b/linalg/square_norm_bi.h
deleted file mode 100644
index ee6e4b60d..000000000
--- a/linalg/square_norm_bi.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _SQUARE_NORM_BI_H
-#define _SQUARE_NORM_BI_H
-
-#include "su3.h"
-
-/* double square_norm_bi(bispinor * const P )
- *     Returns the square norm of *P */
-
-double square_norm_bi(bispinor * const P, const int N);
-
-
-#endif
-
-
-
diff --git a/solver/Makefile.in b/solver/Makefile.in
index 3b4e21381..9a7ef1b30 100644
--- a/solver/Makefile.in
+++ b/solver/Makefile.in
@@ -34,7 +34,7 @@ libsolver_TARGETS = bicgstab_complex gmres \
 	            bicgstabell bicgstab2 eigenvalues fgmres \
 	            gcr gcr4complex diagonalise_general_matrix \
 	            quicksort gmres_dr lu_solve jdher Msap \
-                    jdher_bi gram-schmidt_bi gram-schmidt \
+                    jdher_bi gram-schmidt \
                     bicgstab_complex_bi cg_her_bi pcg_her \
                     sub_low_ev cg_her_nd poly_precon \
                     generate_dfl_subspace dfl_projector \
diff --git a/solver/bicgstab_complex_bi.c b/solver/bicgstab_complex_bi.c
index dd2637a31..538a6f9ef 100644
--- a/solver/bicgstab_complex_bi.c
+++ b/solver/bicgstab_complex_bi.c
@@ -70,14 +70,14 @@ int bicgstab_complex_bi(bispinor * const P, bispinor * const Q, const int max_it
   t = bisolver_field[5];
 
   f(r, P);
-  diff_bi(p, Q, r, N);
-  assign_bi(r, p, N);
-  assign_bi(hatr, p, N);
-  rho0 = scalar_prod_bi(hatr, r, N);
-  squarenorm = square_norm_bi(Q, N);
+  diff((spinor*)p, (spinor*)Q, (spinor*)r, 2*N);
+  assign((spinor*)r, (spinor*)p, 2*N);
+  assign((spinor*)hatr, (spinor*)p, 2*N);
+  rho0 = scalar_prod((spinor*)hatr, (spinor*)r, 2*N, 1);
+  squarenorm = square_norm((spinor*)Q, 2*N, 1);
 
   for(i = 0; i < max_iter; i++){
-    err = square_norm_bi(r, N);
+    err = square_norm((spinor*)r, 2*N, 1);
     if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
       printf("%d %e\n", i, err);
       fflush(stdout);
@@ -88,22 +88,22 @@ int bicgstab_complex_bi(bispinor * const P, bispinor * const Q, const int max_it
       return(i);
     }
     f(v, p);
-    denom = scalar_prod_bi(hatr, v, N);
+    denom = scalar_prod((spinor*)hatr, (spinor*)v, 2*N, 1);
     alpha = rho0 / denom;
-    assign_bi(s, r, N);
-    assign_diff_mul_bi(s, v, alpha, N);
+    assign((spinor*)s, (spinor*)r, 2*N);
+    assign_diff_mul((spinor*)s, (spinor*)v, alpha, 2*N);
     f(t, s);
-    omega = scalar_prod_bi(t,s, N);
-    omega /= square_norm_bi(t, N);
-    assign_add_mul_add_mul_bi(P, p, s, alpha, omega, N);
-    assign_bi(r, s, N);
-    assign_diff_mul_bi(r, t, omega, N);
-    rho1 = scalar_prod_bi(hatr, r, N);
+    omega = scalar_prod((spinor*)t, (spinor*)s, 2*N, 1);
+    omega /= square_norm((spinor*)t, 2*N, 1);
+    assign_add_mul_add_mul((spinor*)P, (spinor*)p, (spinor*)s, alpha, omega, 2*N);
+    assign((spinor*)r, (spinor*)s, 2*N);
+    assign_diff_mul((spinor*)r, (spinor*)t, omega, 2*N);
+    rho1 = scalar_prod((spinor*)hatr, (spinor*)r, 2*N, 1);
     nom = alpha * rho1;
     denom = omega * rho0;
     beta = nom / denom;
     omega = -omega;
-    assign_mul_bra_add_mul_ket_add_bi(p, v, r, omega, beta, N);
+    assign_mul_bra_add_mul_ket_add((spinor*)p, (spinor*)v, (spinor*)r, omega, beta, 2*N);
     rho0 = rho1;
   }
   finalize_bisolver(bisolver_field, nr_sf);
diff --git a/solver/cg_her_bi.c b/solver/cg_her_bi.c
index 6b239eb8f..677e0603e 100644
--- a/solver/cg_her_bi.c
+++ b/solver/cg_her_bi.c
@@ -77,51 +77,51 @@ int cg_her_bi(bispinor * const P, bispinor * const Q, const int max_iter,
   else {
     init_bisolver_field(&bisolver_field, VOLUMEPLUSRAND/2, nr_sf);
   }
-  squarenorm = square_norm_bi(Q, N);  
+  squarenorm = square_norm((spinor*)Q, 2*N, 1);  
   /*        !!!!   INITIALIZATION    !!!! */
-  assign_bi(bisolver_field[0], P, N);
+  assign((spinor*)bisolver_field[0], (spinor*)P, 2*N);
   /*        (r_0,r_0)  =  normsq         */
-  normsp=square_norm_bi(P, N);
-  assign_bi(bisolver_field[5], Q, N);
+  normsp=square_norm((spinor*)P, 2*N, 1);
+  assign((spinor*)bisolver_field[5], (spinor*)Q, 2*N);
   
   /* initialize residue r and search vector p */
   if(normsp == 0) {
     /* if a starting solution vector equal to zero is chosen */
-    assign_bi(bisolver_field[1], bisolver_field[5], N);
-    assign_bi(bisolver_field[2], bisolver_field[5], N);
-    normsq=square_norm_bi(Q, N);
+    assign((spinor*)bisolver_field[1], (spinor*)bisolver_field[5], 2*N);
+    assign((spinor*)bisolver_field[2], (spinor*)bisolver_field[5], 2*N);
+    normsq=square_norm((spinor*)Q, 2*N, 1);
   }
   else {
     /* if a starting solution vector different from zero is chosen */
     f(bisolver_field[3], bisolver_field[0]);
-    diff_bi(bisolver_field[1], bisolver_field[5], 
-	    bisolver_field[3], N);
-    assign_bi(bisolver_field[2], bisolver_field[1], N);
-    normsq=square_norm_bi(bisolver_field[2], N);
+    diff((spinor*)bisolver_field[1], (spinor*)bisolver_field[5], 
+	 (spinor*)bisolver_field[3], 2*N);
+    assign((spinor*)bisolver_field[2], (spinor*)bisolver_field[1], 2*N);
+    normsq=square_norm((spinor*)bisolver_field[2], 2*N, 1);
   }
   
   /* main loop */
   for(iteration = 0; iteration < max_iter; iteration++) {
     f(bisolver_field[4], bisolver_field[2]);
-    pro=scalar_prod_r_bi(bisolver_field[2], bisolver_field[4], N);
+    pro=scalar_prod_r((spinor*)bisolver_field[2], (spinor*)bisolver_field[4], 2*N, 1);
      
     /*  Compute alpha_cg(i+1)   */
     alpha_cg=normsq/pro;
      
     /*  Compute x_(i+1) = x_i + alpha_cg(i+1) p_i    */
-    assign_add_mul_r_bi(bisolver_field[0], bisolver_field[2],  alpha_cg, N);
+    assign_add_mul_r((spinor*)bisolver_field[0], (spinor*)bisolver_field[2],  alpha_cg, 2*N);
     /*  Compute r_(i+1) = r_i - alpha_cg(i+1) Qp_i   */
-    assign_add_mul_r_bi(bisolver_field[1], bisolver_field[4], -alpha_cg, N);
+    assign_add_mul_r((spinor*)bisolver_field[1], (spinor*)bisolver_field[4], -alpha_cg, 2*N);
 
     /* Check whether the precision is reached ... */
-    err=square_norm_bi(bisolver_field[1], N);
+    err=square_norm((spinor*)bisolver_field[1], 2*N, 1);
 
     if((g_proc_id == g_stdio_proc) && (g_debug_level > 1)) {
       printf("%d\t%g\n",iteration,err); fflush( stdout);
     }
     
     if(((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) {
-      assign_bi(P, bisolver_field[0], N);
+      assign((spinor*)P, (spinor*)bisolver_field[0], 2*N);
       finalize_bisolver(bisolver_field, nr_sf);
       return(iteration+1);
     }
@@ -129,20 +129,11 @@ int cg_her_bi(bispinor * const P, bispinor * const Q, const int max_iter,
     /* Compute beta_cg(i+1)
        Compute p_(i+1) = r_i+1 + beta_(i+1) p_i     */
     beta_cg=err/normsq;
-    assign_mul_add_r_bi(bisolver_field[2], beta_cg, bisolver_field[1], N);
+    assign_mul_add_r((spinor*)bisolver_field[2], beta_cg, (spinor*)bisolver_field[1], 2*N);
     normsq=err;
   }
   
-  assign_bi(P, bisolver_field[0], N);  
+  assign((spinor*)P, (spinor*)bisolver_field[0], 2*N);  
   finalize_bisolver(bisolver_field, nr_sf);
   return(-1);
 }
-
-
-
-
-
-
-
-
-
diff --git a/solver/gram-schmidt_bi.c b/solver/gram-schmidt_bi.c
deleted file mode 100644
index bcdc73b7e..000000000
--- a/solver/gram-schmidt_bi.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-/**************************************************************************
- *
- *
- *  Iterated Classical Gram-Schmidt Orthogonalization for bispinors
- *
- *  Orthogonalizes v with respect to A.
- *
- * Author: Thomas Chiarappa
- *         Thomas.Chiarappa@mib.infn.it
- *
- *************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <math.h>
-#include <stdio.h>
-#include "su3spinor.h"
-#include <complex.h>
-#include "linalg_eo.h"
-#include "linalg/blas.h"
-#include "gram-schmidt_bi.h"
-
-const int max_cgs_it_bi=5;
-static int ONE = 1;
-
-/*
- *
- *  Iterated Classical Gram-Schmidt Orthogonalization
- *
- *  Orthogonalizes v with respect to A.
- *
- */
-
-void IteratedClassicalGS_bi(_Complex double v[], double *vnrm, int n, int m, _Complex double A[], 
-			 _Complex double work1[], int lda) {
-  const double alpha = 0.5;
-
-  double vnrm_old;
-  int i, isorth = 0;
-  int j;
-  _Complex double CMONE, CONE;
-  char *fupl_n = "N";
-
-  CMONE = -1.;
-  CONE = 1.;
-
-  vnrm_old = sqrt(square_norm_bi((bispinor*) v, n*sizeof(_Complex double)/sizeof(bispinor)));
-
-  for(i = 0; !isorth && i < max_cgs_it_bi; i ++) {
-
-    for(j = 0; j < m; j++){
-      work1[j] = scalar_prod_bi((bispinor*)(A+j*lda), (bispinor*) v, n*sizeof(_Complex double)/sizeof(bispinor));
-    }
-    _FT(zgemv)(fupl_n, &n, &m, &CMONE, A, &lda, work1, &ONE, &CONE, v, &ONE, 1);
-    (*vnrm) = sqrt(square_norm_bi((bispinor*) v, n*sizeof(_Complex double)/sizeof(bispinor)));
-
-    isorth=((*vnrm) > alpha*vnrm_old);
-    vnrm_old = *vnrm;
-  }
-  if (i >= max_cgs_it_bi) {
-/*     errorhandler(400,""); */
-  }
-}
-
-
-/*
- *  ModifiedGramSchmidt 
- *
- *  Orthogonlaizes v with respect to span{A[:,1:m]}
- */
-
-void ModifiedGS_bi(_Complex double v[], int n, int m, _Complex double A[], int lda){
-
-  int i;
-  _Complex double s;
-
-  for (i = 0; i < m; i ++) {
-    s = -scalar_prod_bi((bispinor*)(A+i*lda), (bispinor*) v, n*sizeof(_Complex double)/sizeof(bispinor));
-    _FT(zaxpy)(&n, &s, A+i*lda, &ONE, v, &ONE); 
-  }
-}
-
diff --git a/solver/gram-schmidt_bi.h b/solver/gram-schmidt_bi.h
deleted file mode 100644
index 06a6c29f5..000000000
--- a/solver/gram-schmidt_bi.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-#ifndef _GRAM_SCHMIDT_BI_H
-#define _GRAM_SCHMIDT_BI_H
-#include <complex.h>
-
-void IteratedClassicalGS_bi(_Complex double v[], double *vnrm, int n, int m, _Complex double A[], 
-			 _Complex double work1[], int lda) ;
-
-void ModifiedGS_bi(_Complex double v[], int n, int m, _Complex double A[], int lda);
-
-#endif
diff --git a/solver/jdher_bi.c b/solver/jdher_bi.c
index 0497108b8..3418a57d8 100644
--- a/solver/jdher_bi.c
+++ b/solver/jdher_bi.c
@@ -62,6 +62,7 @@
 #include <complex.h>
 #include "solver/solver.h"
 #include "solver/gram-schmidt_bi.h"
+#include "solver/gram-schmidt.h"
 #include "solver/quicksort.h"
 #include "jdher.h"
 #include "jdher_bi.h"
@@ -312,8 +313,8 @@ void jdher_bi(int n, int lda, double tau, double tol,
     j = blksize;
   }
   for (cnt = 0; cnt < j; cnt ++) {
-    ModifiedGS_bi(V + cnt*lda, n, cnt, V, lda);
-    alpha = sqrt(square_norm_bi((bispinor*)(V+cnt*lda), N));
+    ModifiedGS(V + cnt*lda, n, cnt, V, lda);
+    alpha = sqrt(square_norm((spinor*)(V+cnt*lda), 2*N, 1));
     alpha = 1.0 / alpha;
     _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE);
   }
@@ -323,7 +324,7 @@ void jdher_bi(int n, int lda, double tau, double tol,
     A_psi((bispinor*) temp1, (bispinor*)(V+cnt*lda));
     idummy = cnt+1;
     for(i = 0; i < idummy; i++) {
-      M[cnt*jmax+i] = scalar_prod_bi((bispinor*)(V+i*lda), (bispinor*) temp1, N);
+      M[cnt*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, 2*N, 1);
     }
   }
   /* Other initializations */
@@ -405,7 +406,7 @@ void jdher_bi(int n, int lda, double tau, double tol,
 
 	/* Compute norm of the residual and update arrays convind/keepind*/
 	resnrm_old[act] = resnrm[act];
-	resnrm[act] = sqrt(square_norm_bi((bispinor*) r, N));
+	resnrm[act] = sqrt(square_norm((spinor*) r, 2*N, 1));
 	if (resnrm[act] < tol){
 	  convind[conv] = act; 
 	  conv = conv + 1; 
@@ -594,7 +595,7 @@ void jdher_bi(int n, int lda, double tau, double tol,
       solvestep[act] = solvestep[act] + 1;
 
       /* equation and project if necessary */
-      ModifiedGS_bi(r, n, k + actblksize, Q, lda);
+      ModifiedGS(r, n, k + actblksize, Q, lda);
 
       g_sloppy_precision = 1;
       /* Solve the correction equation ...  */
@@ -626,8 +627,8 @@ void jdher_bi(int n, int lda, double tau, double tol,
 	 apply "IteratedCGS" to prevent numerical breakdown 
          in order to orthogonalize v to V */
 
-      ModifiedGS_bi(v, n, k+actblksize, Q, lda);
-      IteratedClassicalGS_bi(v, &alpha, n, j, V, temp1, lda);
+      ModifiedGS(v, n, k+actblksize, Q, lda);
+      IteratedClassicalGS(v, &alpha, n, j, V, temp1, lda);
 
       alpha = 1.0 / alpha;
       _FT(dscal)(&n2, &alpha, (double*) v, &ONE);
@@ -636,7 +637,7 @@ void jdher_bi(int n, int lda, double tau, double tol,
       A_psi((bispinor*) temp1, (bispinor*) v);
       idummy = j+1;
       for(i = 0; i < idummy; i++){
- 	M[j*jmax+i] = scalar_prod_bi((bispinor*)(V+i*lda), (bispinor*) temp1, N);
+ 	M[j*jmax+i] = scalar_prod((spinor*)(V+i*lda), (spinor*) temp1, 2*N, 1);
       }
       /* Increasing SearchSpaceSize j */
       j ++;
@@ -678,7 +679,7 @@ void jdher_bi(int n, int lda, double tau, double tol,
       theta = -lambda[act];
       A_psi((bispinor*) r, (bispinor*) q);
       _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
-      alpha = sqrt(square_norm_bi((bispinor*) r, N));
+      alpha = sqrt(square_norm((spinor*) r, 2*N, 1));
       if(g_proc_id == 0) {
 	printf("%3d %22.15e %12.5e\n", act+1, lambda[act],
 	       alpha);
@@ -853,8 +854,8 @@ void Proj_A_psi_bi(bispinor * const y, bispinor * const x){
   _FT(daxpy)(&p_n2, &mtheta, (double*) x, &ONE, (double*) y, &ONE);
   /* p_work_bi = Q^dagger*y */ 
   for(i = 0; i < p_k; i++) {
-    p_work_bi[i] = scalar_prod_bi((bispinor*)(p_Q_bi+i*p_lda), (bispinor*) y, 
-				  p_n*sizeof(_Complex double)/sizeof(bispinor));
+    p_work_bi[i] = scalar_prod((spinor*)(p_Q_bi+i*p_lda), (spinor*) y, 
+			       p_n*sizeof(_Complex double)/sizeof(spinor), 1);
   }
   /* y = y - Q*p_work_bi */ 
   _FT(zgemv)(fupl_n, &p_n, &p_k, &CMONE, p_Q_bi, &p_lda, (_Complex double*) p_work_bi, 
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 4bdf6e2f6..55641f680 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -294,7 +294,6 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
                      spinor * const k_strange, spinor * const k_charm, const _Complex double z){
 
-  int ix;
   spinor *r, *s;
   su3_vector ALIGN phi1;
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
@@ -343,9 +342,9 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   /************ loop over all lattice sites ************/
 
 #ifdef OMP
-#pragma omp parallel for private(r) private(s) private(phi1) private(ix)
+#pragma omp parallel for private(r) private(s) private(phi1)
 #endif
-  for(ix = 0; ix < (VOLUME/2); ix++){
+  for(int ix = 0; ix < (VOLUME/2); ix++){
 
     r=l_strange + ix;
     s=k_strange + ix;
@@ -649,7 +648,7 @@ void mul_one_pm_iconst(spinor * const l, spinor * const k,
 #ifdef OMP
 #pragma omp for
 #endif
-  for(int ix = 0; ix < (VOLUME/2); ++ix){
+  for(unsigned int ix = 0; ix < (VOLUME/2); ++ix){
     r=l + ix;
     s=k + ix;
     /* Multiply the spinorfield with the inverse of 1+imu\gamma_5 */

From 0aa9f439c96e39cb838d899f89c054970f4404b8 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 5 Oct 2012 11:53:54 +0200
Subject: [PATCH 026/110] removed all _bi linalg functions as they are
 superfluous

---
 linalg_eo.h       | 11 -----------
 solver/jdher_bi.c |  1 -
 2 files changed, 12 deletions(-)

diff --git a/linalg_eo.h b/linalg_eo.h
index 4d1af71ba..b33fc1340 100644
--- a/linalg_eo.h
+++ b/linalg_eo.h
@@ -50,17 +50,6 @@
 #include "linalg/mul_add_mul_r.h"
 
 #include "linalg/comp_decomp.h"
-#include "linalg/square_norm_bi.h"
-#include "linalg/assign_bi.h"
-#include "linalg/scalar_prod_bi.h"
-#include "linalg/diff_bi.h"
-#include "linalg/assign_diff_mul_bi.h"
-#include "linalg/assign_add_mul_add_mul_bi.h"
-#include "linalg/assign_mul_bra_add_mul_ket_add_bi.h"
-#include "linalg/mul_r_bi.h"
-#include "linalg/scalar_prod_r_bi.h"
-#include "linalg/assign_add_mul_r_bi.h"
-#include "linalg/assign_mul_add_r_bi.h"
 
 #include "linalg/mattimesvec.h"
 
diff --git a/solver/jdher_bi.c b/solver/jdher_bi.c
index 3418a57d8..a0f8ca223 100644
--- a/solver/jdher_bi.c
+++ b/solver/jdher_bi.c
@@ -61,7 +61,6 @@
 #include "linalg_eo.h"
 #include <complex.h>
 #include "solver/solver.h"
-#include "solver/gram-schmidt_bi.h"
 #include "solver/gram-schmidt.h"
 #include "solver/quicksort.h"
 #include "jdher.h"

From 747e331284a799d339ff4eb411980ea57275fe2e Mon Sep 17 00:00:00 2001
From: "Luigi Scorzato (ECT/FBK) scorzato@ectstar.eu" <scorzato@aserv1.fbk.eu>
Date: Fri, 5 Oct 2012 16:28:23 +0200
Subject: [PATCH 027/110] Fixes of some broken LapH files. Sample Input added

---
 linalg/assign.c             |  1 -
 sample-input/LapH.input     | 31 ++++++++++++++++
 solver/eigenvalues_Jacobi.c | 20 +++++------
 solver/jdher_su3vect.c      | 70 ++++++++++++++++++++-----------------
 4 files changed, 78 insertions(+), 44 deletions(-)
 create mode 100644 sample-input/LapH.input

diff --git a/linalg/assign.c b/linalg/assign.c
index 4563384cd..277513dff 100644
--- a/linalg/assign.c
+++ b/linalg/assign.c
@@ -80,5 +80,4 @@ void assign_su3vect(su3_vector * const R, su3_vector * const S, const int N)
     r->c2 = s->c2;
   }
 }
-}
 #endif
diff --git a/sample-input/LapH.input b/sample-input/LapH.input
new file mode 100644
index 000000000..1f84257d4
--- /dev/null
+++ b/sample-input/LapH.input
@@ -0,0 +1,31 @@
+# This is an example input file for the LapH program.
+# Configure with --enable-laph (LAPACK is also needed; for parallel runs, --enable-indexindepgeom and --enable-tsplitpar are needed as well)
+# e.g., (parallel):
+# ./configure --enable-sse3 --enable-mpi --with-mpidimension=XYZ  --disable-halfspinor --enable-indexindepgeom --enable-tsplitpar --enable-laph --with-limedir=${limedir} --with-lemondir=${lemondir} --with-lapack=${lapacklib} CC="${mpicomp} -O3" CFLAGS="-msse3 -O3"
+# e.g. serial:
+# ./configure --enable-sse3 --disable-mpi --disable-halfspinor  --enable-laph --with-limedir=${limedir} --with-lapack=${lapacklib}
+# Compile with make LapH_ev
+# Executable LapH_ev 
+# Use this file as an example of input.
+# The code will write out the eigenvalues and eigenvectors (7 in this example) of the LapH operator (see 0905.2160).
+# The eigenvalues are written in text files eigenvalue.TT.CCCC (TT=timeslice, CCCC= configuration number).
+# The eigenvectors are written in binary files eigenvector.NN.TT.CCC (NN=eigenvalue number, TT, CC as above),
+# and have size L^3*Nc*2*sizeof(double).
+
+L=16
+T=32
+
+DisableIOChecks = yes
+DebugLevel = 5
+InitialStoreCounter = 20
+NoEigenvalues = 7
+Measurements = 1
+2kappaMu = 0.001286848
+kappa = 0.160856
+NrXProcs = 2
+NrYProcs = 1
+NrZProcs = 2
+BCAngleT = 1.
+GaugeConfigInputFile = conf
+UseEvenOdd = yes
+
diff --git a/solver/eigenvalues_Jacobi.c b/solver/eigenvalues_Jacobi.c
index 9fefc302a..f798f2040 100644
--- a/solver/eigenvalues_Jacobi.c
+++ b/solver/eigenvalues_Jacobi.c
@@ -64,7 +64,7 @@ double eigenvalues_Jacobi(int * nr_of_eigenvalues, const int max_iterations,
   int verbosity = 1, converged = 0, blocksize = 1 , blockwise=0;
   int solver_it_max = 50, j_max, j_min;
   double decay_min = 1.7, decay_max = 1.5, prec, threshold_min = 1.e-3, threshold_max = 5.e-2;
-volatile  int v0dim = 0;
+  int v0dim = 0;
   matrix_mult_su3vect f;
   int N=SPACEVOLUME, N2=(SPACEVOLUME + SPACERAND);
   su3_vector * max_eigenvector_ = NULL, *max_eigenvector;
@@ -128,12 +128,12 @@ volatile  int v0dim = 0;
   solver_it_max = 64;
   /* compute the maximal one first */
   /* DEBUG 
-  jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+  jdher_su3vect(N*sizeof(su3_vector)/sizeof(_Complex double), N2*sizeof(su3_vector)/sizeof(_Complex double),
 		50., 1.e-12, 
 		1, 15, 8, max_iterations, 1, 0, 0, NULL,
 		CG, solver_it_max,
 		threshold_max, decay_max, verbosity,
-		&converged, (complex*) max_eigenvector, (double*) &max_eigenvalue_su3v,
+		&converged, (_Complex double*) max_eigenvector, (double*) &max_eigenvalue_su3v,
 		&returncode2, JD_MAXIMAL, 1,tslice,f);
   */
   
@@ -146,23 +146,23 @@ volatile  int v0dim = 0;
   solver_it_max = 256;
   
   if(maxmin)
-    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+    jdher_su3vect(N*sizeof(su3_vector)/sizeof(_Complex double), N2*sizeof(su3_vector)/sizeof(_Complex double),
 		  50., prec, 
 		  (*nr_of_eigenvalues), j_max, j_min, 
-		  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
+		  max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_su3v,
 		  CG, solver_it_max,
 		  threshold_max, decay_max, verbosity,
-		  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
+		  &converged, (_Complex double*) eigenvectors_su3v, eigenvls_su3v,
 		  &returncode, JD_MAXIMAL, 1,tslice,
 		  f);
   else
-    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+    jdher_su3vect(N*sizeof(su3_vector)/sizeof(_Complex double), N2*sizeof(su3_vector)/sizeof(_Complex double),
 		  0., prec, 
 		  (*nr_of_eigenvalues), j_max, j_min, 
-		  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
+		  max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_su3v,
 		  CG, solver_it_max,
 		  threshold_min, decay_min, verbosity,
-		  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
+		  &converged, (_Complex double*) eigenvectors_su3v, eigenvls_su3v,
 		  &returncode, JD_MINIMAL, 1,tslice,
 		  f);
   
@@ -194,7 +194,7 @@ volatile  int v0dim = 0;
 
     MPI_File_open(g_cart_grid, filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp);
     writer = lemonCreateWriter(&fp, g_cart_grid);
-    header = lemonCreateHeader(1 /* MB */, 1 /* ME */, "lattice-su3_vector-data",SPACEVOLUME*3*sizeof(complex));
+    header = lemonCreateHeader(1 /* MB */, 1 /* ME */, "lattice-su3_vector-data",SPACEVOLUME*3*sizeof(_Complex double));
     lemonWriteRecordHeader(header, writer);
     lemonDestroyHeader(header);
     lemonWriteLatticeParallel(writer, s, siteSize, dims);
diff --git a/solver/jdher_su3vect.c b/solver/jdher_su3vect.c
index ad5038fe8..9f28a03ec 100644
--- a/solver/jdher_su3vect.c
+++ b/solver/jdher_su3vect.c
@@ -32,6 +32,7 @@
 #include <stdlib.h>
 #include <math.h>
 #include <errno.h>
+#include <complex.h>
 #include "global.h"
 #include "su3.h"
 #include "linalg/fortran.h"
@@ -61,7 +62,7 @@
 static void print_status_su3vect(int clvl, int it, int k, int j, int kmax, 
 				 int blksize, int actblksize,
 				 double *s, double *resnrm, int *actcorrits);
-static void sorteig_su3vect(int j, double S[], complex U[], int ldu, double tau,
+static void sorteig_su3vect(int j, double S[], _Complex double U[], int ldu, double tau,
 			    double dtemp[], int idx1[], int idx2[], int strategy);
 
 /* Projection routines */
@@ -83,14 +84,14 @@ void jderrorhandler_su3vect(const int i, char * message)
  ****************************************************************************/
 /* static double DMONE = -1.0, DZER = 0.0, DONE = 1.0; */
 static int MONE = -1, ONE = 1;
-static complex CONE, CZERO, CMONE;
+static _Complex double CONE, CZERO, CMONE;
 
 /* Projector variables */
 
 static int p_n, p_n2, p_k, p_lda;
 static double p_theta;
-complex * p_Q;
-complex * p_work;
+_Complex double * p_Q;
+_Complex double * p_work;
 matrix_mult_su3vect p_A_psi_s3;
 
 static char * fupl_u = "U", * fupl_c = "C", *fupl_n = "N", * fupl_a = "A", *fupl_v = "V", *filaenv = "zhetrd", *fvu = "VU";
@@ -98,11 +99,11 @@ static char * fupl_u = "U", * fupl_c = "C", *fupl_n = "N", * fupl_a = "A", *fupl
 void jdher_su3vect(int n, int lda, double tau, double tol, 
 	   int kmax, int jmax, int jmin, int itmax,
 	   int blksize, int blkwise, 
-	   int V0dim, complex *V0, 
+	   int V0dim, _Complex double *V0, 
 	   int solver_flag, 
 	   int linitmax, double eps_tr, double toldecay,
 	   int verbosity,
-	   int *k_conv, complex *Q, double *lambda, int *it,
+	   int *k_conv, _Complex double *Q, double *lambda, int *it,
 	   int maxmin, int shift_mode, int tslice,
 	   matrix_mult_su3vect A_psi)
 {
@@ -114,18 +115,19 @@ void jdher_su3vect(int n, int lda, double tau, double tol,
 /* allocatables:                                              *
  * initialize with NULL, so we can free even unallocated ptrs */
 double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;
-volatile complex *V_ = NULL;
-volatile complex  *V; 
-complex *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res, *eigwork = NULL, *temp1_ = NULL, *temp1;
+_Complex double *V_ = NULL; 
+_Complex double  *V; 
+_Complex double *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res, *eigwork = NULL, 
+  *temp1_ = NULL, *temp1;
 int *idx1 = NULL, *idx2 = NULL, *convind = NULL, *keepind = NULL, *solvestep = NULL, *actcorrits = NULL;
 
 /* non-allocated ptrs */
-complex *q, *v, *u, *r = NULL;  
+_Complex double *q, *v, *u, *r = NULL;  
 /* scalar vars */
 double theta, alpha, it_tol;
 int i, k, j, actblksize, eigworklen, found, conv, keep, n2;
 int act, cnt, idummy, info, CntCorrIts=0, endflag=0;
-int N=n*sizeof(complex)/sizeof(su3_vector);
+int N=n*sizeof(_Complex double)/sizeof(su3_vector);
 int IDIST = 1;
 int ISEED[4] = {2, 3, 5, 7};
  ISEED[0] = 2;
@@ -162,22 +164,25 @@ int ISEED[4] = {2, 3, 5, 7};
  if(eps_tr < 0.) jderrorhandler(500,"");
  if(toldecay <= 1.0) jderrorhandler(501,"");
  
- CONE.re=1.; CONE.im=0.;
+/* CONE.re=1.; CONE.im=0.;
  CZERO.re=0.; CZERO.im=0.;
- CMONE.re=-1.; CMONE.im=0.;
+ CMONE.re=-1.; CMONE.im=0.; */
+ CONE=(_Complex double)1.0;      /* complex  1, used as zgemv beta / zlaset diagonal */
+ CZERO=(_Complex double)0.0;     /* complex  0 */
+ CMONE=(_Complex double)(-1.0);  /* complex -1 (was wrongly set to i): zgemv alpha for y = y - Q*p_work */
  
  /* Get hardware-dependent values:
    * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt.
    * block size... */
  eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax;
 
- if((void*)(V_ = (complex *)malloc((lda * jmax + 4) * sizeof(complex))) == NULL) 
+ if((void*)(V_ = (_Complex double *)malloc((lda * jmax + 4) * sizeof(_Complex double))) == NULL) 
    {
      errno = 0;
      jderrorhandler(300,"V in jdher");
   }
  V = V_;
- if((void*)(U = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) 
+ if((void*)(U = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"U in jdher");
    }
@@ -185,7 +190,7 @@ int ISEED[4] = {2, 3, 5, 7};
    {
      jderrorhandler(300,"s in jdher");
    }
- if((void*)(Res_ = (complex *)malloc((lda * blksize+4) * sizeof(complex))) == NULL) 
+ if((void*)(Res_ = (_Complex double *)malloc((lda * blksize+4) * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"Res in jdher");
    }
@@ -199,15 +204,15 @@ int ISEED[4] = {2, 3, 5, 7};
    {
      jderrorhandler(300,"resnrm_old in jdher");
    }
- if((void*)(M = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) 
+ if((void*)(M = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"M in jdher");
    }
- if((void*)(Vtmp = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) 
+ if((void*)(Vtmp = (_Complex double *)malloc(jmax * jmax * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"Vtmp in jdher");
    }
- if((void*)(p_work = (complex *)malloc(lda * sizeof(complex))) == NULL) 
+ if((void*)(p_work = (_Complex double *)malloc(lda * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"p_work in jdher");
    }
@@ -240,7 +245,7 @@ int ISEED[4] = {2, 3, 5, 7};
      jderrorhandler(300,"actcorrits in jdher");
    }
  
- if((void*)(eigwork = (complex *)malloc(eigworklen * sizeof(complex))) == NULL) 
+ if((void*)(eigwork = (_Complex double *)malloc(eigworklen * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"eigwork in jdher");
    }
@@ -248,12 +253,12 @@ int ISEED[4] = {2, 3, 5, 7};
    {
      jderrorhandler(300,"rwork in jdher");
    }
- if((void*)(temp1_ = (complex *)malloc((lda+4) * sizeof(complex))) == NULL) 
+ if((void*)(temp1_ = (_Complex double *)malloc((lda+4) * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"temp1 in jdher");
    }
  temp1 = temp1_;
- if((void*)(dtemp = (double *)malloc(lda * sizeof(complex))) == NULL) 
+ if((void*)(dtemp = (double *)malloc(lda * sizeof(_Complex double))) == NULL) 
    {
      jderrorhandler(300,"dtemp in jdher");
    }
@@ -445,7 +450,7 @@ int ISEED[4] = {2, 3, 5, 7};
 	      _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
 	      for (act = 0; act < j; act++)
 		{
-		  M[act*jmax + act].re = s[act];
+		  M[act*jmax + act] = s[act];
 		}
 	      /* ... and U the Identity(jnew,jnew) */
 	      _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
@@ -517,7 +522,7 @@ int ISEED[4] = {2, 3, 5, 7};
 	      _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
 	      _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
 	      for (act = 0; act < j; act++)
-		M[act*jmax + act].re = s[act];
+		M[act*jmax + act] = s[act];
 	    }
 	} /* while(found) */    
       
@@ -540,8 +545,7 @@ int ISEED[4] = {2, 3, 5, 7};
 	  v = V + j*lda;
 	  for (cnt = 0; cnt < n; cnt ++)
 	    { 
-	      v[cnt].re = 0.;
-	      v[cnt].im = 0.;
+	      v[cnt] = (_Complex double)0.;
 	    }
 	  /* Adaptive accuracy and shift for the lin.solver. In case the
 	     residual is big, we don't need a too precise solution for the
@@ -579,7 +583,7 @@ int ISEED[4] = {2, 3, 5, 7};
 	  if(solver_flag == CG)
 	    {
 	      info = cg_her_su3vect((su3_vector*) v, (su3_vector*) r, linitmax, it_tol*it_tol, 0, 
-				    n*sizeof(complex)/sizeof(su3_vector),tslice, &Proj_A_psi_su3vect);
+				    n*sizeof(_Complex double)/sizeof(su3_vector),tslice, &Proj_A_psi_su3vect);
 	    }
 	  g_sloppy_precision = 0;
 	  
@@ -760,7 +764,7 @@ static void print_status_su3vect(int verbosity, int it, int k, int j, int kmax,
  *   avoid computation of zero eigenvalues.
  */
 
-static void sorteig_su3vect(int j, double S[], complex U[], int ldu, double tau,
+static void sorteig_su3vect(int j, double S[], _Complex double U[], int ldu, double tau,
 			    double dtemp[], int idx1[], int idx2[], int strategy){
   int i;
 
@@ -798,9 +802,9 @@ static void sorteig_su3vect(int j, double S[], complex U[], int ldu, double tau,
   /* sort eigenvectors (in place) */
   for (i = 0; i < j; i ++) {
     if (i != idx1[i]) {
-      memcpy(dtemp, U+i*ldu, j*sizeof(complex));
-      memcpy(U+i*ldu, U+idx1[i]*ldu, j*sizeof(complex));
-      memcpy(U+idx1[i]*ldu, dtemp, j*sizeof(complex));
+      memcpy(dtemp, U+i*ldu, j*sizeof(_Complex double));
+      memcpy(U+i*ldu, U+idx1[i]*ldu, j*sizeof(_Complex double));
+      memcpy(U+idx1[i]*ldu, dtemp, j*sizeof(_Complex double));
       idx1[idx2[i]] = idx1[i];
       idx2[idx1[i]] = idx2[i];
     }
@@ -819,10 +823,10 @@ void Proj_A_psi_su3vect(su3_vector * const y, su3_vector * const x, int tslice){
   _FT(daxpy)(&p_n2, &mtheta, (double*) x, &ONE, (double*) y, &ONE);
   /* p_work = Q^dagger*y */ 
   for(i = 0; i < p_k; i++) {
-    p_work[i] = scalar_prod_su3vect((su3_vector*) (p_Q+i*p_lda), (su3_vector*) y, p_n*sizeof(complex)/sizeof(su3_vector), 1);
+    p_work[i] = scalar_prod_su3vect((su3_vector*) (p_Q+i*p_lda), (su3_vector*) y, p_n*sizeof(_Complex double)/sizeof(su3_vector), 1);
   }
   /* y = y - Q*p_work */ 
-  _FT(zgemv)(fupl_n, &p_n, &p_k, &CMONE, p_Q, &p_lda, (complex*) p_work, &ONE, &CONE, (complex*) y, &ONE, 1);
+  _FT(zgemv)(fupl_n, &p_n, &p_k, &CMONE, p_Q, &p_lda, (_Complex double*) p_work, &ONE, &CONE, (_Complex double*) y, &ONE, 1);
 }
 
 #endif // WITHLAPH

From a5f2d6301ffb407af6e44a1266927ab1674c0661 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 5 Oct 2012 18:32:51 +0200
Subject: [PATCH 028/110] first functions for ND clover doublet

---
 clover_leaf.c        |   6 +-
 clovertm_operators.c | 490 ++++++++++++++++++++++++++++++++++++++++++-
 clovertm_operators.h |  16 ++
 tm_operators_nd.c    |  19 +-
 tm_operators_nd.h    |   5 +-
 5 files changed, 524 insertions(+), 12 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index ab30a6245..0dbb9e5fb 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -704,7 +704,7 @@ inline void add_shift_6x6(_Complex double a[6][6], const double mshift) {
 
 // This function computes
 //
-// 1/((1+T)^2 + barmu^2 - bareps^)^{-1}
+// ((1+T)^2 + barmu^2 - bareps^2)^{-1}
 //
 // for all even x,
 // which is stored in sw_inv[0-(VOLUME/2-1)]
@@ -738,10 +738,12 @@ void sw_invert_nd(const double mshift) {
       populate_6x6_matrix(a, &v, 3, 0);
       populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
 
+      // compute (1+T)^2 and store in b
       mult_6x6(b, a, a);
-      // we add the mass shift term
+      // we add the mass shift term, which is a real number
       add_shift_6x6(b, mshift);
       // so b = (1+T)^2 + shift
+      // now invert this matrix
       six_invert(&err, b); 
       // here we need to catch the error! 
       if(err > 0 && g_proc_id == 0) {
diff --git a/clovertm_operators.c b/clovertm_operators.c
index 8ff0d2ea6..2fec29d55 100644
--- a/clovertm_operators.c
+++ b/clovertm_operators.c
@@ -236,6 +236,91 @@ void clover_inv(const int ieo, spinor * const l, const double mu) {
   return;
 }
 
+void clover_inv_nd(const int ieo, spinor * const l_s, spinor * const l_c) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  int icy;
+  su3_vector ALIGN psi, chi, phi1, phi3;
+  int ioff = 0;
+  const su3 *w1, *w2, *w3, *w4;
+  spinor *rn_s, *rn_c;
+  /* Multiplies l_s and l_c in place, site by site, with the 2x2 blocks of su3 matrices taken from sw_inv. */
+  /* ieo == 1 offsets the sw_inv site index by VOLUME/2; l_s and l_c are always indexed from 0. */
+  if(ieo == 1) ioff = VOLUME/2;
+
+#ifndef OMP
+  icy = ioff;  /* serial build: running sw_inv index, incremented at the end of each iteration */
+#endif
+  /************************ loop over all lattice sites *************************/
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int icx = 0; icx < (VOLUME/2); icx++) {
+#ifdef OMP
+    icy = ioff + icx;  /* OpenMP build: index derived from the loop counter */
+#endif
+
+    rn_s = l_s + icx;
+    rn_c = l_c + icx;
+    _vector_assign(phi1,(*rn_s).s0);  /* keep s0: it is overwritten before its second use */
+    /* l_s, upper spin components (s0, s1) */
+    w1=&sw_inv[icy][0][0];
+    w2=w1+2;  /* &sw_inv[icy][1][0]; */
+    w3=w1+4;  /* &sw_inv[icy][2][0]; */
+    w4=w1+6;  /* &sw_inv[icy][3][0]; */
+    _su3_multiply(psi, *w1, phi1); 
+    _su3_multiply(chi, *w2, (*rn_s).s1);
+    _vector_add((*rn_s).s0, psi,chi);
+    _su3_multiply(psi, *w4, phi1); 
+    _su3_multiply(chi, *w3, (*rn_s).s1);
+    _vector_add((*rn_s).s1, psi, chi);
+
+    _vector_assign(phi1,(*rn_c).s0);
+    /* l_c, upper spin components: the same four blocks w1..w4 are applied */
+    _su3_multiply(psi, *w1, phi1); 
+    _su3_multiply(chi, *w2, (*rn_c).s1);
+    _vector_add((*rn_c).s0, psi,chi);
+    _su3_multiply(psi, *w4, phi1); 
+    _su3_multiply(chi, *w3, (*rn_c).s1);
+    _vector_add((*rn_c).s1, psi, chi);
+
+    _vector_assign(phi3,(*rn_s).s2);  /* keep s2: it is overwritten before its second use */
+    /* l_s, lower spin components (s2, s3): advance all block pointers by one */
+    w1++; /* &sw_inv[icy][0][1]; */
+    w2++; /* &sw_inv[icy][1][1]; */
+    w3++; /* &sw_inv[icy][2][1]; */
+    w4++; /* &sw_inv[icy][3][1]; */
+    _su3_multiply(psi, *w1, phi3); 
+    _su3_multiply(chi, *w2, (*rn_s).s3);
+    _vector_add((*rn_s).s2, psi, chi);
+    _su3_multiply(psi, *w4, phi3); 
+    _su3_multiply(chi, *w3, (*rn_s).s3);
+    _vector_add((*rn_s).s3, psi, chi);
+
+    _vector_assign(phi3,(*rn_c).s2);
+    /* l_c, lower spin components */
+    _su3_multiply(psi, *w1, phi3); 
+    _su3_multiply(chi, *w2, (*rn_c).s3);
+    _vector_add((*rn_c).s2, psi, chi);
+    _su3_multiply(psi, *w4, phi3); 
+    _su3_multiply(chi, *w3, (*rn_c).s3);
+    _vector_add((*rn_c).s3, psi, chi);
+
+#ifndef OMP
+    ++icy;
+#endif
+
+    /******************************** end of loop *********************************/
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+
 /**************************************************************
  *
  * clover_gamma5 applies the clover term to spinor k, adds k 
@@ -319,8 +404,8 @@ void clover_gamma5(const int ieo,
 
 /**************************************************************
  *
- * clover applies the clover term to spinor k, adds k 
- * to j then and stores it in l
+ * clover applies (1 + T + imug5) to spinor k, 
+ * subtracts j from k and stores in l
  *
  * it is assumed that the clover leaf is computed and stored
  * in sw[VOLUME][3][2]
@@ -338,7 +423,7 @@ void clover(const int ieo,
 #endif
   su3_vector ALIGN chi, psi1, psi2;
   int ix;
-  int ioff,icx;
+  int ioff;
   const su3 *w1,*w2,*w3;
   spinor *r;
   const spinor *s,*t;
@@ -349,11 +434,10 @@ void clover(const int ieo,
   else {
     ioff = (VOLUME+RAND)/2;
   }
-  /************************ loop over all lattice sites *************************/
 #ifdef OMP
 #pragma omp for
 #endif
-  for(icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+  for(unsigned int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
     ix = g_eo2lexic[icx];
     
     r = l + icx-ioff;
@@ -402,6 +486,268 @@ void clover(const int ieo,
   return;
 }
 
+/**************************************************************
+ *
+ * clover_nd computes (1 + T + imug5tau3 + epstau1) k - j
+ * for the doublet k = (k_s, k_c) and stores it in (l_s, l_c)
+ *
+ * it is assumed that the clover leaf is computed and stored
+ * in sw[VOLUME][3][2]
+ * the corresponding routine can be found in clover_leaf.c
+ *
+ **************************************************************/
+
+void clover_nd(const int ieo, 
+	       spinor * const l_s, spinor * const l_c, 
+	       const spinor * const k_s, const spinor * const k_c, 
+	       const spinor * const j_s, const spinor * const j_c,
+	       const double mubar, const double epsbar) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  su3_vector ALIGN chi, psi1, psi2;
+  int ix;
+  int ioff;
+  const su3 *w1,*w2,*w3;
+  spinor *r_s, *r_c;
+  const spinor *s_s, *s_c, *t_s, *t_c;
+  // ioff is the offset into g_eo2lexic selected by ieo; the spinor arrays are indexed from 0
+  if(ieo == 0) {
+    ioff = 0;
+  } 
+  else {
+    ioff = (VOLUME+RAND)/2;
+  }
+  /************************ loop over all lattice sites *************************/
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+    ix = g_eo2lexic[icx];
+    
+    r_s = l_s + icx-ioff;
+    r_c = l_c + icx-ioff;
+    s_s = k_s + icx-ioff;
+    s_c = k_c + icx-ioff;
+    t_s = j_s + icx-ioff;
+    t_c = j_c + icx-ioff;
+
+    // upper two spin components first
+    w1=&sw[ix][0][0];
+    w2=w1+2; /*&sw[ix][1][0];*/
+    w3=w1+4; /*&sw[ix][2][0];*/
+    _su3_multiply(psi1, *w1, (*s_s).s0); 
+    _su3_multiply(chi, *w2, (*s_s).s1);
+    _vector_add_assign(psi1, chi);
+    _su3_inverse_multiply(psi2, *w2, (*s_s).s0); 
+    _su3_multiply(chi, *w3, (*s_s).s1);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (plus in the upper components)
+    _vector_add_i_mul(psi1, mubar, (*s_s).s0);
+    _vector_add_i_mul(psi2, mubar, (*s_s).s1);
+    // epsbar couples to the other flavour (k_c) at the same spin index
+    _vector_add_mul(psi1, epsbar, (*s_c).s0);
+    _vector_add_mul(psi2, epsbar, (*s_c).s1);
+
+    _vector_sub((*r_s).s0, psi1, (*t_s).s0);
+    _vector_sub((*r_s).s1, psi2, (*t_s).s1);
+
+    _su3_multiply(psi1, *w1, (*s_c).s0); 
+    _su3_multiply(chi, *w2, (*s_c).s1);
+    _vector_add_assign(psi1, chi);
+    _su3_inverse_multiply(psi2, *w2, (*s_c).s0); 
+    _su3_multiply(chi, *w3, (*s_c).s1);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (upper components, opposite sign for the _c flavour)
+    _vector_add_i_mul(psi1, -mubar, (*s_c).s0);
+    _vector_add_i_mul(psi2, -mubar, (*s_c).s1);
+
+    _vector_add_mul(psi1, epsbar, (*s_s).s0);
+    _vector_add_mul(psi2, epsbar, (*s_s).s1);
+
+    _vector_sub((*r_c).s0, psi1, (*t_c).s0);
+    _vector_sub((*r_c).s1, psi2, (*t_c).s1);
+
+
+    // now lower two spin components
+    w1++; /*=&sw[ix][0][1];*/
+    w2++; /*=&sw[ix][1][1];*/
+    w3++; /*=&sw[ix][2][1];*/
+    _su3_multiply(psi1, *w1, (*s_s).s2); 
+    _su3_multiply(chi, *w2, (*s_s).s3);
+    _vector_add_assign(psi1, chi); 
+    _su3_inverse_multiply(psi2, *w2, (*s_s).s2); 
+    _su3_multiply(chi, *w3, (*s_s).s3);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (minus from g5 in the lower components)
+    _vector_add_i_mul(psi1, -mubar, (*s_s).s2);
+    _vector_add_i_mul(psi2, -mubar, (*s_s).s3);
+
+    _vector_add_mul(psi1, epsbar, (*s_c).s2);
+    _vector_add_mul(psi2, epsbar, (*s_c).s3);
+
+    _vector_sub((*r_s).s2,psi1,(*t_s).s2);
+    _vector_sub((*r_s).s3,psi2,(*t_s).s3);
+
+    _su3_multiply(psi1, *w1, (*s_c).s2); 
+    _su3_multiply(chi, *w2, (*s_c).s3);
+    _vector_add_assign(psi1, chi); 
+    _su3_inverse_multiply(psi2, *w2, (*s_c).s2); 
+    _su3_multiply(chi, *w3, (*s_c).s3);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (lower components: g5 sign and _c flavour sign combine to plus)
+    _vector_add_i_mul(psi1, mubar, (*s_c).s2);
+    _vector_add_i_mul(psi2, mubar, (*s_c).s3);
+
+    _vector_add_mul(psi1, epsbar, (*s_s).s2);
+    _vector_add_mul(psi2, epsbar, (*s_s).s3);
+
+    _vector_sub((*r_c).s2, psi1, (*t_c).s2);
+    _vector_sub((*r_c).s3, psi2, (*t_c).s3);
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+void clover_gamma5_nd(const int ieo, 
+		      spinor * const l_s, spinor * const l_c, 
+		      const spinor * const k_s, const spinor * const k_c, 
+		      const spinor * const j_s, const spinor * const j_c,
+		      const double mubar, const double epsbar) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  su3_vector ALIGN chi, psi1, psi2;
+  int ix;
+  int ioff;
+  const su3 *w1,*w2,*w3;
+  spinor *r_s, *r_c;
+  const spinor *s_s, *s_c, *t_s, *t_c;
+  // same arithmetic as clover_nd, except that the lower spin components are stored as j - result (upper: result - j)
+  if(ieo == 0) {
+    ioff = 0;
+  } 
+  else {
+    ioff = (VOLUME+RAND)/2;
+  }
+  /************************ loop over all lattice sites *************************/
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+    ix = g_eo2lexic[icx];
+    
+    r_s = l_s + icx-ioff;
+    r_c = l_c + icx-ioff;
+    s_s = k_s + icx-ioff;
+    s_c = k_c + icx-ioff;
+    t_s = j_s + icx-ioff;
+    t_c = j_c + icx-ioff;
+
+    // upper two spin components first
+    w1=&sw[ix][0][0];
+    w2=w1+2; /*&sw[ix][1][0];*/
+    w3=w1+4; /*&sw[ix][2][0];*/
+    _su3_multiply(psi1, *w1, (*s_s).s0); 
+    _su3_multiply(chi, *w2, (*s_s).s1);
+    _vector_add_assign(psi1, chi);
+    _su3_inverse_multiply(psi2, *w2, (*s_s).s0); 
+    _su3_multiply(chi, *w3, (*s_s).s1);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (plus in the upper components)
+    _vector_add_i_mul(psi1, mubar, (*s_s).s0);
+    _vector_add_i_mul(psi2, mubar, (*s_s).s1);
+    // epsbar couples to the other flavour (k_c) at the same spin index
+    _vector_add_mul(psi1, epsbar, (*s_c).s0);
+    _vector_add_mul(psi2, epsbar, (*s_c).s1);
+
+    _vector_sub((*r_s).s0, psi1, (*t_s).s0);
+    _vector_sub((*r_s).s1, psi2, (*t_s).s1);
+
+    _su3_multiply(psi1, *w1, (*s_c).s0); 
+    _su3_multiply(chi, *w2, (*s_c).s1);
+    _vector_add_assign(psi1, chi);
+    _su3_inverse_multiply(psi2, *w2, (*s_c).s0); 
+    _su3_multiply(chi, *w3, (*s_c).s1);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (upper components, opposite sign for the _c flavour)
+    _vector_add_i_mul(psi1, -mubar, (*s_c).s0);
+    _vector_add_i_mul(psi2, -mubar, (*s_c).s1);
+
+    _vector_add_mul(psi1, epsbar, (*s_s).s0);
+    _vector_add_mul(psi2, epsbar, (*s_s).s1);
+
+    _vector_sub((*r_c).s0, psi1, (*t_c).s0);
+    _vector_sub((*r_c).s1, psi2, (*t_c).s1);
+
+
+    // now lower two spin components
+    w1++; /*=&sw[ix][0][1];*/
+    w2++; /*=&sw[ix][1][1];*/
+    w3++; /*=&sw[ix][2][1];*/
+    _su3_multiply(psi1, *w1, (*s_s).s2); 
+    _su3_multiply(chi, *w2, (*s_s).s3);
+    _vector_add_assign(psi1, chi); 
+    _su3_inverse_multiply(psi2, *w2, (*s_s).s2); 
+    _su3_multiply(chi, *w3, (*s_s).s3);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (minus from g5 in the lower components)
+    _vector_add_i_mul(psi1, -mubar, (*s_s).s2);
+    _vector_add_i_mul(psi2, -mubar, (*s_s).s3);
+
+    _vector_add_mul(psi1, epsbar, (*s_c).s2);
+    _vector_add_mul(psi2, epsbar, (*s_c).s3);
+    // lower components: operands swapped, i.e. j - result
+    _vector_sub((*r_s).s2, (*t_s).s2, psi1);
+    _vector_sub((*r_s).s3, (*t_s).s3, psi2);
+
+    _su3_multiply(psi1, *w1, (*s_c).s2); 
+    _su3_multiply(chi, *w2, (*s_c).s3);
+    _vector_add_assign(psi1, chi); 
+    _su3_inverse_multiply(psi2, *w2, (*s_c).s2); 
+    _su3_multiply(chi, *w3, (*s_c).s3);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (lower components: g5 sign and _c flavour sign combine to plus)
+    _vector_add_i_mul(psi1, mubar, (*s_c).s2);
+    _vector_add_i_mul(psi2, mubar, (*s_c).s3);
+
+    _vector_add_mul(psi1, epsbar, (*s_s).s2);
+    _vector_add_mul(psi2, epsbar, (*s_s).s3);
+    // lower components: operands swapped, i.e. j - result
+    _vector_sub((*r_c).s2, (*t_c).s2, psi1);
+    _vector_sub((*r_c).s3, (*t_c).s3, psi2);
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+
+/**************************************************************
+ *
+ * assign_mul_one_sw_pm_imu applies (1 + T + imug5) to spinor l
+ * and stores it in k
+ *
+ * it is assumed that the clover leaf is computed and stored
+ * in sw[VOLUME][3][2]
+ * the corresponding routine can be found in clover_leaf.c
+ *
+ **************************************************************/
+
+
 void assign_mul_one_sw_pm_imu(const int ieo, 
 			      spinor * const k, const spinor * const l,
 			      const double mu) {
@@ -411,7 +757,7 @@ void assign_mul_one_sw_pm_imu(const int ieo,
 #endif
   su3_vector ALIGN chi, psi1, psi2;
   int ix;
-  int ioff, icx;
+  int ioff;
   const su3 *w1, *w2, *w3;
   spinor *r;
   const spinor *s;
@@ -426,7 +772,7 @@ void assign_mul_one_sw_pm_imu(const int ieo,
 #ifdef OMP
 #pragma omp for
 #endif
-  for(icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+  for(unsigned icx = ioff; icx < (VOLUME/2+ioff); icx++) {
     ix = g_eo2lexic[icx];
     
     r = k + icx-ioff;
@@ -474,6 +820,136 @@ void assign_mul_one_sw_pm_imu(const int ieo,
   return;
 }
 
+/**************************************************************
+ *
+ * assign_mul_one_sw_pm_imu_eps applies
+ * (1 + T + imug5tau3 + epstau1) to the doublet (l_s, l_c)
+ * and stores the result in (k_s, k_c)
+ *
+ * it is assumed that the clover leaf is computed and stored
+ * in sw[VOLUME][3][2]
+ * the corresponding routine can be found in clover_leaf.c
+ *
+ **************************************************************/
+
+
+void assign_mul_one_sw_pm_imu_eps(const int ieo, 
+				  spinor * const k_s, spinor * const k_c, 
+				  const spinor * const l_s, const spinor * const l_c,
+				  const double mu, const double eps) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  su3_vector ALIGN chi, psi1, psi2;
+  int ix;
+  int ioff;
+  const su3 *w1, *w2, *w3;
+  spinor *r_s, *r_c;
+  const spinor *s_s, *s_c;
+  // ioff is the offset into g_eo2lexic selected by ieo; the spinor arrays are indexed from 0
+  if(ieo == 0) {
+    ioff = 0;
+  } 
+  else {
+    ioff = (VOLUME+RAND)/2;
+  }
+  /************************ loop over all lattice sites *************************/
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+    ix = g_eo2lexic[icx];
+    
+    r_s = k_s + icx-ioff;
+    r_c = k_c + icx-ioff;
+    s_s = l_s + icx-ioff;
+    s_c = l_c + icx-ioff;
+
+    // upper two spin components first
+    w1=&sw[ix][0][0];
+    w2=w1+2; /*&sw[ix][1][0];*/
+    w3=w1+4; /*&sw[ix][2][0];*/
+    _su3_multiply(psi1, *w1, (*s_s).s0); 
+    _su3_multiply(chi, *w2, (*s_s).s1);
+    _vector_add_assign(psi1, chi);
+    _su3_inverse_multiply(psi2, *w2, (*s_s).s0); 
+    _su3_multiply(chi, *w3, (*s_s).s1);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (plus in the upper components)
+    _vector_add_i_mul(psi1, mu, (*s_s).s0);
+    _vector_add_i_mul(psi2, mu, (*s_s).s1);
+    // eps couples to the other flavour (l_c) at the same spin index
+    _vector_add_mul(psi1, eps, (*s_c).s0);
+    _vector_add_mul(psi2, eps, (*s_c).s1);
+
+    _vector_assign((*r_s).s0, psi1);
+    _vector_assign((*r_s).s1, psi2);
+
+    _su3_multiply(psi1, *w1, (*s_c).s0); 
+    _su3_multiply(chi, *w2, (*s_c).s1);
+    _vector_add_assign(psi1, chi);
+    _su3_inverse_multiply(psi2, *w2, (*s_c).s0); 
+    _su3_multiply(chi, *w3, (*s_c).s1);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (upper components, opposite sign for the _c flavour)
+    _vector_add_i_mul(psi1, -mu, (*s_c).s0);
+    _vector_add_i_mul(psi2, -mu, (*s_c).s1);
+
+    _vector_add_mul(psi1, eps, (*s_s).s0);
+    _vector_add_mul(psi2, eps, (*s_s).s1);
+
+    _vector_assign((*r_c).s0, psi1);
+    _vector_assign((*r_c).s1, psi2);
+
+    // now lower two spin components
+    w1++; /*=&sw[ix][0][1];*/
+    w2++; /*=&sw[ix][1][1];*/
+    w3++; /*=&sw[ix][2][1];*/
+    _su3_multiply(psi1, *w1, (*s_s).s2); 
+    _su3_multiply(chi, *w2, (*s_s).s3);
+    _vector_add_assign(psi1, chi); 
+    _su3_inverse_multiply(psi2, *w2, (*s_s).s2); 
+    _su3_multiply(chi, *w3, (*s_s).s3);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (minus from g5 in the lower components)
+    _vector_add_i_mul(psi1, -mu, (*s_s).s2);
+    _vector_add_i_mul(psi2, -mu, (*s_s).s3);
+    // eps term must use the matching lower spin components s2/s3 (was s0/s1), as in clover_nd
+    _vector_add_mul(psi1, eps, (*s_c).s2);
+    _vector_add_mul(psi2, eps, (*s_c).s3);
+
+    _vector_assign((*r_s).s2, psi1);
+    _vector_assign((*r_s).s3, psi2);
+
+    _su3_multiply(psi1, *w1, (*s_c).s2); 
+    _su3_multiply(chi, *w2, (*s_c).s3);
+    _vector_add_assign(psi1, chi); 
+    _su3_inverse_multiply(psi2, *w2, (*s_c).s2); 
+    _su3_multiply(chi, *w3, (*s_c).s3);
+    _vector_add_assign(psi2, chi); 
+
+    // add in the twisted mass term (lower components: g5 sign and _c flavour sign combine to plus)
+    _vector_add_i_mul(psi1, mu, (*s_c).s2);
+    _vector_add_i_mul(psi2, mu, (*s_c).s3);
+    // eps term must use the matching lower spin components s2/s3 (was s0/s1), as in clover_nd
+    _vector_add_mul(psi1, eps, (*s_s).s2);
+    _vector_add_mul(psi2, eps, (*s_s).s3);
+
+    _vector_assign((*r_c).s2, psi1);
+    _vector_assign((*r_c).s3, psi2);
+
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+
 
 void assign_mul_one_sw_pm_imu_inv(const int ieo, 
 				  spinor * const k, const spinor * const l,
diff --git a/clovertm_operators.h b/clovertm_operators.h
index 365863558..318cef2ca 100644
--- a/clovertm_operators.h
+++ b/clovertm_operators.h
@@ -45,4 +45,20 @@ void Msw_minus_psi(spinor * const l, spinor * const k);
 void H_eo_sw_inv_psi(spinor * const l, spinor * const k, const int ieo, const double mu);
 void init_sw_fields();
 
+void clover_nd(const int ieo, 
+	       spinor * const l_s, spinor * const l_c, 
+	       const spinor * const k_s, const spinor * const k_c, 
+	       const spinor * const j_s, const spinor * const j_c,
+	       const double mubar, const double epsbar);
+void clover_gamma5_nd(const int ieo, 
+		      spinor * const l_s, spinor * const l_c, 
+		      const spinor * const k_s, const spinor * const k_c, 
+		      const spinor * const j_s, const spinor * const j_c,
+		      const double mubar, const double epsbar);
+void clover_inv_nd(const int ieo, spinor * const l_s, spinor * const l_c);
+
+void assign_mul_one_sw_pm_imu_eps(const int ieo, 
+				  spinor * const k_s, spinor * const k_c, 
+				  const spinor * const l_s, const spinor * const l_c,
+				  const double mu, const double eps);
 #endif
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 55641f680..9da736e76 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -39,6 +39,7 @@
 #include "linsolve.h"
 #include "linalg_eo.h"
 #include "tm_operators.h"
+#include "clovertm_operators.h"
 #include "tm_operators_nd.h"
 
 
@@ -181,7 +182,7 @@ void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
  * on a half spinor
  ******************************************/
 void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
-                           spinor * const k_strange, spinor * const k_charm){
+		  spinor * const k_strange, spinor * const k_charm){
 
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
 
@@ -564,7 +565,7 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 }
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		 spinor * const k_strange, spinor * const k_charm) {
+		    const spinor * const k_strange, const spinor * const k_charm) {
   
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
 
@@ -582,6 +583,20 @@ void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm,
 
 }
 
+// Precondition (from the original note): sw_term and sw_invert_nd must have
+// been called beforehand, i.e. the clover term must be initialised.
+void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
+		      const spinor * const k_strange, const spinor * const k_charm) {
+  // Two calls on the EE block: fill l_* from the const inputs k_*, then post-process l_*.
+
+  /* recall:   strange <-> up    while    charm <-> dn   */
+  // step 1: l_* receives assign_mul_one_sw_pm_imu_eps applied to k_*, with mu = -g_mubar and eps = g_epsbar
+  assign_mul_one_sw_pm_imu_eps(EE, l_strange, l_charm, k_strange, k_charm, -g_mubar, g_epsbar);
+  // step 2: clover_inv_nd only takes the l_* pair, so it presumably updates it in place -- TODO confirm
+  clover_inv_nd(EE, l_strange, l_charm);
+  return;
+}
+
 
 
 void Q_test_epsilon(spinor * const l_strange, spinor * const l_charm,
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index 4625e2ab0..0b455f3b1 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -46,7 +46,10 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 	     const int ieo);
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		 spinor * const k_strange, spinor * const k_charm);
+		    const spinor * const k_strange, const spinor * const k_charm);
+
+void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
+		      const spinor * const k_strange, const spinor * const k_charm);
 
 void Q_test_epsilon(spinor * const l_strange, spinor * const l_charm,
                     spinor * const k_strange, spinor * const k_charm);

From 633b692d6ab67e57c463bba12107140cfdfd03b5 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sat, 6 Oct 2012 18:46:25 +0200
Subject: [PATCH 029/110] DBCLOVER introduced in input file

---
 clover_leaf.h       |   3 +
 doc/input.tex       |  14 ++
 invert_doublet_eo.c | 395 +++++++++++++++++++++++++-------------------
 invert_doublet_eo.h |   7 +
 operator.c          |  38 ++++-
 operator.h          |   1 +
 prepare_source.c    |   2 +-
 read_input.l        |  15 +-
 8 files changed, 290 insertions(+), 185 deletions(-)

diff --git a/clover_leaf.h b/clover_leaf.h
index 71881e4ad..87454fccf 100644
--- a/clover_leaf.h
+++ b/clover_leaf.h
@@ -36,4 +36,7 @@ void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll);
 void sw_all(hamiltonian_field_t * const hf, const double kappa, const double c_sw);
 int init_swpm(const int V);
 
+double sw_trace_nd(const int ieo, const double mu, const double eps);
+void sw_invert_nd(const double mshift);
+
 #endif
diff --git a/doc/input.tex b/doc/input.tex
index a5e235ec5..9476ec3cb 100644
--- a/doc/input.tex
+++ b/doc/input.tex
@@ -568,12 +568,26 @@ \subsubsection{Chosing the Operator for Inversions}
   \item {\ttfamily 2KappaMu}
   \item {\ttfamily UseEvenOdd}
   \end{itemize}
+\item {\ttfamily CLOVER}: Clover Twisted Mass Dirac operator, with
+  options:
+  \begin{itemize}
+  \item {\ttfamily 2KappaMu}
+  \item {\ttfamily UseEvenOdd}
+  \item {\ttfamily CSW}
+  \end{itemize}
 \item {\ttfamily DBTMWILSON}: two flavour mass non-degenerate Wilson
   Twisted Mass Dirac operator:
   \begin{itemize}
   \item {\ttfamily 2KappaMubar}
   \item {\ttfamily 2KappaEpsbar}
   \end{itemize}
+\item {\ttfamily DBCLOVER}: two flavour mass non-degenerate Clover
+  Twisted Mass Dirac operator:
+  \begin{itemize}
+  \item {\ttfamily CSW}
+  \item {\ttfamily 2KappaMubar}
+  \item {\ttfamily 2KappaEpsbar}
+  \end{itemize}
 \item {\ttfamily OVERLAP}: overlap  operator:
   \begin{itemize}
   \item {\ttfamily m}
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 375710028..687b9ea44 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -50,14 +50,14 @@
 
 
 #ifdef HAVE_GPU
-  #include"GPU/cudadefs.h"
-  #include"temporalgauge.h"
-  #include"measure_gauge_action.h"
-  int mixedsolve_eo_nd (spinor *, spinor *, spinor *, spinor *, int, double, int);
-  int mixedsolve_eo_nd_mpi(spinor *, spinor *, spinor *, spinor *, int, double, int);
-  #ifdef TEMPORALGAUGE
-    extern su3* g_trafo;
-  #endif
+#  include"GPU/cudadefs.h"
+#  include"temporalgauge.h"
+#  include"measure_gauge_action.h"
+int mixedsolve_eo_nd (spinor *, spinor *, spinor *, spinor *, int, double, int);
+int mixedsolve_eo_nd_mpi(spinor *, spinor *, spinor *, spinor *, int, double, int);
+#  ifdef TEMPORALGAUGE
+extern su3* g_trafo;
+#  endif
 #endif
 
 
@@ -71,84 +71,84 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   int iter = 0;
   
   
-  #ifdef HAVE_GPU
-  #ifdef TEMPORALGAUGE
+#ifdef HAVE_GPU
+#  ifdef TEMPORALGAUGE
   
-    /* initialize temporal gauge here */
-    int retval;
-    double dret1, dret2;
-    double plaquette1 = 0.0;
-    double plaquette2 = 0.0;
-
-    if (usegpu_flag) {
+  /* initialize temporal gauge here */
+  int retval;
+  double dret1, dret2;
+  double plaquette1 = 0.0;
+  double plaquette2 = 0.0;
+  
+  if (usegpu_flag) {
+    
+    /* need VOLUME here (not N=VOLUME/2)*/
+    if ((retval = init_temporalgauge_trafo(VOLUME, g_gauge_field)) != 0 ) {				// initializes the transformation matrices
+      if (g_proc_id == 0) printf("Error while gauge fixing to temporal gauge. Aborting...\n");   	//	g_tempgauge_field as a copy of g_gauge_field
+      exit(200);
+    }
+    
+    /* do trafo */
+    plaquette1 = measure_gauge_action(g_gauge_field);
+    apply_gtrafo(g_gauge_field, g_trafo);								// transformation of the gauge field
+    plaquette2 = measure_gauge_action(g_gauge_field);
+    if (g_proc_id == 0) printf("\tPlaquette before gauge fixing: %.16e\n", plaquette1/6./VOLUME);
+    if (g_proc_id == 0) printf("\tPlaquette after gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
+    
+    /* do trafo to odd_s part of source */
+    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
+    apply_gtrafo_spinor_odd(Odd_s, g_trafo);								// odd spinor transformation, strange
+    dret2 = square_norm(Odd_s, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
+    
+    /* do trafo to odd_c part of source */
+    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
+    apply_gtrafo_spinor_odd(Odd_c, g_trafo);								// odd spinor transformation, charm
+    dret2 = square_norm(Odd_c, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);       
+    
+    /* do trafo to even_s part of source */
+    dret1 = square_norm(Even_s, VOLUME/2 , 1);
+    apply_gtrafo_spinor_even(Even_s, g_trafo);							// even spinor transformation, strange
+    dret2 = square_norm(Even_s, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
+    
+    /* do trafo to even_c part of source */
+    dret1 = square_norm(Even_c, VOLUME/2 , 1);
+    apply_gtrafo_spinor_even(Even_c, g_trafo);							// even spinor transformation, charm
+    dret2 = square_norm(Even_c, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
+    
+#    ifdef MPI
+    xchange_gauge(g_gauge_field);
+#    endif
     
-      /* need VOLUME here (not N=VOLUME/2)*/
-      if ((retval = init_temporalgauge_trafo(VOLUME, g_gauge_field)) != 0 ) {				// initializes the transformation matrices
-	if (g_proc_id == 0) printf("Error while gauge fixing to temporal gauge. Aborting...\n");   	//	g_tempgauge_field as a copy of g_gauge_field
-	  exit(200);
-      }
-      
-      /* do trafo */
-      plaquette1 = measure_gauge_action(g_gauge_field);
-      apply_gtrafo(g_gauge_field, g_trafo);								// transformation of the gauge field
-      plaquette2 = measure_gauge_action(g_gauge_field);
-      	if (g_proc_id == 0) printf("\tPlaquette before gauge fixing: %.16e\n", plaquette1/6./VOLUME);
-      	if (g_proc_id == 0) printf("\tPlaquette after gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
-      
-      /* do trafo to odd_s part of source */
-      dret1 = square_norm(Odd_s, VOLUME/2 , 1);
-      apply_gtrafo_spinor_odd(Odd_s, g_trafo);								// odd spinor transformation, strange
-      dret2 = square_norm(Odd_s, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      /* do trafo to odd_c part of source */
-      dret1 = square_norm(Odd_c, VOLUME/2 , 1);
-      apply_gtrafo_spinor_odd(Odd_c, g_trafo);								// odd spinor transformation, charm
-      dret2 = square_norm(Odd_c, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);       
-      
-      /* do trafo to even_s part of source */
-      dret1 = square_norm(Even_s, VOLUME/2 , 1);
-      apply_gtrafo_spinor_even(Even_s, g_trafo);							// even spinor transformation, strange
-      dret2 = square_norm(Even_s, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      /* do trafo to even_c part of source */
-      dret1 = square_norm(Even_c, VOLUME/2 , 1);
-      apply_gtrafo_spinor_even(Even_c, g_trafo);							// even spinor transformation, charm
-      dret2 = square_norm(Even_c, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      #ifdef MPI
-        xchange_gauge(g_gauge_field);
-      #endif
-            
-    } 
-#endif  
+  } 
+#  endif  
 #endif /* HAVE_GPU*/
 
 
   /* here comes the inversion using even/odd preconditioning */
   if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
   M_ee_inv_ndpsi(Even_new_s, Even_new_c, 
-	      Even_s, Even_c);
+		 Even_s, Even_c);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
-
+  
   /* The sign is plus, since in Hopping_Matrix */
   /* the minus is missing                      */
   assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
   assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);
-
+  
   /* Do the inversion with the preconditioned  */
   /* matrix to get the odd sites               */
   
   /* Here we invert the hermitean operator squared */
-
+  
   if(g_proc_id == 0) {
     printf("# Using CG for TMWILSON flavour doublet!\n"); 
     fflush(stdout);
@@ -157,131 +157,186 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);
   
   
-  #ifdef HAVE_GPU
-    if (usegpu_flag) {	// GPU, mixed precision solver
-      #if defined(MPI) && defined(PARALLELT)
-        iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
-                                    max_iter, precision, rel_prec);
-      #elif !defined(MPI) && !defined(PARALLELT)
-        iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
-                                max_iter, precision, rel_prec);
-      #else
-        printf("MPI and/or PARALLELT are not appropriately set for the GPU implementation. Aborting...\n");
-        exit(-1);
-      #endif
-    }
-    else {		// CPU, conjugate gradient
-      iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
-		       max_iter, precision, rel_prec, 
-		       VOLUME/2, &Qtm_pm_ndpsi);
-    }
-  #else			// CPU, conjugate gradient
+#ifdef HAVE_GPU
+  if (usegpu_flag) {	// GPU, mixed precision solver
+#  if defined(MPI) && defined(PARALLELT)
+    iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
+			    max_iter, precision, rel_prec);
+#  elif !defined(MPI) && !defined(PARALLELT)
+    iter = mixedsolve_eo_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
+			    max_iter, precision, rel_prec);
+#  else
+    printf("MPI and/or PARALLELT are not appropriately set for the GPU implementation. Aborting...\n");
+    exit(-1);
+#  endif
+  }
+  else {		// CPU, conjugate gradient
     iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
 		     max_iter, precision, rel_prec, 
 		     VOLUME/2, &Qtm_pm_ndpsi);
-  #endif
+  }
+#else			// CPU, conjugate gradient
+  iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
+		   max_iter, precision, rel_prec, 
+		   VOLUME/2, &Qtm_pm_ndpsi);
+#endif
   
   
   Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
-			Odd_new_s, Odd_new_c);
+		   Odd_new_s, Odd_new_c);
   
   /* Reconstruct the even sites                */
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
   M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
-	      g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
-
+		 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  
   /* The sign is plus, since in Hopping_Matrix */
   /* the minus is missing                      */
   assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
   assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);
   
   
-  #ifdef HAVE_GPU  
-    /* return from temporal gauge again */
-  #ifdef TEMPORALGAUGE
+#ifdef HAVE_GPU  
+  /* return from temporal gauge again */
+#  ifdef TEMPORALGAUGE
   
-    if (usegpu_flag) { 
+  if (usegpu_flag) { 
     
-      /* undo trafo */
-      /* apply_inv_gtrafo(g_gauge_field, g_trafo);*/
-      /* copy back the saved original field located in g_tempgauge_field -> update necessary*/
-      plaquette1 = measure_gauge_action(g_gauge_field);
-      copy_gauge_field(g_gauge_field, g_tempgauge_field);
-      g_update_gauge_copy = 1;
-      plaquette2 = measure_gauge_action(g_gauge_field);
-      	if (g_proc_id == 0) printf("\tPlaquette before inverse gauge fixing: %.16e\n", plaquette1/6./VOLUME);
-      	if (g_proc_id == 0) printf("\tPlaquette after inverse gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
-   
-      /* undo trafo to source Even_s */
-      dret1 = square_norm(Even_s, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_even(Even_s, g_trafo);
-      dret2 = square_norm(Even_s, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      
-      /* undo trafo to source Even_c */
-      dret1 = square_norm(Even_c, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_even(Even_c, g_trafo);
-      dret2 = square_norm(Even_c, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
-      
-      /* undo trafo to source Odd_s */
-      dret1 = square_norm(Odd_s, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_odd(Odd_s, g_trafo);
-      dret2 = square_norm(Odd_s, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      /* undo trafo to source Odd_c */
-      dret1 = square_norm(Odd_c, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_odd(Odd_c, g_trafo);
-      dret2 = square_norm(Odd_c, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
+    /* undo trafo */
+    /* apply_inv_gtrafo(g_gauge_field, g_trafo);*/
+    /* copy back the saved original field located in g_tempgauge_field -> update necessary*/
+    plaquette1 = measure_gauge_action(g_gauge_field);
+    copy_gauge_field(g_gauge_field, g_tempgauge_field);
+    g_update_gauge_copy = 1;
+    plaquette2 = measure_gauge_action(g_gauge_field);
+    if (g_proc_id == 0) printf("\tPlaquette before inverse gauge fixing: %.16e\n", plaquette1/6./VOLUME);
+    if (g_proc_id == 0) printf("\tPlaquette after inverse gauge fixing:  %.16e\n", plaquette2/6./VOLUME);
     
+    /* undo trafo to source Even_s */
+    dret1 = square_norm(Even_s, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_even(Even_s, g_trafo);
+    dret2 = square_norm(Even_s, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
     
-      // Even_new_s
-      dret1 = square_norm(Even_new_s, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_even(Even_new_s, g_trafo);
-      dret2 = square_norm(Even_new_s, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      // Even_new_c
-      dret1 = square_norm(Even_new_c, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_even(Even_new_c, g_trafo);
-      dret2 = square_norm(Even_new_c, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      // Odd_new_s
-      dret1 = square_norm(Odd_new_s, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_odd(Odd_new_s, g_trafo);
-      dret2 = square_norm(Odd_new_s, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
-      
-      // Odd_new_c
-      dret1 = square_norm(Odd_new_c, VOLUME/2 , 1);
-      apply_inv_gtrafo_spinor_odd(Odd_new_c, g_trafo);
-      dret2 = square_norm(Odd_new_c, VOLUME/2, 1);
-      	if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
-      	if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
-  
-      finalize_temporalgauge();
-      
-      #ifdef MPI
-        xchange_gauge(g_gauge_field);
-      #endif
-      
-    }
-  #endif
-  #endif
+    
+    /* undo trafo to source Even_c */
+    dret1 = square_norm(Even_c, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_even(Even_c, g_trafo);
+    dret2 = square_norm(Even_c, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1);
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
+    
+    /* undo trafo to source Odd_s */
+    dret1 = square_norm(Odd_s, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_odd(Odd_s, g_trafo);
+    dret2 = square_norm(Odd_s, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
+    
+    /* undo trafo to source Odd_c */
+    dret1 = square_norm(Odd_c, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_odd(Odd_c, g_trafo);
+    dret2 = square_norm(Odd_c, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
+    
+    
+    // Even_new_s
+    dret1 = square_norm(Even_new_s, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_even(Even_new_s, g_trafo);
+    dret2 = square_norm(Even_new_s, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
+    
+    // Even_new_c
+    dret1 = square_norm(Even_new_c, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_even(Even_new_c, g_trafo);
+    dret2 = square_norm(Even_new_c, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
+    
+    // Odd_new_s
+    dret1 = square_norm(Odd_new_s, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_odd(Odd_new_s, g_trafo);
+    dret2 = square_norm(Odd_new_s, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2);
+    
+    // Odd_new_c
+    dret1 = square_norm(Odd_new_c, VOLUME/2 , 1);
+    apply_inv_gtrafo_spinor_odd(Odd_new_c, g_trafo);
+    dret2 = square_norm(Odd_new_c, VOLUME/2, 1);
+    if (g_proc_id == 0) printf("\tsquare norm before gauge fixing: %.16e\n", dret1); 
+    if (g_proc_id == 0) printf("\tsquare norm after gauge fixing:  %.16e\n", dret2); 
+    
+    finalize_temporalgauge();
+    
+#    ifdef MPI
+    xchange_gauge(g_gauge_field);
+#    endif
+    
+  }
+#  endif
+#endif
+  return(iter);
+}
 
 
+int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s, 
+			    spinor * const Even_new_c, spinor * const Odd_new_c, 
+			    spinor * const Even_s, spinor * const Odd_s,
+			    spinor * const Even_c, spinor * const Odd_c,
+			    const double precision, const int max_iter,
+			    const int solver_flag, const int rel_prec) {
+  /* Even/odd preconditioned CG solve for a flavour doublet: fills Even_new_* / Odd_new_* from the Even_* / Odd_* sources, uses g_spinor_field[DUM_DERI .. DUM_DERI+3] as scratch, and returns the value handed back by cg_her_nd. */
+  int iter = 0;
+  /* NOTE(review): solver_flag is never read in this body, and the calls below are M_ee_inv_ndpsi / Qtm_pm_ndpsi / Qtm_dagger_ndpsi rather than clover (sw) variants -- confirm this is intended for the clover doublet. */
+  
+  /* here comes the inversion using even/odd preconditioning */
+  if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
+  M_ee_inv_ndpsi(Even_new_s, Even_new_c, 
+		 Even_s, Even_c);
+  Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
+  Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
+  
+  /* The sign is plus, since in Hopping_Matrix */
+  /* the minus is missing                      */
+  assign_mul_add_r(g_spinor_field[DUM_DERI], +1., Odd_s, VOLUME/2);
+  assign_mul_add_r(g_spinor_field[DUM_DERI+1], +1., Odd_c, VOLUME/2);
+  /* DUM_DERI and DUM_DERI+1 now hold the odd-site right-hand sides built from the hopping term and Odd_s / Odd_c. */
+  /* Do the inversion with the preconditioned  */
+  /* matrix to get the odd sites               */
+  
+  /* Here we invert the hermitean operator squared */
+  /* NOTE(review): the message printed below still says TMWILSON although this is the clover doublet entry point -- confirm. */
+  if(g_proc_id == 0) {
+    printf("# Using CG for TMWILSON flavour doublet!\n"); 
+    fflush(stdout);
+  }
+  gamma5(g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI], VOLUME/2);
+  gamma5(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+1], VOLUME/2);
+  
+  iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
+		   max_iter, precision, rel_prec, 
+		   VOLUME/2, &Qtm_pm_ndpsi);
+  
+  
+  Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
+		   Odd_new_s, Odd_new_c);
+  
+  /* Reconstruct the even sites                */
+  Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
+  Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
+  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
+		 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  
+  /* The sign is plus, since in Hopping_Matrix */
+  /* the minus is missing                      */
+  assign_add_mul_r(Even_new_s, g_spinor_field[DUM_DERI+2], +1., VOLUME/2);
+  assign_add_mul_r(Even_new_c, g_spinor_field[DUM_DERI+3], +1., VOLUME/2);
+  
   return(iter);
 }
 
diff --git a/invert_doublet_eo.h b/invert_doublet_eo.h
index 610982bd4..d6835f3ff 100644
--- a/invert_doublet_eo.h
+++ b/invert_doublet_eo.h
@@ -45,4 +45,11 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
 /* 	    spinor * const Even, spinor * const Odd); */
 /* void M_minus_1_timesC(spinor * const Even_new, spinor * const Odd_new,  */
 /* 		      spinor * const Even, spinor * const Odd); */
+
+int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s, 
+			    spinor * const Even_new_c, spinor * const Odd_new_c, 
+			    spinor * const Even_s, spinor * const Odd_s,
+			    spinor * const Even_c, spinor * const Odd_c,
+			    const double precision, const int max_iter,
+			    const int solver_flag, const int rel_prec);
 #endif
diff --git a/operator.c b/operator.c
index 301aade9f..0737ce6c2 100644
--- a/operator.c
+++ b/operator.c
@@ -130,7 +130,7 @@ int add_operator(const int type) {
     optr->m = 0.;
     optr->inverter = &op_invert;
   }
-  if(optr->type == DBTMWILSON) {
+  if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
     optr->no_flavours = 2;
     g_running_phmc = 1;
   }
@@ -144,7 +144,6 @@ int add_operator(const int type) {
 }
 
 int init_operators() {
-  FILE * ifs;
   int i;
   operator * optr;
   for(i = 0; i < no_operators; i++) {
@@ -193,6 +192,10 @@ int init_operators() {
       /* 	exit(0); */
       /*       } */
     }
+    else if(optr->type == DBCLOVER) {
+      optr->even_odd_flag = 1;
+      optr->applyDbQsq = &Qtm_pm_ndpsi;
+    }
   }
   return(0);
 }
@@ -285,15 +288,32 @@ void op_invert(const int op_id, const int index_start) {
         break;
     }
   }
-  else if(optr->type == DBTMWILSON) {
+  else if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
     g_mubar = optr->mubar;
     g_epsbar = optr->epsbar;
-    for(i = 0; i < SourceInfo.no_flavours; i++) {
-      optr->iterations = invert_doublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
-                                            optr->sr0, optr->sr1, optr->sr2, optr->sr3,
-                                            optr->eps_sq, optr->maxiter,
-                                            optr->solver, optr->rel_prec);
+    if(optr->type == DBCLOVER) {
+      g_c_sw = optr->c_sw;
+      if (g_cart_id == 0 && g_debug_level > 1) {
+	printf("#\n# csw = %e, computing clover leafs\n", g_c_sw);
+      }
+      init_sw_fields(VOLUME);
+      sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
+      sw_invert_nd(g_mubar*g_mubar-g_epsbar*g_epsbar);
+    }
 
+    for(i = 0; i < SourceInfo.no_flavours; i++) {
+      if(optr->type != DBCLOVER) {
+	optr->iterations = invert_doublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
+					      optr->sr0, optr->sr1, optr->sr2, optr->sr3,
+					      optr->eps_sq, optr->maxiter,
+					      optr->solver, optr->rel_prec);
+      }
+      else {
+	optr->iterations = invert_cloverdoublet_eo( optr->prop0, optr->prop1, optr->prop2, optr->prop3, 
+						    optr->sr0, optr->sr1, optr->sr2, optr->sr3,
+						    optr->eps_sq, optr->maxiter,
+						    optr->solver, optr->rel_prec);
+      }
       g_mu = optr->mubar;
       M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
       assign_add_mul_r(g_spinor_field[DUM_DERI+1], optr->prop2, -optr->epsbar, VOLUME/2);
@@ -396,7 +416,7 @@ void op_write_prop(const int op_id, const int index_start, const int append_) {
   paramsSourceFormat *sourceFormat = NULL;
   paramsPropagatorFormat *propagatorFormat = NULL;
   paramsInverterInfo *inverterInfo = NULL;
-  if(optr->type == DBTMWILSON) {
+  if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
     strcpy(ending, "hinverted");
   }
   else if(optr->type == OVERLAP) {
diff --git a/operator.h b/operator.h
index 1dd19f26a..486ec920d 100644
--- a/operator.h
+++ b/operator.h
@@ -30,6 +30,7 @@
 #define WILSON 2
 #define DBTMWILSON 3
 #define CLOVER 4
+#define DBCLOVER 5
 
 #define max_no_operators 10
 
diff --git a/prepare_source.c b/prepare_source.c
index 3a34349dc..9e0548c04 100644
--- a/prepare_source.c
+++ b/prepare_source.c
@@ -62,7 +62,7 @@ void prepare_source(const int nstore, const int isample, const int ix, const int
   SourceInfo.sample = isample;
   SourceInfo.ix = ix;
 
-  if(optr->type != DBTMWILSON) {
+  if(optr->type != DBTMWILSON && optr->type != DBCLOVER) {
     SourceInfo.no_flavours = 1;
     /* no volume sources */
     if(source_type != 1) {
diff --git a/read_input.l b/read_input.l
index 9e90818fd..e12ce7dcc 100644
--- a/read_input.l
+++ b/read_input.l
@@ -268,6 +268,7 @@ inline void rmQuotes(char *str){
 %x WILSONOP
 %x OVERLAPOP
 %x CLOVEROP
+%x DBCLOVEROP
 %x TMSOLVER
 %x DBTMSOLVER
 %x OVSOLVER
@@ -465,6 +466,9 @@ inline void rmQuotes(char *str){
   else if(strcmp(yytext, "CLOVER")==0) {
     optr->type = CLOVER;
   }
+  else if(strcmp(yytext, "DBCLOVER")==0) {
+    optr->type = DBCLOVER;
+  }
   else if(strcmp(yytext, "DBTMWILSON")==0) {
     optr->type = DBTMWILSON;
   }
@@ -488,10 +492,11 @@ inline void rmQuotes(char *str){
   else if(optr->type == CLOVER) BEGIN(CLOVEROP);
   else if(optr->type == TMWILSON) BEGIN(TMOP);
   else if(optr->type == DBTMWILSON) BEGIN(DBTMOP);
+  else if(optr->type == DBCLOVER) BEGIN(DBCLOVEROP);
   else BEGIN(OVERLAPOP);
 }
 
-<WILSONOP,TMOP,OVERLAPOP,DBTMOP,CLOVEROP>{
+<WILSONOP,TMOP,OVERLAPOP,DBTMOP,CLOVEROP,DBCLOVEROP>{
   {SPC}*kappa{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     optr->kappa = c;
@@ -561,7 +566,7 @@ inline void rmQuotes(char *str){
     optr->no_extra_masses = 0;
     char * token = NULL;
     if( strtok(yytext,"\n\t =,\\") != NULL ) {
-      /* drop the first token, it is ExtraMasses" */
+      /* drop the first token, it is ExtraMasses */
       token = strtok(NULL," =\t");
       if( token != NULL ) {
         printf("  CGMMS Reading extra masses input file %s\n",token);
@@ -617,7 +622,7 @@ inline void rmQuotes(char *str){
   }
 }
 
-<DBTMOP>{
+<DBTMOP,DBCLOVEROP>{
   {SPC}*2KappaMubar{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     optr->mubar = c;
@@ -650,7 +655,7 @@ inline void rmQuotes(char *str){
   }
 }
 
-<CLOVEROP>{
+<CLOVEROP,DBCLOVEROP>{
   {SPC}*csw{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     optr->c_sw = c;
@@ -1934,7 +1939,7 @@ inline void rmQuotes(char *str){
 }
 
 
-<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,NDPOLYMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU>\n   {
+<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,NDPOLYMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,DBCLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU>\n   {
   line_of_file++;
 }
 <*>\n                       {

From 764ec38fd8af4a77dec1f7781ddf074a7df2079b Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sat, 6 Oct 2012 20:28:56 +0200
Subject: [PATCH 030/110] tested implementation of Qsw_pm_ndpsi

---
 clover_leaf.c        |  2 +-
 clovertm_operators.c | 30 ++++++++++++-------------
 invert_doublet_eo.c  |  6 ++---
 operator.c           |  3 ++-
 tm_operators_nd.c    | 52 ++++++++++++++++++++++++++++++++++++++++++++
 tm_operators_nd.h    |  2 ++
 6 files changed, 75 insertions(+), 20 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index 0dbb9e5fb..254156fd6 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -161,7 +161,7 @@ void sw_term(const su3 ** const gf, const double kappa, const double c_sw) {
 
     // this is the one in flavour and colour space
     // twisted mass term is treated in clover, sw_inv and
-    // clover_gamma5
+    // clover_gamma5 and the corresponding nd versions
     _su3_one(sw[x][0][0]);
     _su3_one(sw[x][2][0]);
     _su3_one(sw[x][0][1]);
diff --git a/clovertm_operators.c b/clovertm_operators.c
index 2fec29d55..b7b110b0d 100644
--- a/clovertm_operators.c
+++ b/clovertm_operators.c
@@ -236,7 +236,7 @@ void clover_inv(const int ieo, spinor * const l, const double mu) {
   return;
 }
 
-void clover_inv_nd(const int ieo, spinor * const l_s, spinor * const l_c) {
+void clover_inv_nd(const int ieo, spinor * const l_c, spinor * const l_s) {
 #ifdef OMP
 #pragma omp parallel
   {
@@ -253,7 +253,7 @@ void clover_inv_nd(const int ieo, spinor * const l_s, spinor * const l_c) {
 #ifndef OMP
   icy = ioff;
 #endif
-  /************************ loop over all lattice sites *************************/
+
 #ifdef OMP
 #pragma omp for
 #endif
@@ -498,9 +498,9 @@ void clover(const int ieo,
  **************************************************************/
 
 void clover_nd(const int ieo, 
-	       spinor * const l_s, spinor * const l_c, 
-	       const spinor * const k_s, const spinor * const k_c, 
-	       const spinor * const j_s, const spinor * const j_c,
+	       spinor * const l_c, spinor * const l_s, 
+	       const spinor * const k_c, const spinor * const k_s, 
+	       const spinor * const j_c, const spinor * const j_s,
 	       const double mubar, const double epsbar) {
 #ifdef OMP
 #pragma omp parallel
@@ -617,9 +617,9 @@ void clover_nd(const int ieo,
 }
 
 void clover_gamma5_nd(const int ieo, 
-		      spinor * const l_s, spinor * const l_c, 
-		      const spinor * const k_s, const spinor * const k_c, 
-		      const spinor * const j_s, const spinor * const j_c,
+		      spinor * const l_c, spinor * const l_s, 
+		      const spinor * const k_c, const spinor * const k_s, 
+		      const spinor * const j_c, const spinor * const j_s,
 		      const double mubar, const double epsbar) {
 #ifdef OMP
 #pragma omp parallel
@@ -887,8 +887,8 @@ void assign_mul_one_sw_pm_imu_eps(const int ieo,
     _vector_assign((*r_s).s0, psi1);
     _vector_assign((*r_s).s1, psi2);
 
-    _su3_multiply(psi1,*w1, (*s_c).s0); 
-    _su3_multiply(chi,*w2, (*s_c).s1);
+    _su3_multiply(psi1, *w1, (*s_c).s0); 
+    _su3_multiply(chi, *w2, (*s_c).s1);
     _vector_add_assign(psi1, chi);
     _su3_inverse_multiply(psi2, *w2, (*s_c).s0); 
     _su3_multiply(chi, *w3, (*s_c).s1);
@@ -904,7 +904,7 @@ void assign_mul_one_sw_pm_imu_eps(const int ieo,
     _vector_assign((*r_c).s0, psi1);
     _vector_assign((*r_c).s1, psi2);
 
-    // now lower to spin components
+    // now lower two spin components
     w1++; /*=&sw[ix][0][1];*/
     w2++; /*=&sw[ix][1][1];*/
     w3++; /*=&sw[ix][2][1];*/
@@ -919,8 +919,8 @@ void assign_mul_one_sw_pm_imu_eps(const int ieo,
     _vector_add_i_mul(psi1, -mu, (*s_s).s2);
     _vector_add_i_mul(psi2, -mu, (*s_s).s3);
 
-    _vector_add_mul(psi1, eps, (*s_c).s0);
-    _vector_add_mul(psi2, eps, (*s_c).s1);
+    _vector_add_mul(psi1, eps, (*s_c).s2);
+    _vector_add_mul(psi2, eps, (*s_c).s3);
 
     _vector_assign((*r_s).s2, psi1);
     _vector_assign((*r_s).s3, psi2);
@@ -936,8 +936,8 @@ void assign_mul_one_sw_pm_imu_eps(const int ieo,
     _vector_add_i_mul(psi1, mu, (*s_c).s2);
     _vector_add_i_mul(psi2, mu, (*s_c).s3);
 
-    _vector_add_mul(psi1, eps, (*s_s).s0);
-    _vector_add_mul(psi2, eps, (*s_s).s1);
+    _vector_add_mul(psi1, eps, (*s_s).s2);
+    _vector_add_mul(psi2, eps, (*s_s).s3);
 
     _vector_assign((*r_c).s2, psi1);
     _vector_assign((*r_c).s3, psi2);
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 687b9ea44..85d78042a 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -296,8 +296,8 @@ int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   
   /* here comes the inversion using even/odd preconditioning */
   if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
-  M_ee_inv_ndpsi(Even_new_s, Even_new_c, 
-		 Even_s, Even_c);
+  Msw_ee_inv_ndpsi(Even_new_s, Even_new_c, 
+		   Even_s, Even_c);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
   
@@ -320,7 +320,7 @@ int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   
   iter = cg_her_nd(Odd_new_s, Odd_new_c, g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
 		   max_iter, precision, rel_prec, 
-		   VOLUME/2, &Qtm_pm_ndpsi);
+		   VOLUME/2, &Qsw_pm_ndpsi);
   
   
   Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
diff --git a/operator.c b/operator.c
index 0737ce6c2..0f0fac9ac 100644
--- a/operator.c
+++ b/operator.c
@@ -291,6 +291,7 @@ void op_invert(const int op_id, const int index_start) {
   else if(optr->type == DBTMWILSON || optr->type == DBCLOVER) {
     g_mubar = optr->mubar;
     g_epsbar = optr->epsbar;
+    g_c_sw = 0.;
     if(optr->type == DBCLOVER) {
       g_c_sw = optr->c_sw;
       if (g_cart_id == 0 && g_debug_level > 1) {
@@ -298,7 +299,7 @@ void op_invert(const int op_id, const int index_start) {
       }
       init_sw_fields(VOLUME);
       sw_term( (const su3**) g_gauge_field, optr->kappa, optr->c_sw); 
-      sw_invert_nd(g_mubar*g_mubar-g_epsbar*g_epsbar);
+      sw_invert_nd(optr->mubar*optr->mubar-optr->epsbar*optr->epsbar);
     }
 
     for(i = 0; i < SourceInfo.no_flavours; i++) {
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 9da736e76..c9bcd761e 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -269,6 +269,56 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   return;
 }
 
+void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
+		  spinor * const k_strange, spinor * const k_charm) {
+
+  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
+
+  /* FIRST THE  Qhat(2x2)^dagger  PART*/
+  /* Here the  M_oe Mee^-1 M_eo  implementation  */
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
+  /* even-site step: result goes to DUM_MATRIX+2/+3, then clover_inv_nd is applied to those same two fields */
+  assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], -g_mubar, g_epsbar);
+  clover_inv_nd(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3]);
+
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
+
+  // Here the M_oo  implementation  
+  clover_gamma5_nd(OO, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+  		   k_charm, k_strange,
+  		   g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+  		   -g_mubar, -g_epsbar);
+  /* k_* are not read below this point; l_* are first written by the final clover_gamma5_nd call */
+  // and then the  Qhat(2x2)  PART 
+  // Recall in fact that   Q^hat = tau_1 Q tau_1  
+  // Here the  M_oe Mee^-1 M_eo  implementation  
+  // the re-ordering in s and c components is due to tau_1
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+2]);
+  /* outputs (DUM_MATRIX+7, +6) and inputs (DUM_MATRIX+1, +0) are both passed in swapped order compared with the first half; clover_inv_nd then takes (+6, +7) */
+  assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+7], g_spinor_field[DUM_MATRIX+6], 
+			       g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX], g_mubar, g_epsbar);
+  clover_inv_nd(EE, g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7]);
+
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+6]);
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+7]);
+  /* DUM_MATRIX+2/+3 still hold the first-half result and are read here together with the second hopping result */
+  clover_gamma5_nd(OO, l_charm, l_strange,
+  		   g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3],
+  		   g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX],
+  		   g_mubar, -g_epsbar);
+
+  /* At the end, the normalisation by the max. eigenvalue  */ 
+  /* Twice  phmc_invmaxev  since we consider here  D Ddag  !!! */
+  mul_r(l_charm, phmc_invmaxev*phmc_invmaxev, l_charm, VOLUME/2);
+  mul_r(l_strange, phmc_invmaxev*phmc_invmaxev, l_strange, VOLUME/2);
+  return;
+}
+
+
 
 /******************************************
  *
@@ -594,6 +644,8 @@ void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm,
   assign_mul_one_sw_pm_imu_eps(EE, l_strange, l_charm, k_strange, k_charm, -g_mubar, g_epsbar);
 
   clover_inv_nd(EE, l_strange, l_charm);
+  //  clover_inv(EE, l_strange, +1);
+  //  clover_inv(EE, l_charm, +1);
   return;
 }
 
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index 0b455f3b1..9e9fea972 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -34,6 +34,8 @@ void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
 
 void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                   spinor * const k_strange, spinor * const k_charm);
+void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
+		  spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 

From 04a6256eb626d1962d6029c8e5f557e32a8698b9 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sat, 6 Oct 2012 22:00:09 +0200
Subject: [PATCH 031/110] implementation of Qsw_dagger_ndpsi

---
 invert_doublet_eo.c |  6 +++++-
 tm_operators_nd.c   | 26 +++++++++++++++++++++++---
 tm_operators_nd.h   |  2 ++
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 85d78042a..3512ed07f 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -298,6 +298,10 @@ int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
   Msw_ee_inv_ndpsi(Even_new_s, Even_new_c, 
 		   Even_s, Even_c);
+  printf("strange %e %e\n", creal(Even_new_s[0].s0.c0), cimag(Even_new_s[0].s0.c0));
+  printf("strange %e %e\n", creal(Even_new_s[0].s2.c0), cimag(Even_new_s[0].s2.c0));
+  printf("charm %e %e\n", creal(Even_new_c[0].s0.c0), cimag(Even_new_c[0].s0.c0));
+  printf("charm %e %e\n", creal(Even_new_c[0].s2.c0), cimag(Even_new_c[0].s2.c0));
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
   
@@ -323,7 +327,7 @@ int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
 		   VOLUME/2, &Qsw_pm_ndpsi);
   
   
-  Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
+  Qsw_dagger_ndpsi(Odd_new_s, Odd_new_c,
 		   Odd_new_s, Odd_new_c);
   
   /* Reconstruct the even sites                */
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index c9bcd761e..7c4ac458a 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -122,7 +122,7 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
  * on a half spinor
  ******************************************/
 void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
-                           spinor * const k_strange, spinor * const k_charm){
+		      spinor * const k_strange, spinor * const k_charm) {
 
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
 
@@ -164,6 +164,28 @@ void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
 
 }
 
+void Qsw_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
+		      spinor * const k_strange, spinor * const k_charm) {
+  /* k_* are inputs, l_* outputs; l_* are written only by the final mul_r calls, and invert_cloverdoublet_eo passes the same fields for both */
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
+  /* even-site step with (-g_mubar, g_epsbar): result goes to DUM_MATRIX+2/+3, then clover_inv_nd is applied to those same two fields */
+  assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], -g_mubar, g_epsbar);
+  clover_inv_nd(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3]);
+  /* second hopping application; overwrites DUM_MATRIX and DUM_MATRIX+1 */
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
+  /* clover_gamma5_nd on OO reads k_* and the hopping result and writes DUM_MATRIX+2/+3, which are then scaled once by phmc_invmaxev into l_* */
+  clover_gamma5_nd(OO, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+  		   k_charm, k_strange,
+  		   g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+  		   -g_mubar, -g_epsbar);
+  mul_r(l_charm, phmc_invmaxev, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
+  mul_r(l_strange, phmc_invmaxev, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
+  return;
+}
+
 
 /******************************************
  *
@@ -272,8 +294,6 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		  spinor * const k_strange, spinor * const k_charm) {
 
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
-
   /* FIRST THE  Qhat(2x2)^dagger  PART*/
   /* Here the  M_oe Mee^-1 M_eo  implementation  */
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index 9e9fea972..df460eff2 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -31,6 +31,8 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 
 void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
                            spinor * const k_strange, spinor * const k_charm);
+void Qsw_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
+		      spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
                   spinor * const k_strange, spinor * const k_charm);

From adc5cc5f792b7a7a31934110515a0e64cce741ea Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sat, 6 Oct 2012 22:16:58 +0200
Subject: [PATCH 032/110] clover nd solver working and tested

---
 invert_doublet_eo.c |  4 ++--
 operator.c          | 14 ++++++++++++--
 tm_operators_nd.c   | 38 ++++++++++++++++++++++++++++++--------
 tm_operators_nd.h   |  6 ++++--
 4 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 3512ed07f..deb2b296a 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -333,8 +333,8 @@ int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   /* Reconstruct the even sites                */
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
-  M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
-		 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+  Msw_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
+		   g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
   
   /* The sign is plus, since in Hopping_Matrix */
   /* the minus is missing                      */
diff --git a/operator.c b/operator.c
index 0f0fac9ac..ee5fbb33e 100644
--- a/operator.c
+++ b/operator.c
@@ -316,12 +316,22 @@ void op_invert(const int op_id, const int index_start) {
 						    optr->solver, optr->rel_prec);
       }
       g_mu = optr->mubar;
-      M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
+      if(optr->type != DBCLOVER) {
+	M_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
+      }
+      else {
+	Msw_full(g_spinor_field[DUM_DERI+1], g_spinor_field[DUM_DERI+2], optr->prop0, optr->prop1); 
+      }
       assign_add_mul_r(g_spinor_field[DUM_DERI+1], optr->prop2, -optr->epsbar, VOLUME/2);
       assign_add_mul_r(g_spinor_field[DUM_DERI+2], optr->prop3, -optr->epsbar, VOLUME/2);
 
       g_mu = -g_mu;
-      M_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3); 
+      if(optr->type != DBCLOVER) {
+	M_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3); 
+      }
+      else {
+	Msw_full(g_spinor_field[DUM_DERI+3], g_spinor_field[DUM_DERI+4], optr->prop2, optr->prop3);
+      }
       assign_add_mul_r(g_spinor_field[DUM_DERI+3], optr->prop0, -optr->epsbar, VOLUME/2);
       assign_add_mul_r(g_spinor_field[DUM_DERI+4], optr->prop1, -optr->epsbar, VOLUME/2);
 
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 7c4ac458a..8fc378d3b 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -63,7 +63,7 @@ void mul_one_pm_iconst(spinor * const l, spinor * const k,
  * on a half spinor
  ******************************************/
 void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
-                     spinor * const k_strange, spinor * const k_charm){
+	       spinor * const k_strange, spinor * const k_charm){
 
   double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
 
@@ -71,19 +71,19 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
 
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
+  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
+  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
 
-  mul_r(g_spinor_field[DUM_MATRIX+4], nrm, g_spinor_field[DUM_MATRIX+4], VOLUME/2);
   mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
+  mul_r(g_spinor_field[DUM_MATRIX+2], nrm, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
   /* where nrm (= 1/(1+mu^2 -eps^2)) has been defined at the beginning of 
      the subroutine */
   
-  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+4]);
-  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+2]);
 
   /* Here the M_oo  implementation  */
   mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], k_strange, g_mubar, +1);
@@ -104,6 +104,28 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   mul_r(l_charm, phmc_invmaxev, l_charm, VOLUME/2);
 }
 
+void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
+	       spinor * const k_strange, spinor * const k_charm) {
+  /* Spelled Qsw_ndpsi to match the prototype in tm_operators_nd.h. NOTE(review): the body is the same call sequence as Qsw_dagger_ndpsi -- confirm that is intended for the non-daggered operator. */
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
+  /* even-site step with (-g_mubar, g_epsbar): result goes to DUM_MATRIX+2/+3, then clover_inv_nd is applied to those same two fields */
+  assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], -g_mubar, g_epsbar);
+  clover_inv_nd(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3]);
+  /* second hopping application; overwrites DUM_MATRIX and DUM_MATRIX+1 */
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
+  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
+  /* clover_gamma5_nd on OO reads k_* and the hopping result and writes DUM_MATRIX+2/+3, which are then scaled once by phmc_invmaxev into l_* */
+  clover_gamma5_nd(OO, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+  		   k_charm, k_strange,
+  		   g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+  		   -g_mubar, -g_epsbar);
+  mul_r(l_charm, phmc_invmaxev, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
+  mul_r(l_strange, phmc_invmaxev, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
+  return;
+}
+
 /******************************************
  *
  * This is the implementation of
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index df460eff2..ba6a19c37 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -27,10 +27,12 @@ void mul_one_pm_itau2(spinor * const p, spinor * const q,
 		      const double sign, const int N);
 
 void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
-                     spinor * const k_strange,  spinor * const k_charm);
+	       spinor * const k_strange,  spinor * const k_charm);
+void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
+	       spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
-                           spinor * const k_strange, spinor * const k_charm);
+		      spinor * const k_strange, spinor * const k_charm);
 void Qsw_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		      spinor * const k_strange, spinor * const k_charm);
 

From 0db524f6ac8ee042fb1324a97991bd4de3c40ee0 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sat, 6 Oct 2012 22:17:39 +0200
Subject: [PATCH 033/110] clover nd solver working and tested, debugging output
 removed

---
 invert_doublet_eo.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index deb2b296a..9ad52c851 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -298,10 +298,6 @@ int invert_cloverdoublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
   Msw_ee_inv_ndpsi(Even_new_s, Even_new_c, 
 		   Even_s, Even_c);
-  printf("strange %e %e\n", creal(Even_new_s[0].s0.c0), cimag(Even_new_s[0].s0.c0));
-  printf("strange %e %e\n", creal(Even_new_s[0].s2.c0), cimag(Even_new_s[0].s2.c0));
-  printf("charm %e %e\n", creal(Even_new_c[0].s0.c0), cimag(Even_new_c[0].s0.c0));
-  printf("charm %e %e\n", creal(Even_new_c[0].s2.c0), cimag(Even_new_c[0].s2.c0));
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
   

From 6d04ca6db0885587a8bf2f2de1168797e81697bd Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sat, 6 Oct 2012 23:47:02 +0200
Subject: [PATCH 034/110] removing a lot of superfluous code in the old ND
 implementation

---
 tm_operators_nd.c | 402 ++++++++++++++++------------------------------
 tm_operators_nd.h |   4 +-
 2 files changed, 142 insertions(+), 264 deletions(-)

diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 8fc378d3b..16bd47e54 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -45,7 +45,9 @@
 
 void mul_one_pm_iconst(spinor * const l, spinor * const k, 
 		       const double mu_, const int sign_);
-
+void mul_one_p_imug5t3_p_epst1_inv(spinor * const l_c, spinor * const l_s, 
+				   spinor * const k_c, spinor * const k_s,
+				   const double mu, const double eps);
 
 /* external functions */
 
@@ -65,23 +67,14 @@ void mul_one_pm_iconst(spinor * const l, spinor * const k,
 void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 	       spinor * const k_strange, spinor * const k_charm){
 
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
-
   /* Here the  M_oe Mee^-1 M_eo  implementation  */
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
+  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2],
+				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+				g_mubar, g_epsbar);
 
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
-
-  mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-  mul_r(g_spinor_field[DUM_MATRIX+2], nrm, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  /* where nrm (= 1/(1+mu^2 -eps^2)) has been defined at the beginning of 
-     the subroutine */
-  
   Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+3]);
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+2]);
 
@@ -146,23 +139,14 @@ void Qsw__ndpsi(spinor * const l_strange, spinor * const l_charm,
 void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		      spinor * const k_strange, spinor * const k_charm) {
 
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
-
   /* Here the  M_oe Mee^-1 M_eo  implementation  */
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
+  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3],
+				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+				g_mubar, g_epsbar);
 
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
-
-  mul_r(g_spinor_field[DUM_MATRIX+2], nrm, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-  /* where nrm (= 1/(1+mu^2 -eps^2)) has been defined at the beginning of 
-     the subroutine */
-  
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
@@ -222,28 +206,23 @@ void Qsw_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
  * k_charm and k_strange are the input fields
  * l_* the output fields
  *
+ * l_ and k_ can be identical
+ *
  * it acts only on the odd part or only
  * on a half spinor
  ******************************************/
 void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		  spinor * const k_strange, spinor * const k_charm){
 
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
-
-  /* FIRST THE  Qhat(2x2)^dagger  PART*/
+  /* first the  Qhat(2x2)^dagger  PART*/
   /* Here the  M_oe Mee^-1 M_eo  implementation  */
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
+  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3],
+				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+				g_mubar, g_epsbar);
 
-  mul_r(g_spinor_field[DUM_MATRIX+2], nrm, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-  
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
@@ -254,58 +233,41 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
   assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
    
-  diff(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], VOLUME/2);
-  diff(g_spinor_field[DUM_MATRIX+5], g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], VOLUME/2);
+  diff(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], VOLUME/2);
+  diff(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], VOLUME/2);
 
   /* and finally the  gamma_5  multiplication  */
-  gamma5(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+4], VOLUME/2);
-  gamma5(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+5], VOLUME/2);
-
-  /* The normalisation by the max. eigenvalue  is done twice at the end */
-
+  gamma5(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+2], VOLUME/2);
+  gamma5(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+3], VOLUME/2);
 
   /* We have to reassigin as follows to avoid overwriting */
   /* Recall in fact that   Q^hat = tau_1 Q tau_1  , hence  */
-
-  /*  ABOVE: dum_matrix+2  is  l_charm   goes to  dum_matrix+6 :BELOW */
-  /*  ABOVE: dum_matrix+3  is  l_strange   goes to  dum_matrix+7 :BELOW */
-  assign(g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  assign(g_spinor_field[DUM_MATRIX+7], g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-
-
-  /* AND THEN THE  Qhat(2x2)  PART */
+  /* and then the  Qhat(2x2)  PART */
 
   /* Here the  M_oe Mee^-1 M_eo  implementation  */
-  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
-  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
-
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+2]);
 
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
+  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+5], g_spinor_field[DUM_MATRIX+4],
+				g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX],
+				-g_mubar, g_epsbar);
 
-  mul_r(g_spinor_field[DUM_MATRIX+2], nrm, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
- 
-  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+2]);
-  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+4]);
+  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+5]);
 
   /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar, -1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+3], g_mubar, +1);
+  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+2], g_mubar, -1);
 
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+6], -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+7], -g_epsbar, VOLUME/2);
+  assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2], -g_epsbar, VOLUME/2);
+  assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3], -g_epsbar, VOLUME/2);
    
   diff(l_strange, g_spinor_field[DUM_MATRIX], l_strange, VOLUME/2);
   diff(l_charm, g_spinor_field[DUM_MATRIX+1], l_charm, VOLUME/2);
 
-  /* and finally the  gamma_5  multiplication  */
   gamma5(l_strange, l_strange, VOLUME/2);
   gamma5(l_charm, l_charm, VOLUME/2);
 
-
   /* At the end, the normalisation by the max. eigenvalue  */ 
   /* Twice  phmc_invmaxev  since we consider here  D Ddag  !!! */
   mul_r(l_charm, phmc_invmaxev*phmc_invmaxev, l_charm, VOLUME/2);
@@ -385,11 +347,11 @@ void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
  * on a half spinor
  ******************************************/
 void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
-                     spinor * const k_strange, spinor * const k_charm, const _Complex double z){
+			    spinor * const k_strange, spinor * const k_charm, 
+			    const _Complex double z){
 
   spinor *r, *s;
   su3_vector ALIGN phi1;
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
 
   /*   tau_1   inverts the   k_charm  <->  k_strange   spinors */
   /*  Apply first  Qhat(2x2)  and finally substract the constant  */
@@ -399,20 +361,12 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
+  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2],
+				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+				g_mubar, g_epsbar);
 
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
-
-  mul_r(g_spinor_field[DUM_MATRIX+4], nrm, g_spinor_field[DUM_MATRIX+4], VOLUME/2);
-  mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-  /* where nrm (= 1/(1+mu^2 -eps^2)) has been defined at the beginning of
-     the subroutine */
-
-
-  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+4]);
-  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+2]);
 
   /* Here the M_oo  implementation  */
   mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], k_charm, g_mubar, +1);
@@ -432,8 +386,10 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   mul_r(l_strange, phmc_invmaxev, l_strange, VOLUME/2);
   mul_r(l_charm, phmc_invmaxev, l_charm, VOLUME/2);
 
-  /************ loop over all lattice sites ************/
-
+  /* Finally, we add k to l and multiply all */
+  /* by the constant  phmc_Cpol  */
+  /* which renders the polynomial in monomials  */
+  /* identical to the polynomial a la clenshaw */;
 #ifdef OMP
 #pragma omp parallel for private(r) private(s) private(phi1)
 #endif
@@ -444,32 +400,34 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
     
     _complex_times_vector(phi1, z, s->s0);
     _vector_sub_assign(r->s0, phi1);
+    _vector_mul(r->s0, phmc_Cpol, r->s0);
     _complex_times_vector(phi1, z, s->s1);
     _vector_sub_assign(r->s1, phi1);
+    _vector_mul(r->s1, phmc_Cpol, r->s1);
     _complex_times_vector(phi1, z, s->s2);
     _vector_sub_assign(r->s2, phi1);
+    _vector_mul(r->s2, phmc_Cpol, r->s2);
     _complex_times_vector(phi1, z, s->s3);
     _vector_sub_assign(r->s3, phi1);
-    
+    _vector_mul(r->s3, phmc_Cpol, r->s3);
+
     r=l_charm + ix;
     s=k_charm + ix;
     
     _complex_times_vector(phi1, z, s->s0);
     _vector_sub_assign(r->s0, phi1);
+    _vector_mul(r->s0, phmc_Cpol, r->s0);
     _complex_times_vector(phi1, z, s->s1);
     _vector_sub_assign(r->s1, phi1);
+    _vector_mul(r->s1, phmc_Cpol, r->s1);
     _complex_times_vector(phi1, z, s->s2);
     _vector_sub_assign(r->s2, phi1);
+    _vector_mul(r->s2, phmc_Cpol, r->s2);
     _complex_times_vector(phi1, z, s->s3);
     _vector_sub_assign(r->s3, phi1);    
+    _vector_mul(r->s3, phmc_Cpol, r->s3);
   }
-  
-  /* Finally, we multiply by the constant  phmc_Cpol  */
-  /* which renders the polynomial in monomials  */
-  /* identical to the polynomial a la clenshaw */;
-  mul_r(l_strange, phmc_Cpol, l_strange, VOLUME/2);
-  mul_r(l_charm, phmc_Cpol, l_charm, VOLUME/2);
-
+  return;
 }
 
 
@@ -495,129 +453,14 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
  ******************************************/
 void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
 
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
-  static int memalloc = 0;
-
-  static spinor *k_strange, *k_charm;
-  static spinor *l_strange, *l_charm;
-
-#if ( defined SSE || defined SSE2 || defined SSE3)
-  static spinor *k_strange_, *k_charm_;
-  static spinor *l_strange_, *l_charm_;
-  if(memalloc == 0) {
-    memalloc = 1;
-    k_strange_ = (spinor*)calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
-    k_strange  = (spinor *)(((unsigned long int)(k_strange_)+ALIGN_BASE)&~ALIGN_BASE);
-    k_charm_   = (spinor*)calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
-    k_charm    = (spinor *)(((unsigned long int)(k_charm_)+ALIGN_BASE)&~ALIGN_BASE);
-
-    l_strange_ = (spinor*)calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
-    l_strange  = (spinor *)(((unsigned long int)(l_strange_)+ALIGN_BASE)&~ALIGN_BASE);
-    l_charm_   = (spinor*)calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
-    l_charm    = (spinor *)(((unsigned long int)(l_charm_)+ALIGN_BASE)&~ALIGN_BASE);
-  }
-#else
-  if(memalloc == 0) {
-    memalloc = 1;
-    k_strange  = (spinor*)calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-    k_charm    = (spinor*)calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-    
-    l_strange  = (spinor*)calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-    l_charm    = (spinor*)calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  }
-#endif
-
-  /*  CREATE 2 SPINORS OUT OF 1 (INPUT) BISPINOR  */
-  decompact(k_strange, k_charm, bisp_k);
-
-  /* FIRST THE  Qhat(2x2)^dagger  PART*/
-
-  /* Here the  M_oe Mee^-1 M_eo  implementation  */
-  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
-  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
-
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
-
-  mul_r(g_spinor_field[DUM_MATRIX+2], nrm, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-  /* where nrm (= 1/(1+mu^2 -eps^2)) has been defined at the beginning of 
-     the subroutine */
-
-  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
-  Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
-
-  /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar, -1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
-   
-  diff(g_spinor_field[DUM_MATRIX+4], g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], VOLUME/2);
-  diff(g_spinor_field[DUM_MATRIX+5], g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], VOLUME/2);
-
-  /* and finally the  gamma_5  multiplication  */
-  gamma5(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+4], VOLUME/2);
-  gamma5(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+5], VOLUME/2);
+  /*  create 2 spinors out of 1 (input) bispinor  */
+  decompact(g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7], bisp_k);
 
-  /* The normalisation by the max. eigenvalue  is done twice at the end */
+  Qtm_pm_ndpsi(g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7],
+	       g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7]);
 
-
-  /* We have to reassigin as follows to avoid overwriting */
-  /* Recall in fact that   Q^hat = tau_1 Q tau_1  , hence  */
-
-  /*  ABOVE: dum_matrix+2  is  l_charm   goes to  dum_matrix+6 :BELOW */
-  /*  ABOVE: dum_matrix+3  is  l_strange   goes to  dum_matrix+7 :BELOW */
-  assign(g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  assign(g_spinor_field[DUM_MATRIX+7], g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-
-
-  /* AND THEN THE  Qhat(2x2)  PART */
-
-  /* Here the  M_oe Mee^-1 M_eo  implementation  */
-  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7]);
-  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6]);
-
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], g_mubar, -1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], g_mubar, +1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
-
-  mul_r(g_spinor_field[DUM_MATRIX+2], nrm, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  mul_r(g_spinor_field[DUM_MATRIX+3], nrm, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-  /* where nrm (= 1/(1+mu^2 -eps^2)) has been defined at the beginning of 
-     the subroutine */
-
-  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+2]);
-  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+3]);
-
-  /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+7], g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+6], g_mubar, -1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+6], -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+7], -g_epsbar, VOLUME/2);
-   
-  diff(l_strange, g_spinor_field[DUM_MATRIX], l_strange, VOLUME/2);
-  diff(l_charm, g_spinor_field[DUM_MATRIX+1], l_charm, VOLUME/2);
-
-  /* and finally the  gamma_5  multiplication  */
-  gamma5(l_strange, l_strange, VOLUME/2);
-  gamma5(l_charm, l_charm, VOLUME/2);
-
-  /* At the end, the normalisation by the max. eigenvalue  */
-  /* Twice  phmc_invmaxev  since we consider here  D Ddag  !!! */
-  mul_r(l_charm, phmc_invmaxev*phmc_invmaxev, l_charm, VOLUME/2);
-  mul_r(l_strange, phmc_invmaxev*phmc_invmaxev, l_strange, VOLUME/2);
-
-
-  /*  CREATE 1 (OUTPUT) BISPINOR OUT OF 2 SPINORS  */
-  compact(bisp_l, l_strange, l_charm);
+  /*  create 1 (output) bispinor out of 2 spinors  */
+  compact(bisp_l, g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7]);
 }
 
 
@@ -637,48 +480,30 @@ void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
 void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm, 
              spinor * const k_strange, spinor * const k_charm, 
 	     const int ieo) {
-
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
-
-
   /* recall:   strange <-> up    while    charm <-> dn   */
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_pm_iconst(l_strange, g_spinor_field[DUM_MATRIX+1], g_mubar, -1);
-  mul_one_pm_iconst(l_charm, g_spinor_field[DUM_MATRIX], g_mubar, +1);
-
-  assign_add_mul_r(l_strange, g_spinor_field[DUM_MATRIX], g_epsbar, VOLUME/2);
-  assign_add_mul_r(l_charm, g_spinor_field[DUM_MATRIX+1], g_epsbar, VOLUME/2);
-
-  mul_r(l_strange, nrm, l_strange, VOLUME/2);
-  mul_r(l_charm, nrm, l_charm, VOLUME/2);
-
+  mul_one_p_imug5t3_p_epst1_inv(l_charm, l_strange,
+				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+				-g_mubar, g_epsbar);
+  return;
 }
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		    const spinor * const k_strange, const spinor * const k_charm) {
+		    spinor * const k_strange, spinor * const k_charm) {
   
-  double nrm = 1./(1.+g_mubar*g_mubar-g_epsbar*g_epsbar);
-
-
   /* recall:   strange <-> up    while    charm <-> dn   */
-
-  mul_one_pm_iconst(l_strange, k_strange, g_mubar, -1);
-  mul_one_pm_iconst(l_charm, k_charm, g_mubar, +1);
-
-  assign_add_mul_r(l_strange, k_charm, g_epsbar, VOLUME/2);
-  assign_add_mul_r(l_charm, k_strange, g_epsbar, VOLUME/2);
-
-  mul_r(l_strange, nrm, l_strange, VOLUME/2);
-  mul_r(l_charm, nrm, l_charm, VOLUME/2);
-
+  mul_one_p_imug5t3_p_epst1_inv(l_charm, l_strange,
+				k_charm, k_strange,
+				-g_mubar, g_epsbar);
+  return;
 }
 
 // for this routine we need to have sw_invert_nd and sw_term called before hand
 // and the clover term must be initialised
 void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		      const spinor * const k_strange, const spinor * const k_charm) {
+		      spinor * const k_strange, spinor * const k_charm) {
   
 
   /* recall:   strange <-> up    while    charm <-> dn   */
@@ -686,8 +511,6 @@ void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm,
   assign_mul_one_sw_pm_imu_eps(EE, l_strange, l_charm, k_strange, k_charm, -g_mubar, g_epsbar);
 
   clover_inv_nd(EE, l_strange, l_charm);
-  //  clover_inv(EE, l_strange, +1);
-  //  clover_inv(EE, l_charm, +1);
   return;
 }
 
@@ -760,7 +583,7 @@ void mul_one_pm_iconst(spinor * const l, spinor * const k,
   for(unsigned int ix = 0; ix < (VOLUME/2); ++ix){
     r=l + ix;
     s=k + ix;
-    /* Multiply the spinorfield with the inverse of 1+imu\gamma_5 */
+    /* Multiply the spinorfield with 1+imu\gamma_5 */
     _complex_times_vector(phi1, (1. + mu * I), s->s0);
     _vector_assign(r->s0, phi1);
     _complex_times_vector(phi1, (1. + mu * I), s->s1);
@@ -779,10 +602,67 @@ void mul_one_pm_iconst(spinor * const l, spinor * const k,
 }
 
 
+void mul_one_p_imug5t3_p_epst1_inv(spinor * const l_c, spinor * const l_s, 
+				   spinor * const k_c, spinor * const k_s,
+				   const double mu, const double eps) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  double nrm = 1./(1.+ mu*mu - eps*eps);
+  spinor *r_s, *r_c, *s_s, *s_c;
+  su3_vector ALIGN phi1;
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int ix = 0; ix < (VOLUME/2); ++ix){
+    r_s = l_s + ix;
+    r_c = l_c + ix;
+    s_s = k_s + ix;
+    s_c = k_c + ix;
+
+    _complex_times_vector(phi1, (1. + mu * I), s_s->s0);
+    _vector_add_mul(phi1, eps, s_c->s0);
+    _vector_mul(r_s->s0, nrm, phi1);
+    _complex_times_vector(phi1, (1. - mu * I), s_c->s0);
+    _vector_add_mul(phi1, eps, s_s->s0);
+    _vector_mul(r_c->s0, nrm, phi1);
+
+    _complex_times_vector(phi1, (1. + mu * I), s_s->s1);
+    _vector_add_mul(phi1, eps, s_c->s1);
+    _vector_mul(r_s->s1, nrm, phi1);
+    _complex_times_vector(phi1, (1. - mu * I), s_c->s1);
+    _vector_add_mul(phi1, eps, s_s->s1);
+    _vector_mul(r_c->s1, nrm, phi1);
+
+    _complex_times_vector(phi1, (1. - mu * I), s_s->s2);
+    _vector_add_mul(phi1, eps, s_c->s2);
+    _vector_mul(r_s->s2, nrm, phi1);
+    _complex_times_vector(phi1, (1. + mu * I), s_c->s2);
+    _vector_add_mul(phi1, eps, s_s->s2);
+    _vector_mul(r_c->s2, nrm, phi1);
+
+    _complex_times_vector(phi1, (1. - mu * I), s_s->s3);
+    _vector_add_mul(phi1, eps, s_c->s3);
+    _vector_mul(r_s->s3, nrm, phi1);
+    _complex_times_vector(phi1, (1. + mu * I), s_c->s3);
+    _vector_add_mul(phi1, eps, s_s->s3);
+    _vector_mul(r_c->s3, nrm, phi1);
+
+  }
+
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+
+  return;
+}
+
 /*  calculates P(Q Q^dagger) for the nondegenerate case */
 
 void P_ndpsi(spinor * const l_strange, spinor * const l_charm,
-	  spinor * const k_strange, spinor * const k_charm){
+	     spinor * const k_strange, spinor * const k_charm){
   
   
   
@@ -793,17 +673,16 @@ void P_ndpsi(spinor * const l_strange, spinor * const l_charm,
   
   assign(dum_up,k_strange,VOLUME/2);
   assign(dum_dn,k_charm,VOLUME/2);
-  
-  
-  for(j=0; j<(2*phmc_dop_n_cheby -2); j++){
+    
+  for(j = 0; j < (2*phmc_dop_n_cheby -2); j++) {
     if(j>0) {
       assign(dum_up,l_strange,VOLUME/2);
       assign(dum_dn,l_charm,VOLUME/2);
     }
     
     Q_tau1_sub_const_ndpsi(l_strange, l_charm,
-			 dum_up, dum_dn,
-			 phmc_root[j]);
+			   dum_up, dum_dn,
+			   phmc_root[j]);
   }
   return;
 }
@@ -811,19 +690,19 @@ void P_ndpsi(spinor * const l_strange, spinor * const l_charm,
 
 /* calculates  Q * \tau^1  for the nondegenerate case */
 void Qtau1_P_ndpsi(spinor * const l_strange, spinor * const l_charm,
-		spinor * const k_strange, spinor * const k_charm){
+		   spinor * const k_strange, spinor * const k_charm){
   
   
   spinor * dum_up,* dum_dn;
-  dum_up=g_chi_up_spinor_field[DUM_MATRIX+1];
-  dum_dn=g_chi_dn_spinor_field[DUM_MATRIX+1];
+  dum_up = g_chi_up_spinor_field[DUM_MATRIX+1];
+  dum_dn = g_chi_dn_spinor_field[DUM_MATRIX+1];
   
-  P_ndpsi(l_strange, l_charm,k_strange,k_charm);
+  P_ndpsi(l_strange, l_charm, k_strange, k_charm);
   
-  assign(dum_up,l_strange,VOLUME/2);
-  assign(dum_dn,l_charm,VOLUME/2);
+  assign(dum_up, l_strange, VOLUME/2);
+  assign(dum_dn, l_charm, VOLUME/2);
   
-  Qtm_ndpsi(l_strange,l_charm,dum_dn,dum_up);
+  Qtm_ndpsi(l_strange, l_charm, dum_dn, dum_up);
   return;
 }
 
@@ -832,12 +711,12 @@ void Qtau1_P_ndpsi(spinor * const l_strange, spinor * const l_charm,
 /* this is neccessary for the calculation of the polynomial */
 
 void Qtm_pm_sub_const_nrm_psi(spinor * const l, spinor * const k,
-			   const _Complex double z){
+			      const _Complex double z){
   su3_vector ALIGN phi1;
   spinor *r,*s;
   int ix;
 
-  Qtm_pm_psi(l,k);
+  Qtm_pm_psi(l, k);
   mul_r(l, phmc_invmaxev, l, VOLUME/2);
 
   /*  AND FINALLY WE SUBSTRACT THE C-CONSTANT  */
@@ -860,7 +739,6 @@ void Qtm_pm_sub_const_nrm_psi(spinor * const l, spinor * const k,
     _vector_sub_assign(r->s2, phi1);
     _complex_times_vector(phi1, z, s->s3);
     _vector_sub_assign(r->s3, phi1);
-    
   }
 
   mul_r(l, phmc_Cpol, l, VOLUME/2);
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index ba6a19c37..88146555d 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -52,10 +52,10 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 	     const int ieo);
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		    const spinor * const k_strange, const spinor * const k_charm);
+		    spinor * const k_strange, spinor * const k_charm);
 
 void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		      const spinor * const k_strange, const spinor * const k_charm);
+		      spinor * const k_strange, spinor * const k_charm);
 
 void Q_test_epsilon(spinor * const l_strange, spinor * const l_charm,
                     spinor * const k_strange, spinor * const k_charm);

From b30e4ee5c5b1c28e49dfbcea420484fec3806716 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 13:10:02 +0200
Subject: [PATCH 035/110] replaced linalg by a much simpler routine for
 non-clover ND

---
 invert_doublet_eo.c |   8 +-
 tm_operators_nd.c   | 259 ++++++++++++++++++++++++--------------------
 tm_operators_nd.h   |   4 +-
 3 files changed, 147 insertions(+), 124 deletions(-)

diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 9ad52c851..206e5059b 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -135,7 +135,8 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   /* here comes the inversion using even/odd preconditioning */
   if(g_proc_id == 0) {printf("# Using even/odd preconditioning!\n"); fflush(stdout);}
   M_ee_inv_ndpsi(Even_new_s, Even_new_c, 
-		 Even_s, Even_c);
+		 Even_s, Even_c,
+		 g_mubar, g_epsbar);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI], Even_new_s);
   Hopping_Matrix(OE, g_spinor_field[DUM_DERI+1], Even_new_c);
   
@@ -184,12 +185,13 @@ int invert_doublet_eo(spinor * const Even_new_s, spinor * const Odd_new_s,
   
   Qtm_dagger_ndpsi(Odd_new_s, Odd_new_c,
 		   Odd_new_s, Odd_new_c);
-  
+
   /* Reconstruct the even sites                */
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI], Odd_new_s);
   Hopping_Matrix(EO, g_spinor_field[DUM_DERI+1], Odd_new_c);
   M_ee_inv_ndpsi(g_spinor_field[DUM_DERI+2], g_spinor_field[DUM_DERI+3],
-		 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1]);
+		 g_spinor_field[DUM_DERI], g_spinor_field[DUM_DERI+1],
+		 g_mubar, g_epsbar);
   
   /* The sign is plus, since in Hopping_Matrix */
   /* the minus is missing                      */
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 16bd47e54..385b34025 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -45,9 +45,11 @@
 
 void mul_one_pm_iconst(spinor * const l, spinor * const k, 
 		       const double mu_, const int sign_);
-void mul_one_p_imug5t3_p_epst1_inv(spinor * const l_c, spinor * const l_s, 
-				   spinor * const k_c, spinor * const k_s,
-				   const double mu, const double eps);
+
+void M_oo_sub_g5_ndpsi(spinor * const l_s, spinor * const l_c, 
+		       spinor * const k_s, spinor * const k_c,
+		       spinor * const j_s, spinor * const j_c,
+		       const double mu, const double eps);
 
 /* external functions */
 
@@ -71,30 +73,20 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2],
-				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
-				g_mubar, g_epsbar);
-
+  M_ee_inv_ndpsi(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2],
+		 g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+		 g_mubar, g_epsbar);
+  
   Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+3]);
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+2]);
 
   /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], k_strange, g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], k_charm, g_mubar, -1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX], k_charm, -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], k_strange, -g_epsbar, VOLUME/2);
-   
-  diff(l_strange, g_spinor_field[DUM_MATRIX], l_strange, VOLUME/2);
-  diff(l_charm, g_spinor_field[DUM_MATRIX+1], l_charm, VOLUME/2);
-
-  /* and finally the  gamma_5  multiplication  */
-  gamma5(l_strange, l_strange, VOLUME/2);
-  gamma5(l_charm, l_charm, VOLUME/2);
-
+  M_oo_sub_g5_ndpsi(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], k_strange, k_charm,
+  		    l_strange, l_charm,
+  		    -g_mubar, -g_epsbar);
   /* At the end, the normalisation by the max. eigenvalue  */
-  mul_r(l_strange, phmc_invmaxev, l_strange, VOLUME/2);
-  mul_r(l_charm, phmc_invmaxev, l_charm, VOLUME/2);
+  mul_r(l_strange, phmc_invmaxev, g_spinor_field[DUM_MATRIX], VOLUME/2);
+  mul_r(l_charm, phmc_invmaxev, g_spinor_field[DUM_MATRIX+1], VOLUME/2);
 }
 
 void Qsw__ndpsi(spinor * const l_strange, spinor * const l_charm,
@@ -143,27 +135,17 @@ void Qtm_dagger_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3],
-				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
-				g_mubar, g_epsbar);
-
+  M_ee_inv_ndpsi(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3],
+		 g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+		 g_mubar, g_epsbar);
+  
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar, -1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
-   
-  diff(l_charm, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], VOLUME/2);
-  diff(l_strange, g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], VOLUME/2);
-
-  /* and finally the  gamma_5  multiplication  */
-  gamma5(l_charm, l_charm, VOLUME/2);
-  gamma5(l_strange, l_strange, VOLUME/2);
-
+  M_oo_sub_g5_ndpsi(l_strange, l_charm, k_strange, k_charm,
+  		    g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX],
+  		    g_mubar, -g_epsbar);
   /* At the end, the normalisation by the max. eigenvalue  */
   mul_r(l_charm, phmc_invmaxev, l_charm, VOLUME/2);
   mul_r(l_strange, phmc_invmaxev, l_strange, VOLUME/2);
@@ -219,27 +201,17 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3],
-				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
-				g_mubar, g_epsbar);
+  M_ee_inv_ndpsi(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3],
+		 g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+		 g_mubar, g_epsbar);
 
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3]);
 
   /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+2], k_charm, g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+3], k_strange, g_mubar, -1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+2], k_strange, -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+3], k_charm, -g_epsbar, VOLUME/2);
-   
-  diff(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX], VOLUME/2);
-  diff(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+1], VOLUME/2);
-
-  /* and finally the  gamma_5  multiplication  */
-  gamma5(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+2], VOLUME/2);
-  gamma5(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+3], VOLUME/2);
-
+  M_oo_sub_g5_ndpsi(g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], k_charm, k_strange,
+  		    g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+  		    -g_mubar, -g_epsbar);
   /* We have to reassigin as follows to avoid overwriting */
   /* Recall in fact that   Q^hat = tau_1 Q tau_1  , hence  */
   /* and then the  Qhat(2x2)  PART */
@@ -248,26 +220,17 @@ void Qtm_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+3]);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+2]);
 
-  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+5], g_spinor_field[DUM_MATRIX+4],
-				g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX],
-				-g_mubar, g_epsbar);
+  M_ee_inv_ndpsi(g_spinor_field[DUM_MATRIX+5], g_spinor_field[DUM_MATRIX+4],
+		 g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX],
+		 -g_mubar, g_epsbar);
 
   Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+4]);
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+5]);
 
   /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+3], g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+2], g_mubar, -1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2], -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], g_spinor_field[DUM_MATRIX+3], -g_epsbar, VOLUME/2);
-   
-  diff(l_strange, g_spinor_field[DUM_MATRIX], l_strange, VOLUME/2);
-  diff(l_charm, g_spinor_field[DUM_MATRIX+1], l_charm, VOLUME/2);
-
-  gamma5(l_strange, l_strange, VOLUME/2);
-  gamma5(l_charm, l_charm, VOLUME/2);
-
+  M_oo_sub_g5_ndpsi(l_strange, l_charm, g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2],
+		    l_strange, l_charm,
+  		    -g_mubar, -g_epsbar);
   /* At the end, the normalisation by the max. eigenvalue  */ 
   /* Twice  phmc_invmaxev  since we consider here  D Ddag  !!! */
   mul_r(l_charm, phmc_invmaxev*phmc_invmaxev, l_charm, VOLUME/2);
@@ -361,7 +324,7 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  mul_one_p_imug5t3_p_epst1_inv(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2],
+  M_ee_inv_ndpsi(g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2],
 				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
 				g_mubar, g_epsbar);
 
@@ -369,22 +332,13 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+2]);
 
   /* Here the M_oo  implementation  */
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX], k_charm, g_mubar, +1);
-  mul_one_pm_iconst(g_spinor_field[DUM_MATRIX+1], k_strange, g_mubar, -1);
-
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX], k_strange, -g_epsbar, VOLUME/2);
-  assign_add_mul_r(g_spinor_field[DUM_MATRIX+1], k_charm, -g_epsbar, VOLUME/2);
-
-  diff(l_strange, g_spinor_field[DUM_MATRIX], l_strange, VOLUME/2);
-  diff(l_charm, g_spinor_field[DUM_MATRIX+1], l_charm, VOLUME/2);
-
-  /* and finally the  gamma_5  multiplication  */
-  gamma5(l_strange, l_strange, VOLUME/2);
-  gamma5(l_charm, l_charm, VOLUME/2);
+  M_oo_sub_g5_ndpsi(g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], k_charm, k_strange,
+  		    l_strange, l_charm,
+  		    -g_mubar, -g_epsbar);
 
   /* At the end, the normalisation by the max. eigenvalue  */
-  mul_r(l_strange, phmc_invmaxev, l_strange, VOLUME/2);
-  mul_r(l_charm, phmc_invmaxev, l_charm, VOLUME/2);
+  mul_r(l_strange, phmc_invmaxev, g_spinor_field[DUM_MATRIX], VOLUME/2);
+  mul_r(l_charm, phmc_invmaxev, g_spinor_field[DUM_MATRIX+1], VOLUME/2);
 
   /* Finally, we add k to l and multiply all */
   /* by the constant  phmc_Cpol  */
@@ -451,7 +405,7 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
  * it acts only on the odd part or only
  * on a half spinor
  ******************************************/
-void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
+void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k) {
 
   /*  create 2 spinors out of 1 (input) bispinor  */
   decompact(g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7], bisp_k);
@@ -461,6 +415,20 @@ void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k){
 
   /*  create 1 (output) bispinor out of 2 spinors  */
   compact(bisp_l, g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7]);
+  return;
+}
+
+void Qsw_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k) {
+
+  /*  create 2 spinors out of 1 (input) bispinor  */
+  decompact(g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7], bisp_k);
+
+  Qsw_pm_ndpsi(g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7],
+	       g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7]);
+
+  /*  create 1 (output) bispinor out of 2 spinors  */
+  compact(bisp_l, g_spinor_field[DUM_MATRIX+6], g_spinor_field[DUM_MATRIX+7]);
+  return;
 }
 
 
@@ -484,19 +452,9 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX+1], k_charm);
 
-  mul_one_p_imug5t3_p_epst1_inv(l_charm, l_strange,
-				g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
-				-g_mubar, g_epsbar);
-  return;
-}
-
-void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		    spinor * const k_strange, spinor * const k_charm) {
-  
-  /* recall:   strange <-> up    while    charm <-> dn   */
-  mul_one_p_imug5t3_p_epst1_inv(l_charm, l_strange,
-				k_charm, k_strange,
-				-g_mubar, g_epsbar);
+  M_ee_inv_ndpsi(l_charm, l_strange,
+		 g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
+		 -g_mubar, g_epsbar);
   return;
 }
 
@@ -601,17 +559,17 @@ void mul_one_pm_iconst(spinor * const l, spinor * const k,
   return;
 }
 
-
-void mul_one_p_imug5t3_p_epst1_inv(spinor * const l_c, spinor * const l_s, 
-				   spinor * const k_c, spinor * const k_s,
-				   const double mu, const double eps) {
+// l_ and k_ are allowed to be the same spinors
+void M_ee_inv_ndpsi(spinor * const l_s, spinor * const l_c, 
+		    spinor * const k_s, spinor * const k_c,
+		    const double mu, const double eps) {
 #ifdef OMP
 #pragma omp parallel
   {
 #endif
   double nrm = 1./(1.+ mu*mu - eps*eps);
   spinor *r_s, *r_c, *s_s, *s_c;
-  su3_vector ALIGN phi1;
+  su3_vector ALIGN phi1, phi2;
 
 #ifdef OMP
 #pragma omp for
@@ -622,33 +580,33 @@ void mul_one_p_imug5t3_p_epst1_inv(spinor * const l_c, spinor * const l_s,
     s_s = k_s + ix;
     s_c = k_c + ix;
 
-    _complex_times_vector(phi1, (1. + mu * I), s_s->s0);
+    _complex_times_vector(phi1, (1. - mu * I), s_s->s0);
     _vector_add_mul(phi1, eps, s_c->s0);
+    _complex_times_vector(phi2, (1. + mu * I), s_c->s0);
+    _vector_add_mul(phi2, eps, s_s->s0);
     _vector_mul(r_s->s0, nrm, phi1);
-    _complex_times_vector(phi1, (1. - mu * I), s_c->s0);
-    _vector_add_mul(phi1, eps, s_s->s0);
-    _vector_mul(r_c->s0, nrm, phi1);
+    _vector_mul(r_c->s0, nrm, phi2);
 
-    _complex_times_vector(phi1, (1. + mu * I), s_s->s1);
+    _complex_times_vector(phi1, (1. - mu * I), s_s->s1);
     _vector_add_mul(phi1, eps, s_c->s1);
+    _complex_times_vector(phi2, (1. + mu * I), s_c->s1);
+    _vector_add_mul(phi2, eps, s_s->s1);
     _vector_mul(r_s->s1, nrm, phi1);
-    _complex_times_vector(phi1, (1. - mu * I), s_c->s1);
-    _vector_add_mul(phi1, eps, s_s->s1);
-    _vector_mul(r_c->s1, nrm, phi1);
+    _vector_mul(r_c->s1, nrm, phi2);
 
-    _complex_times_vector(phi1, (1. - mu * I), s_s->s2);
+    _complex_times_vector(phi1, (1. + mu * I), s_s->s2);
     _vector_add_mul(phi1, eps, s_c->s2);
+    _complex_times_vector(phi2, (1. - mu * I), s_c->s2);
+    _vector_add_mul(phi2, eps, s_s->s2);
     _vector_mul(r_s->s2, nrm, phi1);
-    _complex_times_vector(phi1, (1. + mu * I), s_c->s2);
-    _vector_add_mul(phi1, eps, s_s->s2);
-    _vector_mul(r_c->s2, nrm, phi1);
+    _vector_mul(r_c->s2, nrm, phi2);
 
-    _complex_times_vector(phi1, (1. - mu * I), s_s->s3);
+    _complex_times_vector(phi1, (1. + mu * I), s_s->s3);
     _vector_add_mul(phi1, eps, s_c->s3);
+    _complex_times_vector(phi2, (1. - mu * I), s_c->s3);
+    _vector_add_mul(phi2, eps, s_s->s3);
     _vector_mul(r_s->s3, nrm, phi1);
-    _complex_times_vector(phi1, (1. + mu * I), s_c->s3);
-    _vector_add_mul(phi1, eps, s_s->s3);
-    _vector_mul(r_c->s3, nrm, phi1);
+    _vector_mul(r_c->s3, nrm, phi2);
 
   }
 
@@ -659,6 +617,67 @@ void mul_one_p_imug5t3_p_epst1_inv(spinor * const l_c, spinor * const l_s,
   return;
 }
 
+
+// l_ and k_ are allowed to be the same spinors
+void M_oo_sub_g5_ndpsi(spinor * const l_s, spinor * const l_c, 
+		       spinor * const k_s, spinor * const k_c,
+		       spinor * const j_s, spinor * const j_c,
+		       const double mu, const double eps) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  spinor *r_s, *r_c, *s_s, *s_c, *t_s, *t_c;
+  su3_vector ALIGN phi1, phi2;
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int ix = 0; ix < (VOLUME/2); ++ix){
+    r_s = l_s + ix;
+    r_c = l_c + ix;
+    s_s = k_s + ix;
+    s_c = k_c + ix;
+    t_s = j_s + ix;
+    t_c = j_c + ix;
+
+    _complex_times_vector(phi1, (1. - mu * I), s_s->s0);
+    _vector_add_mul(phi1, eps, s_c->s0);
+    _complex_times_vector(phi2, (1. + mu * I), s_c->s0);
+    _vector_add_mul(phi2, eps, s_s->s0);
+    _vector_sub(r_s->s0, phi1, t_s->s0);
+    _vector_sub(r_c->s0, phi2, t_c->s0);
+
+    _complex_times_vector(phi1, (1. - mu * I), s_s->s1);
+    _vector_add_mul(phi1, eps, s_c->s1);
+    _complex_times_vector(phi2, (1. + mu * I), s_c->s1);
+    _vector_add_mul(phi2, eps, s_s->s1);
+    _vector_sub(r_s->s1, phi1, t_s->s1);
+    _vector_sub(r_c->s1, phi2, t_c->s1);
+
+    _complex_times_vector(phi1, (1. + mu * I), s_s->s2);
+    _vector_add_mul(phi1, eps, s_c->s2);
+    _complex_times_vector(phi2, (1. - mu * I), s_c->s2);
+    _vector_add_mul(phi2, eps, s_s->s2);
+    _vector_sub(r_s->s2, t_s->s2, phi1);
+    _vector_sub(r_c->s2, t_c->s2, phi2);
+
+    _complex_times_vector(phi1, (1. + mu * I), s_s->s3);
+    _vector_add_mul(phi1, eps, s_c->s3);
+    _complex_times_vector(phi2, (1. - mu * I), s_c->s3);
+    _vector_add_mul(phi2, eps, s_s->s3);
+    _vector_sub(r_s->s3, t_s->s3, phi1);
+    _vector_sub(r_c->s3, t_c->s3, phi2);
+  }
+
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+
+  return;
+}
+
+
 /*  calculates P(Q Q^dagger) for the nondegenerate case */
 
 void P_ndpsi(spinor * const l_strange, spinor * const l_charm,
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index 88146555d..8e3a241f3 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -42,6 +42,7 @@ void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		  spinor * const k_strange, spinor * const k_charm);
 
 void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
+void Qsw_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 
 void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
                        spinor * const k_strange, spinor * const k_charm, 
@@ -52,7 +53,8 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 	     const int ieo);
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		    spinor * const k_strange, spinor * const k_charm);
+		    spinor * const k_strange, spinor * const k_charm,
+		    const double mu, const double eps);
 
 void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
 		      spinor * const k_strange, spinor * const k_charm);

From f3a318b627c8336b14082244ab372d7f72912ec7 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 14:25:12 +0200
Subject: [PATCH 036/110] added H_eo_sw_ndpsi

---
 tm_operators_nd.c | 16 ++++++++++++++++
 tm_operators_nd.h |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 385b34025..9e9a8abdd 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -458,6 +458,22 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   return;
 }
 
+void H_eo_sw_ndpsi(spinor * const l_strange, spinor * const l_charm, 
+		   spinor * const k_strange, spinor * const k_charm, 
+		   const int ieo) {
+  /* recall:   strange <-> up    while    charm <-> dn   */
+  Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX], k_strange);
+  Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX+1], k_charm);
+  
+  assign_mul_one_sw_pm_imu_eps(EE, l_strange, l_charm, 
+			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], 
+			       -g_mubar, g_epsbar);
+
+  clover_inv_nd(EE, l_strange, l_charm);
+
+  return;
+}
+
 // for this routine we need to have sw_invert_nd and sw_term called before hand
 // and the clover term must be initialised
 void Msw_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index 8e3a241f3..d48c85c7c 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -51,6 +51,9 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
 void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm, 
              spinor * const k_strange, spinor * const k_charm, 
 	     const int ieo);
+void H_eo_sw_ndpsi(spinor * const l_strange, spinor * const l_charm, 
+		   spinor * const k_strange, spinor * const k_charm, 
+		   const int ieo);
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
 		    spinor * const k_strange, spinor * const k_charm,

From a4f66017c3735b34fb448ec703214907599ff0fd Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 14:28:33 +0200
Subject: [PATCH 037/110] removed QdaggerQ_poly which was identical to
 Ptilde_ndpsi

---
 Ptilde_nd.c               |  24 +++---
 chebyshev_polynomial_nd.c | 163 +-------------------------------------
 chebyshev_polynomial_nd.h |   2 -
 cloverndpoly_monomial.c   |  77 ++++++++++--------
 ndpoly_monomial.c         |  14 ++--
 5 files changed, 67 insertions(+), 213 deletions(-)

diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index 9ec08e7c4..02f2fd860 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -104,19 +104,19 @@ void Ptilde_cheb_coefs(double aa, double bb, double dd[], int n, double exponent
  **************************************************************************/
 
 void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n, 
-                   spinor *S_s, spinor *S_c){
-
+		  spinor *S_s, spinor *S_c) {
+  
   int j;
   double fact1, fact2, temp1, temp2, temp3, temp4;
-
+  
   spinor *svs_=NULL, *svs=NULL, *ds_=NULL, *ds=NULL, *dds_=NULL, *dds=NULL, 
     *auxs_=NULL, *auxs=NULL, *aux2s_=NULL, *aux2s=NULL, *aux3s_=NULL, 
     *aux3s=NULL;
   spinor *svc_=NULL, *svc=NULL, *dc_=NULL, *dc=NULL, *ddc_=NULL, 
     *ddc=NULL, *auxc_=NULL, *auxc=NULL, *aux2c_=NULL, *aux2c=NULL, 
     *aux3c_=NULL, *aux3c=NULL;
-
-
+  
+  
 #if ( defined SSE || defined SSE2 )
   svs_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
   svs   = (spinor *)(((unsigned long int)(svs_)+ALIGN_BASE)&~ALIGN_BASE);
@@ -168,24 +168,24 @@ void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n,
   aux3c_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
   aux3c = aux3c_;
 #endif
-
+  
   fact1=4/(phmc_cheb_evmax-phmc_cheb_evmin);
   fact2=-2*(phmc_cheb_evmax+phmc_cheb_evmin)/(phmc_cheb_evmax-phmc_cheb_evmin);
-
+  
   zero_spinor_field(&ds[0],VOLUME/2);
   zero_spinor_field(&dds[0],VOLUME/2); 
   zero_spinor_field(&dc[0],VOLUME/2);
   zero_spinor_field(&ddc[0],VOLUME/2); 
-
+  
   /*   sub_low_ev(&aux3[0], &S[0]);  */
   assign(&aux3s[0], &S_s[0],VOLUME/2);  
   assign(&aux3c[0], &S_c[0],VOLUME/2);  
-
+  
   /*  Use the Clenshaw's recursion for the Chebysheff polynomial */
   for (j=n-1; j>=1; j--) {
     assign(&svs[0],&ds[0],VOLUME/2);
     assign(&svc[0],&dc[0],VOLUME/2); 
-
+    
     /*
      * if ( (j%10) == 0 ) {
      *   sub_low_ev(&aux[0], &d[0]);
@@ -351,9 +351,9 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
     random_spinor_field(sc,VOLUME/2, 1);
 
     Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0]);
-    QdaggerQ_poly(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
+    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
     Qtm_pm_ndpsi(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
-    QdaggerQ_poly(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
+    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
     Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0]);
 
     diff(&aux2s[0],&auxs[0], &ss[0], VOLUME/2);
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 2fce78dca..dc5270be8 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -31,6 +31,7 @@
 #include "tm_operators.h"
 #include "tm_operators_nd.h"
 #include "phmc.h"
+#include "Ptilde_nd.h"
 #include "chebyshev_polynomial_nd.h"
 
 
@@ -76,164 +77,6 @@ void chebyshev_coefs(double aa, double bb, double c[], int n, double exponent){
 #undef PI
 
 
-/****************************************************************************  
- *
- * computation of, despite of the name, (Q Q^dagger) on a vector
- *   by using the chebyshev approximation for the function ()^1/4
- * subtraction of low-lying eigenvalues is not yet implemented for this
- *
- **************************************************************************/
-
-
-void QdaggerQ_poly(spinor *R_s, spinor *R_c, double *c, int n, 
-                   spinor *S_s, spinor *S_c){
-
-  int j;
-  double fact1, fact2, temp1, temp2, temp3, temp4;
-
-  spinor *svs_=NULL, *svs=NULL, *ds_=NULL, *ds=NULL, *dds_=NULL, *dds=NULL, 
-         *auxs_=NULL, *auxs=NULL, *aux2s_=NULL, *aux2s=NULL, *aux3s_=NULL, 
-         *aux3s=NULL;
-  spinor *svc_=NULL, *svc=NULL, *dc_=NULL, *dc=NULL, *ddc_=NULL, 
-         *ddc=NULL, *auxc_=NULL, *auxc=NULL, *aux2c_=NULL, *aux2c=NULL, 
-         *aux3c_=NULL, *aux3c=NULL;
-
-
-#if ( defined SSE || defined SSE2 )
-   svs_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   svs   = (spinor *)(((unsigned long int)(svs_)+ALIGN_BASE)&~ALIGN_BASE);
-   ds_   = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   ds    = (spinor *)(((unsigned long int)(ds_)+ALIGN_BASE)&~ALIGN_BASE);
-   dds_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   dds   = (spinor *)(((unsigned long int)(dds_)+ALIGN_BASE)&~ALIGN_BASE);
-   auxs_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   auxs  = (spinor *)(((unsigned long int)(auxs_)+ALIGN_BASE)&~ALIGN_BASE);
-   aux2s_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   aux2s = (spinor *)(((unsigned long int)(aux2s_)+ALIGN_BASE)&~ALIGN_BASE);
-   aux3s_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   aux3s = (spinor *)(((unsigned long int)(aux3s_)+ALIGN_BASE)&~ALIGN_BASE);
-   svc_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   svc   = (spinor *)(((unsigned long int)(svc_)+ALIGN_BASE)&~ALIGN_BASE);
-   dc_   = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   dc    = (spinor *)(((unsigned long int)(dc_)+ALIGN_BASE)&~ALIGN_BASE);
-   ddc_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   ddc   = (spinor *)(((unsigned long int)(ddc_)+ALIGN_BASE)&~ALIGN_BASE);
-   auxc_ = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   auxc  = (spinor *)(((unsigned long int)(auxc_)+ALIGN_BASE)&~ALIGN_BASE);
-   aux2c_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   aux2c = (spinor *)(((unsigned long int)(aux2c_)+ALIGN_BASE)&~ALIGN_BASE);
-   aux3c_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
-   aux3c = (spinor *)(((unsigned long int)(aux3c_)+ALIGN_BASE)&~ALIGN_BASE);
-#else
-   svs_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   svs = svs_;
-   ds_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   ds = ds_;
-   dds_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   dds = dds_;
-   auxs_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   auxs = auxs_;
-   aux2s_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   aux2s = aux2s_;
-   aux3s_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   aux3s = aux3s_;
-   svc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   svc = svc_;
-   dc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   dc = dc_;
-   ddc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   ddc = ddc_;
-   auxc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   auxc = auxc_;
-   aux2c_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   aux2c = aux2c_;
-   aux3c_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-   aux3c = aux3c_;
-#endif
-
-
-   fact1=4/(phmc_cheb_evmax-phmc_cheb_evmin);
-   fact2=-2*(phmc_cheb_evmax+phmc_cheb_evmin)/(phmc_cheb_evmax-phmc_cheb_evmin);
-
-   zero_spinor_field(&ds[0],VOLUME/2);
-   zero_spinor_field(&dds[0],VOLUME/2); 
-   zero_spinor_field(&dc[0],VOLUME/2);
-   zero_spinor_field(&ddc[0],VOLUME/2); 
-
-   /*   sub_low_ev(&aux3[0], &S[0]);  */
-   assign(&aux3s[0], &S_s[0],VOLUME/2);  
-   assign(&aux3c[0], &S_c[0],VOLUME/2);  
-   
-   /*  Use the Clenshaw's recursion for the Chebysheff polynomial */
-   for (j=n-1; j>=1; j--) {
-     assign(&svs[0],&ds[0],VOLUME/2);
-     assign(&svc[0],&dc[0],VOLUME/2); 
-       
-     /*     
-     if ( (j%10) == 0 ) {
-  	 sub_low_ev(&aux[0], &d[0]);
-     }
-     else { */
-     assign(&auxs[0], &ds[0], VOLUME/2);
-     assign(&auxc[0], &dc[0], VOLUME/2);
-     /*   } */  
-
-
-     Qtm_pm_ndpsi(&R_s[0], &R_c[0], &auxs[0], &auxc[0]);
-
-     temp1=-1.0;
-     temp2=c[j];
-     assign_mul_add_mul_add_mul_add_mul_r(&ds[0] , &R_s[0], &dds[0], &aux3s[0], fact2, fact1, temp1, temp2,VOLUME/2);
-     assign_mul_add_mul_add_mul_add_mul_r(&dc[0] , &R_c[0], &ddc[0], &aux3c[0], fact2, fact1, temp1, temp2,VOLUME/2);
-     assign(&dds[0], &svs[0],VOLUME/2);
-     assign(&ddc[0], &svc[0],VOLUME/2);
-
-   }
-     
-   /*     sub_low_ev(&R[0],&d[0]);  */ 
-   assign(&R_s[0], &ds[0],VOLUME/2);  
-   assign(&R_c[0], &dc[0],VOLUME/2);  
-
-
-   Qtm_pm_ndpsi(&auxs[0], &auxc[0], &R_s[0], &R_c[0]);
-
-   temp1=-1.0;
-   temp2=c[0]/2;
-   temp3=fact1/2;
-   temp4=fact2/2;
-   assign_mul_add_mul_add_mul_add_mul_r(&auxs[0], &ds[0], &dds[0], &aux3s[0], temp3, temp4, temp1, temp2,VOLUME/2);
-   assign_mul_add_mul_add_mul_add_mul_r(&auxc[0], &dc[0], &ddc[0], &aux3c[0], temp3, temp4, temp1, temp2,VOLUME/2);
-   assign(&R_s[0], &auxs[0],VOLUME/2);
-   assign(&R_c[0], &auxc[0],VOLUME/2);
-     
-   /*     addproj_q_invsqrt(&R[0], &S[0]); */
-    
-   /*
-#ifndef _SOLVER_OUTPUT
-     if(g_proc_id == g_stdio_proc){
-       printf("Order of Chebysheff approximation = %d\n",j); 
-       fflush( stdout);};
-#endif
-   */
-
-    
-   free(svs_);  
-   free(ds_);   
-   free(dds_);  
-   free(auxs_); 
-   free(aux2s_);
-   free(aux3s_);
-   free(svc_);  
-   free(dc_);   
-   free(ddc_);  
-   free(auxc_); 
-   free(aux2c_);
-   free(aux3c_);
-   
-}
-  
-
-
 double cheb_eval(int M, double *c, double s){
 
   double d=0,dd=0, sv, z, z2, res;
@@ -316,9 +159,9 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   }
 
   /* Here we check the accuracy */
-  QdaggerQ_poly(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0]);
+  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0]);
   Qtm_pm_ndpsi(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
-  QdaggerQ_poly(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0]);
+  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0]);
 
   diff(&aux2s[0],&auxs[0],&ss[0],VOLUME/2);
   temp=square_norm(&aux2s[0],VOLUME/2, 1)/square_norm(&ss[0],VOLUME/2, 1)/4.0;
diff --git a/chebyshev_polynomial_nd.h b/chebyshev_polynomial_nd.h
index 6edd61fb0..1dda9c1c5 100644
--- a/chebyshev_polynomial_nd.h
+++ b/chebyshev_polynomial_nd.h
@@ -24,8 +24,6 @@ double func(double u, double exponent);
 
 void chebyshev_coefs(double a, double b, double c[], int n, double exponent);
 
-void QdaggerQ_poly(spinor *R_s, spinor *R_c, double *c, int n, spinor *S_s, spinor *S_c);
-
 double cheb_eval(int M, double *c, double s);
 
 void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 092409e18..6805c4d1b 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -47,6 +47,7 @@
 #include "boundary.h"
 #include "phmc.h"
 #include "init_chi_spinor_field.h"
+#include "clover_leaf.h"
 #include "cloverndpoly_monomial.h"
 
 /********************************************
@@ -59,6 +60,19 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   int j, k;
   monomial * mnl = &monomial_list[id];
 
+  for(int i = 0; i < VOLUME; i++) { 
+    for(int mu = 0; mu < 4; mu++) { 
+      _su3_zero(swm[i][mu]);
+      _su3_zero(swp[i][mu]);
+    }
+  }
+  ndpoly_set_global_parameter(mnl, 0);
+  
+  // we compute the clover term (1 + T_ee(oo)) for all sites x
+  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
+  // we invert it for the even sites only
+  sw_invert_nd(EE, mnl->mu);
+
 
   /* This factor 2 a missing factor 2 in trace_lambda */
   ndpoly_set_global_parameter(mnl, 0);
@@ -76,9 +90,9 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   assign(g_chi_dn_spinor_field[0], mnl->pf2, VOLUME/2);
   
   for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
-    Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
-			 g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
-			 mnl->MDPolyRoots[k-1]);
+    Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
+			     g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
+			     mnl->MDPolyRoots[k-1]);
   }
   
   /* Here comes the remaining fields  chi_k ; k=n,...,2n-1  */
@@ -91,20 +105,20 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
     assign(g_chi_dn_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], VOLUME/2);
     
-    Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
-			 g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
-			 mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
+    Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
+			     g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
+			     mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
     
     /* Get the even parts of the  (j-1)th  chi_spinors */
-    H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
-	    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
+    H_eo_sw_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
+		  g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
     
     /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
     deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);      /* UP */
     deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);    /* DN */
     
     /* Get the even parts of the  (2N-j)-th  chi_spinors */
-    H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
+    H_eo_sw_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
 	    g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
     
     /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
@@ -122,10 +136,9 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 
   ndpoly_set_global_parameter(mnl, 0);
   g_mu3 = 0.;
-  g_c_sw = mnl->c_sw;
   init_sw_fields();
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
-  sw_invert(EE, mnl->mu);
+  sw_invert_nd(EE, mnl->mu);
 
   mnl->energy0 = 0.;
   random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
@@ -141,23 +154,23 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
     printf("PHMC: OLD Energy  DN + UP %e \n\n", mnl->energy0);
   }
 
-  Qtm_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+  Qsw_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 		  g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
   
   for(j = 1; j < (mnl->MDPolyDegree); j++){
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
     assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
     
-    Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
+    Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 			 g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
 			 mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
   }
   Ptilde_ndpsi(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
-		mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
+	       mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
   
   assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
   assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
-
+  
   temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
   if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
     printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n ");
@@ -186,10 +199,9 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 
   ndpoly_set_global_parameter(mnl, 0);
   g_mu3 = 0.;
-  g_c_sw = mnl->c_sw;
   init_sw_fields();
   sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
-  sw_invert(EE, mnl->mu);
+  sw_invert_nd(EE, mnl->mu);
 
   mnl->energy1 = 0.;
   Ener[0] = 0;
@@ -209,7 +221,7 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 
   for(j = 1; j <= (mnl->MDPolyDegree-1); j++) {
     /* Change this name !!*/
-    Q_tau1_sub_const_ndpsi(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
+    Qsw_tau1_sub_const_ndpsi(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
     
     dummy = up1; up1 = up0; up0 = dummy;
     dummy = dn1; dn1 = dn0; dn0 = dummy;
@@ -240,23 +252,23 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     
     if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
       Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		    mnl->PtildeCoefs, mnl->PtildeDegree, 
-		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
-      QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		    mnl->MDPolyCoefs, mnl->MDPolyDegree, 
-		    g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
-      Qtm_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-			    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+		   mnl->PtildeCoefs, mnl->PtildeDegree, 
+		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+      Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
+		   mnl->MDPolyCoefs, mnl->MDPolyDegree, 
+		   g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
+      Qsw_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+		       g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
     }
     else { /*  Chi[j] = ( Ptilde P Q ) Chi[j-1]  */ 
-      Qtm_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
-      QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		    mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
-		    g_chi_dn_spinor_field[j]);
+      Qsw_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
+		g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+      Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
+		   mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
+		   g_chi_dn_spinor_field[j]);
       Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		    mnl->PtildeCoefs, mnl->PtildeDegree, 
-		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+		   mnl->PtildeCoefs, mnl->PtildeDegree, 
+		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
     }
     
     Ener[j] = Ener[j-1] + Ener[0];
@@ -297,7 +309,6 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called cloverndpoly_acc for id %d %d dH = %1.4e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
   }
-  /* END IF PHMC */
   return(mnl->energy1 - mnl->energy0);
 }
 
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 0dced7b56..a4fd24329 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -327,18 +327,19 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 	Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		      mnl->PtildeCoefs, mnl->PtildeDegree, 
 		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
-	QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		      mnl->MDPolyCoefs, mnl->MDPolyDegree, 
-		      g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
+	Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
+		     mnl->MDPolyCoefs, mnl->MDPolyDegree, 
+		     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
+
 	Qtm_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
       }
       else { /*  Chi[j] = ( Ptilde P Q ) Chi[j-1]  */ 
 	Qtm_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
-	QdaggerQ_poly(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		      mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
-		      g_chi_dn_spinor_field[j]);
+	Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
+		     mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
+		     g_chi_dn_spinor_field[j]);
 	Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		      mnl->PtildeCoefs, mnl->PtildeDegree, 
 		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
@@ -549,6 +550,7 @@ void ndpoly_set_global_parameter(monomial * const mnl, const int exact) {
   g_mubar = mnl->mubar;
   g_epsbar = mnl->epsbar;
   g_kappa = mnl->kappa;
+  g_c_sw = mnl->c_sw;
   boundary(g_kappa);
 
   if (g_epsbar!=0.0 || exact == 0){

From 95f2beadf55b28b019410843f835187f4df9bc6e Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 14:36:54 +0200
Subject: [PATCH 038/110] Ptilde_ndpsi gets operator as argument now so it can
 be re-used for clover

---
 Ptilde_nd.c               | 16 ++++++++--------
 Ptilde_nd.h               |  4 +++-
 chebyshev_polynomial_nd.c |  4 ++--
 cloverndpoly_monomial.c   | 10 +++++-----
 ndpoly_monomial.c         | 10 +++++-----
 reweighting_factor_nd.c   |  3 ++-
 6 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index 02f2fd860..07011a862 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -28,13 +28,13 @@
 # include <mpi.h>
 #endif
 #include "global.h"
-#include "linsolve.h"
 #include "linalg_eo.h"
 #include "start.h"
 #include "tm_operators.h"
 #include "tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "phmc.h"
+#include "solver/matrix_mult_typedef_nd.h"
 #include "Ptilde_nd.h"
 
 
@@ -104,7 +104,7 @@ void Ptilde_cheb_coefs(double aa, double bb, double dd[], int n, double exponent
  **************************************************************************/
 
 void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n, 
-		  spinor *S_s, spinor *S_c) {
+		  spinor *S_s, spinor *S_c, matrix_mult_nd Qsq) {
   
   int j;
   double fact1, fact2, temp1, temp2, temp3, temp4;
@@ -195,7 +195,7 @@ void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n,
     /*   } */
 
 
-    Qtm_pm_ndpsi(&R_s[0], &R_c[0], &auxs[0], &auxc[0]);
+    Qsq(&R_s[0], &R_c[0], &auxs[0], &auxc[0]);
 
     temp1=-1.0;
     temp2=dd[j];
@@ -208,7 +208,7 @@ void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n,
   assign(&R_s[0], &ds[0],VOLUME/2);
   assign(&R_c[0], &dc[0],VOLUME/2);
 
-  Qtm_pm_ndpsi(&auxs[0], &auxc[0], &R_s[0], &R_c[0]);
+  Qsq(&auxs[0], &auxc[0], &R_s[0], &R_c[0]);
 
   temp1=-1.0;
   temp2=dd[0]/2;
@@ -350,11 +350,11 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
     random_spinor_field(ss,VOLUME/2, 1);
     random_spinor_field(sc,VOLUME/2, 1);
 
-    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0]);
-    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
+    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0], &Qtm_pm_ndpsi);
+    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], &Qtm_pm_ndpsi);
     Qtm_pm_ndpsi(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
-    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0]);
-    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0]);
+    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], &Qtm_pm_ndpsi);
+    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0], &Qtm_pm_ndpsi);
 
     diff(&aux2s[0],&auxs[0], &ss[0], VOLUME/2);
     temp = square_norm(&aux2s[0], VOLUME/2, 1) / square_norm(&ss[0], VOLUME/2, 1) / 4.0;
diff --git a/Ptilde_nd.h b/Ptilde_nd.h
index 4e5ed76a4..cd51078fc 100644
--- a/Ptilde_nd.h
+++ b/Ptilde_nd.h
@@ -20,12 +20,14 @@
 #ifndef _PTILDE_ND_H
 #define _PTILDE_ND_H
 
+#include "solver/matrix_mult_typedef_nd.h"
 
 double func_tilde(double u, double exponent);
 
 void Ptilde_cheb_coefs(double a, double b, double dd[], int n, double exponent);
 
-void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n, spinor *S_s, spinor *S_c);
+void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n, 
+		  spinor *S_s, spinor *S_c, matrix_mult_nd Qsq);
 
 double chebtilde_eval(int M, double *dd, double s);
 
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index dc5270be8..5f90c1986 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -159,9 +159,9 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   }
 
   /* Here we check the accuracy */
-  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0]);
+  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0], &Qtm_pm_ndpsi);
   Qtm_pm_ndpsi(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
-  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0]);
+  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0], &Qtm_pm_ndpsi);
 
   diff(&aux2s[0],&auxs[0],&ss[0],VOLUME/2);
   temp=square_norm(&aux2s[0],VOLUME/2, 1)/square_norm(&ss[0],VOLUME/2, 1)/4.0;
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 6805c4d1b..08b08d465 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -166,7 +166,7 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 			 mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
   }
   Ptilde_ndpsi(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
-	       mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
+	       mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], &Qsw_pm_ndpsi);
   
   assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
   assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
@@ -253,10 +253,10 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
       Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		   mnl->PtildeCoefs, mnl->PtildeDegree, 
-		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], &Qsw_pm_ndpsi);
       Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		   mnl->MDPolyCoefs, mnl->MDPolyDegree, 
-		   g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
+		   g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], &Qsw_pm_ndpsi);
       Qsw_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		       g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
     }
@@ -265,10 +265,10 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 		g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
       Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		   mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
-		   g_chi_dn_spinor_field[j]);
+		   g_chi_dn_spinor_field[j], &Qsw_pm_ndpsi);
       Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		   mnl->PtildeCoefs, mnl->PtildeDegree, 
-		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], &Qsw_pm_ndpsi);
     }
     
     Ener[j] = Ener[j-1] + Ener[0];
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index a4fd24329..88af252e9 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -195,7 +195,7 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 			mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
     }
     Ptilde_ndpsi(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
-		  mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1]);
+		 mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], &Qtm_pm_ndpsi);
   } 
   else if( phmc_exact_poly==1 && g_epsbar!=0.0) {
     /* Attention this is Q * tau1, up/dn are exchanged in the input spinor  */
@@ -326,10 +326,10 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
       if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
 	Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		      mnl->PtildeCoefs, mnl->PtildeDegree, 
-		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+		     g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], &Qtm_pm_ndpsi);
 	Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		     mnl->MDPolyCoefs, mnl->MDPolyDegree, 
-		     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j]);
+		     g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], &Qtm_pm_ndpsi);
 
 	Qtm_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 			      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
@@ -339,10 +339,10 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 			g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
 	Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
 		     mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
-		     g_chi_dn_spinor_field[j]);
+		     g_chi_dn_spinor_field[j], &Qtm_pm_ndpsi);
 	Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
 		      mnl->PtildeCoefs, mnl->PtildeDegree, 
-		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
+		      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], &Qtm_pm_ndpsi);
       }
 
       Ener[j] = Ener[j-1] + Ener[0];
diff --git a/reweighting_factor_nd.c b/reweighting_factor_nd.c
index ea68fb9b9..e30fdd380 100644
--- a/reweighting_factor_nd.c
+++ b/reweighting_factor_nd.c
@@ -28,6 +28,7 @@
 #include "linalg_eo.h"
 #include "start.h"
 #include "tm_operators.h"
+#include "tm_operators_nd.h"
 #include "Ptilde_nd.h"
 #include "phmc.h"
 #include "reweighting_factor_nd.h"
@@ -56,7 +57,7 @@ double reweighting_factor_nd(const int N)
     temp1 = phmc_ptilde_cheby_coef[0];
     phmc_ptilde_cheby_coef[0] = temp1 - 1;
 
-    Ptilde_ndpsi(g_chi_up_spinor_field[3], g_chi_dn_spinor_field[3], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, g_chi_up_spinor_field[2], g_chi_dn_spinor_field[2]);
+    Ptilde_ndpsi(g_chi_up_spinor_field[3], g_chi_dn_spinor_field[3], phmc_ptilde_cheby_coef, phmc_ptilde_n_cheby, g_chi_up_spinor_field[2], g_chi_dn_spinor_field[2], &Qtm_pm_ndpsi);
 
     phmc_ptilde_cheby_coef[0] = temp1;
 

From a7a09b978cbddb476b47a78027fa4c743d787536 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 16:57:15 +0200
Subject: [PATCH 039/110] eigenvalues_bi moved to subdir solver, max_eigenvalues_bi removed

---
 max_eigenvalues_bi.c                        | 267 --------------------
 max_eigenvalues_bi.h                        |  31 ---
 phmc.c                                      |   2 +-
 eigenvalues_bi.c => solver/eigenvalues_bi.c |   0
 eigenvalues_bi.h => solver/eigenvalues_bi.h |   0
 test/test_eigenvalues.c                     |  22 +-
 6 files changed, 10 insertions(+), 312 deletions(-)
 delete mode 100644 max_eigenvalues_bi.c
 delete mode 100644 max_eigenvalues_bi.h
 rename eigenvalues_bi.c => solver/eigenvalues_bi.c (100%)
 rename eigenvalues_bi.h => solver/eigenvalues_bi.h (100%)

diff --git a/max_eigenvalues_bi.c b/max_eigenvalues_bi.c
deleted file mode 100644
index eccab3623..000000000
--- a/max_eigenvalues_bi.c
+++ /dev/null
@@ -1,267 +0,0 @@
-/***********************************************************************
- *
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- *
- * Here we compute the nr_of_eigenvalues highest eigenvalues
- * of (gamma5*D)^2. Therefore we use the arnoldi routines.
- * 
- * The computed eigenvalues are stored in g_eigenvalues
- * and the computed eigenvectors in g_ev
- * 
- * inout:
- *   nr_of_eigenvalues:      input:  Number of eigenvalues to compute
- *                           output: Number of computed eigenvalues
- * input:
- *   crylov_space_dimension: Dimension of crylov space dimension
- *                           to be used in the arnoldi routines
- *   operator_flag:          Choose if we want to use D_Wilson
- *                           or D_Overlap
- *
- * Autor: Thomas Chiarappa
- *        Thomas.Chiarappa@mib.infn.it
- *
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include "global.h"
-#include "su3.h"
-#include "linalg_eo.h"
-#include "start.h"
-#include "tm_operators.h"
-#include "solver/solver.h"
-#include "solver/jdher_bi.h"
-#ifdef MPI
-#include "solver/pjdher_bi.h"
-#endif
-#include "max_eigenvalues_bi.h"
-#include "tm_operators_nd.h"
-#include "gettime.h"
-
-/* Needed only if you want to create an EV-file
-#include "rw_ev.h"
-#include "read_manip.h"
-*/
-
-/*********************************************************
- *
- * We need here another function Qsqr_psi, representing
- * (gamma5*D)^2, because this is used in the CG solver
- *
- * It is not identical to Q_sqr_psi and not externally
- * accessible.
- *
- *********************************************************/
-
-bispinor  *max_evs = NULL;
-double * max_evls = NULL;
-/*
-int eigenvalues_for_cg_computed = 0;
-*/
-
-
-double max_eigenvalues_bi(int * nr_of_eigenvalues, const int operator_flag, 
-		 const int max_iterations, const double precision) {
-
-
-
-  static bispinor * max_evs_ = NULL;
-  static int allocated = 0;
-  bispinor  *temp_field, *temp_field_ = NULL, *aux, *aux_ = NULL;
-  bispinor *copy_ev_, *copy_ev;
-
-  /**********************
-   * For Jacobi-Davidson 
-   **********************/
-  /* OLD VALUES HERE
-  int verbosity = 5;
-  */  
-  int verbosity = g_debug_level, converged = 0, blocksize = 1, blockwise = 0;
-  int solver_it_max = 50, j_max, j_min; 
-  /*int it_max = 10000;*/
-  complex *eigv_ = NULL, *eigv;
-  double decay_min = 1.7, decay_max = 1.5, prec,
-    threshold_min = 1.e-3, threshold_max = 5.e-2;
-  static int v0dim = 0;
-
-  /**********************
-   * General variables
-   **********************/
-  int returncode=0;
-
-  int i, iVol, ix;
-
-  FILE *conf_bifile=NULL;
-  char * filename = NULL;
-  char conf_bifilename[50];
-
-  double ev_time=0.0, av_time=0.0;
-  
-  filename = calloc(200, sizeof(char));
-  /*  strcpy(filename,optarg);*/
-
-
-
-  if(g_proc_id == g_stdio_proc) printf("\nNumber of highest eigenvalues to compute = %d\n\n",(*nr_of_eigenvalues));
-  /*
-  eigenvalues_for_cg_computed = 1;
-  */
-
-  if(g_proc_id == g_stdio_proc) printf("Using Jacobi-Davidson method! \n");
-  if((*nr_of_eigenvalues) < 8){
-    j_max = 15;
-    j_min = 8;
-  }
-  else{
-    j_max = 2*(*nr_of_eigenvalues);
-    j_min = (*nr_of_eigenvalues);
-  }
-  /* RELAXED ACCURACY
-  if(precision < 1.e-14){
-    prec = 1.e-14;
-  }
-  else{
-  */
-    prec = precision;
-  /* REMEMBER TO CLOSE THE BRACKETS 
-  }
-  */
-/*  g_mu = 0.00343; */
-/*   prec = 1.e-10; */
-  if(allocated == 0) {
-    allocated = 1;
-#if (defined SSE || defined SSE2 || defined SSE3)
-    max_evs_ = calloc((VOLUME)/2*(*nr_of_eigenvalues)+1, sizeof(bispinor)); 
-    max_evs = (bispinor *)(((unsigned long int)(max_evs_)+ALIGN_BASE)&~ALIGN_BASE);
-    copy_ev_ = calloc((VOLUME)/2*(*nr_of_eigenvalues)+1, sizeof(bispinor)); 
-    copy_ev = (bispinor *)(((unsigned long int)(copy_ev_)+ALIGN_BASE)&~ALIGN_BASE);
-    /*
-    temp_field_ = calloc((VOLUMEPLUSRAND)/2+1, sizeof(spinor));
-    temp_field = (spinor *)(((unsigned long int)(temp_field_)+ALIGN_BASE)&~ALIGN_BASE);
-
-    aux_ = calloc((VOLUMEPLUSRAND)/2+1, sizeof(spinor));
-    aux = (spinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE);
-    */
-
-    temp_field_ = calloc((VOLUME)/2+1, sizeof(bispinor));
-    temp_field = (bispinor *)(((unsigned long int)(temp_field_)+ALIGN_BASE)&~ALIGN_BASE);
-
-    aux_ = calloc((VOLUME)/2+1, sizeof(bispinor));
-    aux = (bispinor *)(((unsigned long int)(aux_)+ALIGN_BASE)&~ALIGN_BASE);
-#else
-
-    max_evs_= calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor));
-    copy_ev_= calloc((VOLUME)/2*(*nr_of_eigenvalues), sizeof(bispinor));
-
-    temp_field_ = calloc((VOLUME)/2, sizeof(bispinor));
-    aux_ = calloc((VOLUME)/2, sizeof(bispinor));
-
-    max_evs = max_evs_;
-    copy_ev = copy_ev_;
-    temp_field = temp_field_;
-    aux = aux_;
-#endif
-    max_evls = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
-  }
-
-  /* compute maximal eigenvalues */
-
-  if(g_proc_id==0) {
-
-    printf(" Values of   mu = %e     mubar = %e     eps = %e     precision = %e  \n \n", g_mu, g_mubar, g_epsbar, precision);
-
-  }
-
-  av_time = gettime();
-
-  DeltaTcd = 0.0;
-  DeltaTtot = 0.0;
- 
-  /*  Come secondo argomento, originariamente c`era 
-      (VOLUMEPLUSRAND)/2*sizeof(spinor)/sizeof(complex),
-  */
-
-  /*  THE VALUE IN THE SECOND LINE WAS 
-       0 FOR MINIMAL EW , SET TO 50 FOR MAXIMAL EW
-  */
-
-#ifdef MPI
-  pjdher((VOLUME)/2*sizeof(bispinor)/sizeof(complex), (VOLUME)/2*sizeof(bispinor)/sizeof(complex),
-	 50., prec, 
-	 (*nr_of_eigenvalues), j_max, j_min, 
-	 max_iterations, blocksize, blockwise, v0dim, (complex*) max_evs,
-	 CG, solver_it_max,
-	 threshold_max, decay_max, verbosity,
-	 &converged, (complex*) max_evs, max_evls,
-	 &returncode, JD_MAXIMAL, 1,
-	 &Qtm_pm_ndbipsi);
-
-	/* IN THE LAST LINE, INSERT:
-             Qtm_pm_ndbipsi;   Non-degenerate case - on 1 bispinor 
-             Qtm_pm_ndpsi;      Non-degenerate case - on 2 spinors 
-             Qtm_pm_psi;        Degenerate case  -  on 1 spinor 
-	*/
-
-#else
-  jdher((VOLUME)/2*sizeof(bispinor)/sizeof(complex),
-        50., prec, 
-	(*nr_of_eigenvalues), j_max, j_min, 
-	max_iterations, blocksize, blockwise, v0dim, (complex*) max_evs,
-	BICGSTAB, solver_it_max,
-	threshold_max, decay_max, verbosity,
-	&converged, (complex*) max_evs, max_evls,
-	&returncode, JD_MAXIMAL, 1,
-	&Qtm_pm_ndbipsi);
-
-	/* IN THE LAST LINE, INSERT:
-             Qtm_pm_ndbipsi;   Non-degenerate case - on 1 bispinor 
-             Qtm_pm_ndpsi;      Non-degenerate case - on 2 spinors 
-             Qtm_pm_psi;        Degenerate case  -  on 1 spinor 
-	*/
-
-#endif
-
-  (*nr_of_eigenvalues) = converged;
-  v0dim = converged;
-
-  /*
-  printf(" Largest EV = %22.15e  \n", max_evls[0]);
-  */
-
-  ev_time = gettime();
-
-  DeltaTev = (ev_time - av_time);
-
-  if(g_proc_id==0) {
-    printf(" \n Now in maximal EW computation \n \n");
-
-    printf(" \n Elapsed time for comp-decomp in Q_Qdag_nd_bi =  %f \n", DeltaTcd);
-
-    printf(" \n Total elapsed time in Q_Qdag_nd_bi =  %f \n", DeltaTtot);
-
-    printf(" Number of S Matrix applications = %d \n", counter_Spsi);
-    printf(" \n Total elapsed time in Eigenvalues computation =  %f \n", DeltaTev);
-  }
-
-  free(max_evls);
-  return(max_evls[0]);
-}
diff --git a/max_eigenvalues_bi.h b/max_eigenvalues_bi.h
deleted file mode 100644
index fea3eb12b..000000000
--- a/max_eigenvalues_bi.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/***********************************************************************
- *
- * Copyright (C) 2006 Thomas Chiarappa
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifndef _MAX_EIGENVALUES_BI_H
-#define _MAX_EIGENVALUES_BI_H
-
-extern bispinor * max_evs;
-extern double * max_evls;
-/*
-extern int eigenvalues_for_cg_computed;
-*/
-double max_eigenvalues_bi(int * nev, const int operator_flag, const int max_iterations, const double prec);
-
-#endif
diff --git a/phmc.c b/phmc.c
index 094e5e397..0e162f77b 100644
--- a/phmc.c
+++ b/phmc.c
@@ -29,7 +29,7 @@
 
 #include "read_input.h"
 #include "init_bispinor_field.h"
-#include "eigenvalues_bi.h"
+#include "solver/eigenvalues_bi.h"
 #include "solver/solver.h"
 #include "init_chi_spinor_field.h"
 #include "chebyshev_polynomial_nd.h"
diff --git a/eigenvalues_bi.c b/solver/eigenvalues_bi.c
similarity index 100%
rename from eigenvalues_bi.c
rename to solver/eigenvalues_bi.c
diff --git a/eigenvalues_bi.h b/solver/eigenvalues_bi.h
similarity index 100%
rename from eigenvalues_bi.h
rename to solver/eigenvalues_bi.h
diff --git a/test/test_eigenvalues.c b/test/test_eigenvalues.c
index f2009c66e..dbb213d29 100644
--- a/test/test_eigenvalues.c
+++ b/test/test_eigenvalues.c
@@ -15,17 +15,14 @@
  * 
  * You should have received a copy of the GNU General Public License
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*******************************************************************************
-*
-* File hybrid.c
-*
-* Main for testing the Eigenvalues computation using bispinors 
-*
-* Author: Thomas Chiarappa
-*         Thomas.Chiarappa@mib.infn.it
-*
-*******************************************************************************/
+ *
+ *
+ * Main for testing the Eigenvalues computation using bispinors 
+ *
+ * Author: Thomas Chiarappa
+ *         Thomas.Chiarappa@mib.infn.it
+ *
+ *******************************************************************************/
 
 #define MAIN_PROGRAM
 
@@ -71,8 +68,7 @@
 #include "boundary.h"
 #include "polyakov_loop.h"
 
-#include "eigenvalues_bi.h"
-#include "max_eigenvalues_bi.h"
+#include "solver/eigenvalues_bi.h"
 
 char * Version = "2.3.5";
 

From edf5aed769cb53c0f21292de1bd37328eb40661c Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 16:57:50 +0200
Subject: [PATCH 040/110] eigenvalues_bi moved to subdir solver: Makefile.in and solver/Makefile.in updated

---
 Makefile.in        | 2 +-
 solver/Makefile.in | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 5c9bac305..b3055f064 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -59,7 +59,7 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	tm_operators_nd nddetratio_monomial \
 	chebyshev_polynomial_nd Ptilde_nd  \
 	init_chi_spinor_field reweighting_factor_nd \
-	init_bispinor_field eigenvalues_bi D_psi \
+	init_bispinor_field D_psi \
 	xchange_lexicfield xchange_2fields online_measurement \
 	monomial det_monomial detratio_monomial update_momenta \
 	integrator gauge_monomial ndpoly_monomial phmc \
diff --git a/solver/Makefile.in b/solver/Makefile.in
index 9a7ef1b30..708eee4bc 100644
--- a/solver/Makefile.in
+++ b/solver/Makefile.in
@@ -34,7 +34,7 @@ libsolver_TARGETS = bicgstab_complex gmres \
 	            bicgstabell bicgstab2 eigenvalues fgmres \
 	            gcr gcr4complex diagonalise_general_matrix \
 	            quicksort gmres_dr lu_solve jdher Msap \
-                    jdher_bi gram-schmidt \
+                    jdher_bi gram-schmidt eigenvalues_bi \
                     bicgstab_complex_bi cg_her_bi pcg_her \
                     sub_low_ev cg_her_nd poly_precon \
                     generate_dfl_subspace dfl_projector \

From 9cf3146c7221b736f4366b9ba3b696b0f7e04851 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 17:12:34 +0200
Subject: [PATCH 041/110] polynomial and eigenvalues_bi functions now take the operator as a function pointer

---
 Ptilde_nd.c               | 13 ++++---
 Ptilde_nd.h               |  3 +-
 chebyshev_polynomial_nd.c |  9 +++--
 chebyshev_polynomial_nd.h |  4 +-
 clover_leaf.h             |  3 --
 ndpoly_monomial.c         | 35 +++++++++---------
 phmc.c                    |  9 +++--
 solver/eigenvalues_bi.c   | 24 +++++-------
 solver/eigenvalues_bi.h   |  5 ++-
 tm_operators_nd.c         | 78 ++++++++++++++++++++++++++++++++++++++-
 tm_operators_nd.h         |  7 +++-
 11 files changed, 136 insertions(+), 54 deletions(-)

diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index 07011a862..ae9e690f6 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -268,7 +268,8 @@ double chebtilde_eval(int M, double *dd, double s){
 
 void degree_of_Ptilde(int * _degree, double ** coefs,
 		      const double EVMin, const double EVMax,
-		      const int sloppy_degree, const double acc) {
+		      const int sloppy_degree, const double acc, 
+		      matrix_mult_nd Qsq) {
   int i, j;
   double temp, temp2;
   int degree;
@@ -350,11 +351,11 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
     random_spinor_field(ss,VOLUME/2, 1);
     random_spinor_field(sc,VOLUME/2, 1);
 
-    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0], &Qtm_pm_ndpsi);
-    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], &Qtm_pm_ndpsi);
-    Qtm_pm_ndpsi(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
-    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], &Qtm_pm_ndpsi);
-    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0], &Qtm_pm_ndpsi);
+    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0], Qsq);
+    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], Qsq);
+    Qsq(&auxs[0], &auxc[0], &aux2s[0], &aux2c[0]);
+    Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], Qsq);
+    Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &aux2s[0], &aux2c[0], Qsq);
 
     diff(&aux2s[0],&auxs[0], &ss[0], VOLUME/2);
     temp = square_norm(&aux2s[0], VOLUME/2, 1) / square_norm(&ss[0], VOLUME/2, 1) / 4.0;
diff --git a/Ptilde_nd.h b/Ptilde_nd.h
index cd51078fc..438777220 100644
--- a/Ptilde_nd.h
+++ b/Ptilde_nd.h
@@ -33,6 +33,7 @@ double chebtilde_eval(int M, double *dd, double s);
 
 void degree_of_Ptilde(int * _degree, double ** coefs, 
 		      const double EVMin, const double EVMax,
-		      const int sloppy_degree, const double acc);
+		      const int sloppy_degree, const double acc,
+		      matrix_mult_nd Qsw);
 
 #endif
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 5f90c1986..f9d83601a 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -109,7 +109,8 @@ double cheb_eval(int M, double *c, double s){
 
 
 void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
-			     const double EVMin, const double EVMax) { 
+			     const double EVMin, const double EVMax,
+			     matrix_mult_nd Qsq) { 
   int j;
   double temp, temp2;
   int degree_of_p = *_degree_of_p + 1;
@@ -159,9 +160,9 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   }
 
   /* Here we check the accuracy */
-  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0], &Qtm_pm_ndpsi);
-  Qtm_pm_ndpsi(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
-  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0], &Qtm_pm_ndpsi);
+  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &ss[0], &sc[0], Qsq);
+  Qsq(&aux2s[0], &aux2c[0], &auxs[0], &auxc[0]);
+  Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree_of_p, &aux2s[0], &aux2c[0], Qsq);
 
   diff(&aux2s[0],&auxs[0],&ss[0],VOLUME/2);
   temp=square_norm(&aux2s[0],VOLUME/2, 1)/square_norm(&ss[0],VOLUME/2, 1)/4.0;
diff --git a/chebyshev_polynomial_nd.h b/chebyshev_polynomial_nd.h
index 1dda9c1c5..438ca1898 100644
--- a/chebyshev_polynomial_nd.h
+++ b/chebyshev_polynomial_nd.h
@@ -19,6 +19,7 @@
 #ifndef _CHEBYSHEV_POLYNOMIAL_ND_H
 #define _CHEBYSHEV_POLYNOMIAL_ND_H
 
+#include "solver/matrix_mult_typedef_nd.h"
 
 double func(double u, double exponent);
 
@@ -27,6 +28,7 @@ void chebyshev_coefs(double a, double b, double c[], int n, double exponent);
 double cheb_eval(int M, double *c, double s);
 
 void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
-			     const double EVMin, const double EVMax);
+			     const double EVMin, const double EVMax,
+			     matrix_mult_nd Qsq);
 
 #endif
diff --git a/clover_leaf.h b/clover_leaf.h
index 87454fccf..71881e4ad 100644
--- a/clover_leaf.h
+++ b/clover_leaf.h
@@ -36,7 +36,4 @@ void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll);
 void sw_all(hamiltonian_field_t * const hf, const double kappa, const double c_sw);
 int init_swpm(const int V);
 
-double sw_trace_nd(const int ieo, const double mu, const double eps);
-void sw_invert_nd(const double mshift);
-
 #endif
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 88af252e9..1054d1f67 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -78,8 +78,8 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
     for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
       Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
-			   g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
-			   mnl->MDPolyRoots[k-1]);
+			     g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
+			     mnl->MDPolyRoots[k-1]);
     }
     
     /* Here comes the remaining fields  chi_k ; k=n,...,2n-1  */
@@ -93,12 +93,12 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
       assign(g_chi_dn_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], VOLUME/2);
       
       Q_tau1_sub_const_ndpsi(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], 
-			   g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
-			   mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
+			     g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree-1], 
+			     mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
       
       /* Get the even parts of the  (j-1)th  chi_spinors */
       H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
-	      g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
+		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
       
       /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
       deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);      /* UP */
@@ -106,7 +106,7 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
       
       /* Get the even parts of the  (2N-j)-th  chi_spinors */
       H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
-	      g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
+		    g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
       
       /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
       deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf, mnl->forcefactor);
@@ -119,28 +119,28 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     assign(g_chi_up_spinor_field[0], mnl->pf, VOLUME/2);
     for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
       Qtm_pm_sub_const_nrm_psi(g_chi_up_spinor_field[k],
-			    g_chi_up_spinor_field[k-1], 
-			    mnl->MDPolyRoots[k-1]);
+			       g_chi_up_spinor_field[k-1], 
+			       mnl->MDPolyRoots[k-1]);
     }
     assign(g_chi_up_spinor_field[mnl->MDPolyDegree],
 	   g_chi_up_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
-
+    
     for(j = (mnl->MDPolyDegree-1); j >= 1; j--) {
       assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1],
 	     g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
-
+      
       Qtm_pm_sub_const_nrm_psi(g_chi_up_spinor_field[mnl->MDPolyDegree], 
-			   g_chi_up_spinor_field[mnl->MDPolyDegree-1],
-			   mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
-
+			       g_chi_up_spinor_field[mnl->MDPolyDegree-1],
+			       mnl->MDPolyRoots[2*mnl->MDPolyDegree-3-j]);
+      
       Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[j-1]); 
-
+      
       H_eo_tm_inv_psi(mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], EO, -1.);
       deriv_Sb(OE, mnl->w_fields[3], mnl->w_fields[2], hf, mnl->forcefactor); 
       
       H_eo_tm_inv_psi(mnl->w_fields[2], mnl->w_fields[3], EO, 1.); 
       deriv_Sb(EO, mnl->w_fields[2], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);
-
+      
       Qtm_minus_psi(mnl->w_fields[3],g_chi_up_spinor_field[mnl->MDPolyDegree]); 
 
       H_eo_tm_inv_psi(mnl->w_fields[2],mnl->w_fields[3], EO, +1.);
@@ -475,7 +475,8 @@ int init_ndpoly_monomial(const int id) {
 
   /* Here we prepare the less precise MD polynomial first   */
   degree_of_polynomial_nd(&mnl->MDPolyDegree, &mnl->MDPolyCoefs,
-			  mnl->EVMin, mnl->EVMax);
+			  mnl->EVMin, mnl->EVMax,
+			  Qtm_pm_ndpsi);
   phmc_dop_n_cheby = mnl->MDPolyDegree;
   phmc_dop_cheby_coef = mnl->MDPolyCoefs;
   if((g_proc_id == 0) && (g_debug_level > 1)) {
@@ -496,7 +497,7 @@ int init_ndpoly_monomial(const int id) {
   /* Here we prepare the precise polynomial Ptilde */
   degree_of_Ptilde(&mnl->PtildeDegree, &mnl->PtildeCoefs, 
 		   mnl->EVMin, mnl->EVMax, mnl->MDPolyDegree, 
-		   mnl->PrecisionPtilde);
+		   mnl->PrecisionPtilde, &Qtm_pm_ndpsi);
   phmc_ptilde_cheby_coef = mnl->PtildeCoefs;
   phmc_ptilde_n_cheby = mnl->PtildeDegree;
 
diff --git a/phmc.c b/phmc.c
index 0e162f77b..f11f08b20 100644
--- a/phmc.c
+++ b/phmc.c
@@ -34,6 +34,7 @@
 #include "init_chi_spinor_field.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
+#include "tm_operators_nd.h"
 #include "phmc.h"
 #include "monomial.h"
 #include "gettime.h"
@@ -81,14 +82,14 @@ void init_phmc() {
     
     no_eigenvalues = 10;   /* Number of lowest eigenvalues to be computed */
     if(g_epsbar!=0.0)
-      phmc_cheb_evmin = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0);
+      phmc_cheb_evmin = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, &Qtm_pm_ndbipsi);
     else {
       phmc_cheb_evmin = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, 0, nstore, even_odd_flag);
     }
 
     no_eigenvalues = 4;   /* Number of highest eigenvalues to be computed */
     if(g_epsbar!=0.0)
-      phmc_cheb_evmax = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1);
+      phmc_cheb_evmax = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, &Qtm_pm_ndbipsi);
     else
       phmc_cheb_evmax = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, 0, nstore, even_odd_flag);
        
@@ -220,13 +221,13 @@ void phmc_compute_ev(const int trajectory_counter,
   no_eigenvalues = 1;
 
   if(g_epsbar!=0.0)
-    temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0);
+    temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, &Qtm_pm_ndbipsi);
   else
     temp = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, 0, nstore, even_odd_flag);
   
   no_eigenvalues = 1;
   if(g_epsbar!=0.0)
-    temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1);
+    temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, &Qtm_pm_ndbipsi);
   else
     temp2 = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, 0, nstore, even_odd_flag);
   
diff --git a/solver/eigenvalues_bi.c b/solver/eigenvalues_bi.c
index 5d6e9767b..e2d387bde 100644
--- a/solver/eigenvalues_bi.c
+++ b/solver/eigenvalues_bi.c
@@ -50,13 +50,14 @@
 #include "tm_operators.h"
 #include "solver/solver.h"
 #include "solver/jdher_bi.h"
+#include "solver/matrix_mult_typedef_bi.h"
 #include "eigenvalues_bi.h"
 #include "tm_operators_nd.h"
 
 
 double eigenvalues_bi(int * nr_of_eigenvalues,  
 		      const int max_iterations, const double precision,
-		      const int maxmin) {
+		      const int maxmin, matrix_mult_bi Qsq) {
 
 
   static bispinor * eigenvectors_bi_ = NULL;
@@ -78,11 +79,6 @@ double eigenvalues_bi(int * nr_of_eigenvalues,
    * General variables
    **********************/
   int returncode=0;
-  char * filename = NULL;
-
-  
-  filename = calloc(200, sizeof(char));
-  /*  strcpy(filename,optarg);*/
 
   if(maxmin == JD_MINIMAL) {
     startvalue = 0.;
@@ -140,14 +136,14 @@ double eigenvalues_bi(int * nr_of_eigenvalues,
   /* conversion to non _bi fields which are subject to xchange_fields   */
   /* so _bi fields do not need boundary                                 */
   jdher_bi((VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double), (VOLUME)/2*sizeof(bispinor)/sizeof(_Complex double),
-	    startvalue, prec, 
-	    (*nr_of_eigenvalues), j_max, j_min, 
-	    max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_bi,
-	    BICGSTAB, solver_it_max,
-	    threshold, decay, verbosity,
-	    &converged, (_Complex double*) eigenvectors_bi, eigenvls_bi,
-	    &returncode, maxmin, 1,
-	    &Qtm_pm_ndbipsi);
+	   startvalue, prec, 
+	   (*nr_of_eigenvalues), j_max, j_min, 
+	   max_iterations, blocksize, blockwise, v0dim, (_Complex double*) eigenvectors_bi,
+	   BICGSTAB, solver_it_max,
+	   threshold, decay, verbosity,
+	   &converged, (_Complex double*) eigenvectors_bi, eigenvls_bi,
+	   &returncode, maxmin, 1,
+	   Qsq);
   
   *nr_of_eigenvalues = converged;
 
diff --git a/solver/eigenvalues_bi.h b/solver/eigenvalues_bi.h
index d3f228360..1245063a5 100644
--- a/solver/eigenvalues_bi.h
+++ b/solver/eigenvalues_bi.h
@@ -19,7 +19,10 @@
 #ifndef _EIGENVALUES_BI_H
 #define _EIGENVALUES_BI_H
 
+#include "matrix_mult_typedef_bi.h"
+
 double eigenvalues_bi(int * nev, const int max_iterations, 
-		      const double prec, const int maxmin);
+		      const double prec, const int maxmin,
+		      matrix_mult_bi Qsq);
 
 #endif
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 9e9a8abdd..263cc4554 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -311,7 +311,7 @@ void Qsw_pm_ndpsi(spinor * const l_strange, spinor * const l_charm,
  ******************************************/
 void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
 			    spinor * const k_strange, spinor * const k_charm, 
-			    const _Complex double z){
+			    const _Complex double z) {
 
   spinor *r, *s;
   su3_vector ALIGN phi1;
@@ -384,6 +384,82 @@ void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   return;
 }
 
+void Qsw_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
+			      spinor * const k_strange, spinor * const k_charm, 
+			      const _Complex double z) {
+
+  spinor *r, *s;
+  su3_vector ALIGN phi1;
+
+  /*   tau_1   exchanges the   k_charm  <->  k_strange   spinors */
+  /*  Apply first  Qhat(2x2)  and finally subtract the constant  z  */
+
+  /* Here the  M_oe Mee^-1 M_eo  implementation  */
+
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
+
+  assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], -g_mubar, g_epsbar);
+  clover_inv_nd(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3]);
+
+  Hopping_Matrix(OE, l_strange, g_spinor_field[DUM_MATRIX+3]);
+  Hopping_Matrix(OE, l_charm, g_spinor_field[DUM_MATRIX+2]);
+
+  /* Here the M_oo  implementation  */
+  clover_gamma5_nd(OO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], 
+  		   k_charm, k_strange,
+  		   l_strange, l_charm,
+  		   g_mubar, -g_epsbar);
+
+  /* At the end, the normalisation by the max. eigenvalue  */
+  mul_r(l_strange, phmc_invmaxev, g_spinor_field[DUM_MATRIX], VOLUME/2);
+  mul_r(l_charm, phmc_invmaxev, g_spinor_field[DUM_MATRIX+1], VOLUME/2);
+
+  /* Finally, we add k to l and multiply all */
+  /* by the constant  phmc_Cpol  */
+  /* which renders the polynomial in monomials  */
+  /* identical to the polynomial a la clenshaw */;
+#ifdef OMP
+#pragma omp parallel for private(r) private(s) private(phi1)
+#endif
+  for(int ix = 0; ix < (VOLUME/2); ix++){
+
+    r=l_strange + ix;
+    s=k_strange + ix;
+    
+    _complex_times_vector(phi1, z, s->s0);
+    _vector_sub_assign(r->s0, phi1);
+    _vector_mul(r->s0, phmc_Cpol, r->s0);
+    _complex_times_vector(phi1, z, s->s1);
+    _vector_sub_assign(r->s1, phi1);
+    _vector_mul(r->s1, phmc_Cpol, r->s1);
+    _complex_times_vector(phi1, z, s->s2);
+    _vector_sub_assign(r->s2, phi1);
+    _vector_mul(r->s2, phmc_Cpol, r->s2);
+    _complex_times_vector(phi1, z, s->s3);
+    _vector_sub_assign(r->s3, phi1);
+    _vector_mul(r->s3, phmc_Cpol, r->s3);
+
+    r=l_charm + ix;
+    s=k_charm + ix;
+    
+    _complex_times_vector(phi1, z, s->s0);
+    _vector_sub_assign(r->s0, phi1);
+    _vector_mul(r->s0, phmc_Cpol, r->s0);
+    _complex_times_vector(phi1, z, s->s1);
+    _vector_sub_assign(r->s1, phi1);
+    _vector_mul(r->s1, phmc_Cpol, r->s1);
+    _complex_times_vector(phi1, z, s->s2);
+    _vector_sub_assign(r->s2, phi1);
+    _vector_mul(r->s2, phmc_Cpol, r->s2);
+    _complex_times_vector(phi1, z, s->s3);
+    _vector_sub_assign(r->s3, phi1);    
+    _vector_mul(r->s3, phmc_Cpol, r->s3);
+  }
+  return;
+}
+
 
 
 
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index d48c85c7c..38e3983be 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -45,8 +45,11 @@ void Qtm_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 void Qsw_pm_ndbipsi(bispinor * const bisp_l, bispinor * const bisp_k);
 
 void Q_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
-                       spinor * const k_strange, spinor * const k_charm, 
-                       const _Complex double z);
+			    spinor * const k_strange, spinor * const k_charm, 
+			    const _Complex double z);
+void Qsw_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
+			      spinor * const k_strange, spinor * const k_charm, 
+			      const _Complex double z);
 
 void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm, 
              spinor * const k_strange, spinor * const k_charm, 

From 185d1cf0e73e7901656756e64f6ded57a19a972d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 17:34:52 +0200
Subject: [PATCH 042/110] introduced NDCLOVER monomial, not tested yet

---
 Makefile.in             |  2 +-
 cloverndpoly_monomial.c | 12 +++++++-----
 doc/input.tex           |  7 ++++++-
 monomial.c              |  8 ++++++++
 monomial.h              |  2 ++
 read_input.l            | 22 +++++++++++++++++-----
 tm_operators_nd.c       |  2 +-
 7 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index b3055f064..bbd4323b0 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -66,7 +66,7 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	clover_trlog_monomial cloverdet_monomial cloverdetratio_monomial \
 	little_D block Dov_psi operator poly_monomial measurements pion_norm Dov_proj \
 	xchange_field_tslice temporalgauge spinor_fft X_psi P_M_eta \
-	xchange_jacobi jacobi init_jacobi_field \
+	xchange_jacobi jacobi init_jacobi_field cloverndpoly_monomial \
 	fatal_error invert_clover_eo gettime @SPI_FILES@ init_omp_accumulators
 
 ## the GPU modules (all .cu files in $GPUDIR)
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 08b08d465..fe3ff7201 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -47,7 +47,9 @@
 #include "boundary.h"
 #include "phmc.h"
 #include "init_chi_spinor_field.h"
+#include "clovertm_operators.h"
 #include "clover_leaf.h"
+
 #include "cloverndpoly_monomial.h"
 
 /********************************************
@@ -71,7 +73,7 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   // we compute the clover term (1 + T_ee(oo)) for all sites x
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   // we invert it for the even sites only
-  sw_invert_nd(EE, mnl->mu);
+  sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
 
 
   /* This factor 2 a missing factor 2 in trace_lambda */
@@ -137,8 +139,8 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   ndpoly_set_global_parameter(mnl, 0);
   g_mu3 = 0.;
   init_sw_fields();
-  sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
-  sw_invert_nd(EE, mnl->mu);
+  sw_term((const su3**)hf->gaugefield, mnl->kappa, mnl->c_sw); 
+  sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
 
   mnl->energy0 = 0.;
   random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
@@ -200,8 +202,8 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   ndpoly_set_global_parameter(mnl, 0);
   g_mu3 = 0.;
   init_sw_fields();
-  sw_term(hf->gaugefield, mnl->kappa, mnl->c_sw); 
-  sw_invert_nd(EE, mnl->mu);
+  sw_term((const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
+  sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
 
   mnl->energy1 = 0.;
   Ener[0] = 0;
diff --git a/doc/input.tex b/doc/input.tex
index 9476ec3cb..10c2e52fe 100644
--- a/doc/input.tex
+++ b/doc/input.tex
@@ -371,10 +371,15 @@ \subsection{Input parameter for main program}
     -\re\tr(U^{1\times2}_{x,\mu,\nu})\}\right)\,  ,
   \]
 \item {\ttfamily NDPOLY}: polynomial representation of the (possibly
-  non-degenerate) doublet\\
+  non-degenerate) twisted mass doublet\\
   \[
   [\det(Q_{nd}(\bar\epsilon, \bar\mu)^2)]^{1/2}
   \]
+\item {\ttfamily NDCLOVER}: polynomial representation of the (possibly
+  non-degenerate) clover twisted mass doublet\\
+  \[
+  [\det(Q_{nd}(\bar\epsilon, \bar\mu, c_\mathrm{sw})^2)]^{1/2}
+  \]
 \item {\ttfamily POLY}: polynomial approximation ($P_n(x) \approx \frac{1}{x}$) of the mass degenerate determinant\\
   \[
   \left[\det(P_{n}(Q^2(\kappa) + \mu^2))\right]^{-1}
diff --git a/monomial.c b/monomial.c
index eadb1a482..67c2f25ab 100644
--- a/monomial.c
+++ b/monomial.c
@@ -227,6 +227,14 @@ int init_monomials(const int V, const int even_odd_flag) {
 	no++;
 	retval = init_ndpoly_monomial(i);
       }
+      else if(monomial_list[i].type == NDCLOVER) {
+	monomial_list[i].hbfunction = &cloverndpoly_heatbath;
+	monomial_list[i].accfunction = &cloverndpoly_acc;
+	monomial_list[i].derivativefunction = &cloverndpoly_derivative;
+	monomial_list[i].pf2 = __pf+no*V;
+	no++;
+	retval = init_ndpoly_monomial(i);
+      }
       else if(monomial_list[i].type == NDDETRATIO) {
 	monomial_list[i].hbfunction = &dummy_heatbath;
 	monomial_list[i].accfunction = &nddetratio_acc;
diff --git a/monomial.h b/monomial.h
index ab1a12af7..acf6ee520 100644
--- a/monomial.h
+++ b/monomial.h
@@ -37,6 +37,7 @@
 #define CLOVERTRLOG 8
 #define CLOVERDET 9
 #define CLOVERDETRATIO 10
+#define NDCLOVER 11
 
 #define max_no_monomials 20
 
@@ -119,6 +120,7 @@ typedef struct {
 #include "clover_trlog_monomial.h"
 #include "cloverdet_monomial.h"
 #include "cloverdetratio_monomial.h"
+#include "cloverndpoly_monomial.h"
 
 /* list of all monomials */
 extern monomial monomial_list[max_no_monomials];
diff --git a/read_input.l b/read_input.l
index e12ce7dcc..020fcc3cb 100644
--- a/read_input.l
+++ b/read_input.l
@@ -281,6 +281,7 @@ inline void rmQuotes(char *str){
 %x SFGAUGEMONOMIAL
 %x NDPOLYMONOMIAL
 %x POLYMONOMIAL
+%x CLPOLYMONOMIAL
 %x MNAME
 %x MCSTR
 %x MSOLVER
@@ -820,6 +821,11 @@ inline void rmQuotes(char *str){
     strcpy((*mnl).name, "NDPOLY");
     g_running_phmc = 1;
   }
+  else if(strcmp(yytext, "NDCLOVER")==0) {
+    mnl->type = NDCLOVER;
+    strcpy((*mnl).name, "NDCLOVER");
+    g_running_phmc = 1;
+  }
   else if(strcmp(yytext, "POLY")==0) {
     mnl->type = POLY;
     strcpy((*mnl).name, "POLY");
@@ -854,6 +860,7 @@ inline void rmQuotes(char *str){
   if(mnl->type == GAUGE) BEGIN(GAUGEMONOMIAL);
   else if(mnl->type == SFGAUGE) BEGIN(SFGAUGEMONOMIAL);
   else if(mnl->type == NDPOLY) BEGIN(NDPOLYMONOMIAL);
+  else if(mnl->type == NDCLOVER) BEGIN(CLPOLYMONOMIAL);
   else if(mnl->type == POLY || mnl->type == POLYDETRATIO)  {
           fprintf(stderr,"starting to parse poly(detratio) monomial\n");
           BEGIN(POLYMONOMIAL); 
@@ -865,7 +872,7 @@ inline void rmQuotes(char *str){
 
 
 
-<DETMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,NDPOLYMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL>{
+<DETMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,NDPOLYMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLPOLYMONOMIAL>{
   {SPC}*Timescale{EQL}{DIGIT}+ {
     if(mnl->type == NDDETRATIO) {
       mnl->timescale = -5;
@@ -887,12 +894,15 @@ inline void rmQuotes(char *str){
   }
 }
 
-<CLDETMONOMIAL,CLDETRATMONOMIAL>{
+<CLDETMONOMIAL,CLDETRATMONOMIAL,CLPOLYMONOMIAL>{
   {SPC}*CSW{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
     mnl->c_sw = c;
     if(myverbose) printf("  CSW set to %f line %d monomial %d\n", c, line_of_file, current_monomial);
   }
+}
+
+<CLDETMONOMIAL,CLDETRATMONOMIAL>{
   {SPC}*rho{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
     mnl->rho = c;
@@ -909,7 +919,7 @@ inline void rmQuotes(char *str){
 }
 
 
-<DETMONOMIAL,POLYMONOMIAL,NDPOLYMONOMIAL>{
+<DETMONOMIAL,POLYMONOMIAL,NDPOLYMONOMIAL,CLPOLYMONOMIAL>{
   {SPC}*2KappaMu2{EQL}{FLT} {
     sscanf(yytext, " %[2a-zA-Z] = %lf", name, &c);
     mnl->mu2 = c;
@@ -1100,6 +1110,8 @@ inline void rmQuotes(char *str){
     phmc_exact_poly = 0;
     if(myverbose!=0) printf("  phmc_exact_poly set to false line %d monomial %d\n", line_of_file, current_monomial);
   }
+}
+<NDPOLYMONOMIAL,CLPOLYMONOMIAL>{
   {SPC}*StildeMax{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
     stilde_max = c;
@@ -1172,7 +1184,7 @@ inline void rmQuotes(char *str){
 }
 
 
-<POLYMONOMIAL,NDPOLYMONOMIAL>{
+<POLYMONOMIAL,NDPOLYMONOMIAL,CLPOLYMONOMIAL>{
   {SPC}*LocNormConst{EQL}{FLT} {
     sscanf(yytext, " %[a-zA-Z] = %lf", name, &c);
     mnl->MDPolyLocNormConst = c;
@@ -1939,7 +1951,7 @@ inline void rmQuotes(char *str){
 }
 
 
-<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,NDPOLYMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,DBCLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU>\n   {
+<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,NDPOLYMONOMIAL,CLPOLYMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,DBCLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU>\n   {
   line_of_file++;
 }
 <*>\n                       {
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 263cc4554..b4ba3b902 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -89,7 +89,7 @@ void Qtm_ndpsi(spinor * const l_strange, spinor * const l_charm,
   mul_r(l_charm, phmc_invmaxev, g_spinor_field[DUM_MATRIX+1], VOLUME/2);
 }
 
-void Qsw__ndpsi(spinor * const l_strange, spinor * const l_charm,
+void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
 		spinor * const k_strange, spinor * const k_charm) {
 
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);

From 956f7b0c174ac401be7f18a7cbd89ee3ae175ad6 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 18:57:19 +0200
Subject: [PATCH 043/110] NDCLOVER monomial running with c_sw=0, sw derivative
 and trlog missing

---
 cloverndpoly_monomial.c | 12 ++++++------
 doc/input.tex           |  2 +-
 monomial.c              |  9 ++++++++-
 tm_operators_nd.c       | 12 ++++++------
 update_tm.c             |  1 +
 5 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index fe3ff7201..6d778477f 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -93,8 +93,8 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   
   for(k = 1; k < (mnl->MDPolyDegree-1); k++) {
     Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[k], g_chi_dn_spinor_field[k], 
-			     g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
-			     mnl->MDPolyRoots[k-1]);
+			   g_chi_up_spinor_field[k-1], g_chi_dn_spinor_field[k-1], 
+			   mnl->MDPolyRoots[k-1]);
   }
   
   /* Here comes the remaining fields  chi_k ; k=n,...,2n-1  */
@@ -121,7 +121,7 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     
     /* Get the even parts of the  (2N-j)-th  chi_spinors */
     H_eo_sw_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
-	    g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
+		  g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
     
     /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
     deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf, mnl->forcefactor);
@@ -157,15 +157,15 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
   Qsw_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
-		  g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
+	    g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
   
   for(j = 1; j < (mnl->MDPolyDegree); j++){
     assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
     assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
     
     Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
-			 g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
-			 mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
+			     g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
+			     mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
   }
   Ptilde_ndpsi(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
 	       mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], &Qsw_pm_ndpsi);
diff --git a/doc/input.tex b/doc/input.tex
index 10c2e52fe..db891e19d 100644
--- a/doc/input.tex
+++ b/doc/input.tex
@@ -371,7 +371,7 @@ \subsection{Input parameter for main program}
     -\re\tr(U^{1\times2}_{x,\mu,\nu})\}\right)\,  ,
   \]
 \item {\ttfamily NDPOLY}: polynomial representation of the (possibly
-  non-degenerate) twisted mass doublet\\
+  non-degenerate) Wilson twisted mass doublet\\
   \[
   [\det(Q_{nd}(\bar\epsilon, \bar\mu)^2)]^{1/2}
   \]
diff --git a/monomial.c b/monomial.c
index 67c2f25ab..70bc0f527 100644
--- a/monomial.c
+++ b/monomial.c
@@ -136,7 +136,8 @@ int init_monomials(const int V, const int even_odd_flag) {
   for(int i = 0; i < no_monomials; i++) {
     if((monomial_list[i].type != GAUGE) && (monomial_list[i].type != SFGAUGE)) no++;
     /* non-degenerate monomials need two pseudo fermion fields */
-    if((monomial_list[i].type == NDPOLY) || (monomial_list[i].type == NDDETRATIO)) no++;
+    if((monomial_list[i].type == NDPOLY) || (monomial_list[i].type == NDDETRATIO) || 
+       (monomial_list[i].type == NDCLOVER)) no++;
   }
   if(no_monomials > 0) {
     if((void*)(_pf = (spinor*)calloc((no+4)*V+1, sizeof(spinor))) == NULL) {
@@ -223,15 +224,21 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].hbfunction = &ndpoly_heatbath;
 	monomial_list[i].accfunction = &ndpoly_acc;
 	monomial_list[i].derivativefunction = &ndpoly_derivative;
+	monomial_list[i].even_odd_flag = 1;
 	monomial_list[i].pf2 = __pf+no*V;
 	no++;
 	retval = init_ndpoly_monomial(i);
       }
       else if(monomial_list[i].type == NDCLOVER) {
+	init_swpm(VOLUME);
 	monomial_list[i].hbfunction = &cloverndpoly_heatbath;
 	monomial_list[i].accfunction = &cloverndpoly_acc;
 	monomial_list[i].derivativefunction = &cloverndpoly_derivative;
 	monomial_list[i].pf2 = __pf+no*V;
+	monomial_list[i].even_odd_flag = 1;
+	//monomial_list[i].Qsq = &Qsw_pm_ndpsi;
+	//monomial_list[i].Qp = &Qsw_ndpsi;
+	//monomial_list[i].Qm = &Qsw_dagger_ndpsi;
 	no++;
 	retval = init_ndpoly_monomial(i);
       }
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index b4ba3b902..73750395f 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -96,7 +96,7 @@ void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
   assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
-			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], -g_mubar, g_epsbar);
+			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], g_mubar, g_epsbar);
   clover_inv_nd(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3]);
 
   Hopping_Matrix(OE, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+2]);
@@ -105,7 +105,7 @@ void Qsw_ndpsi(spinor * const l_strange, spinor * const l_charm,
   clover_gamma5_nd(OO, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
   		   k_charm, k_strange,
   		   g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1],
-  		   -g_mubar, -g_epsbar);
+  		   g_mubar, -g_epsbar);
   mul_r(l_charm, phmc_invmaxev, g_spinor_field[DUM_MATRIX+2], VOLUME/2);
   mul_r(l_strange, phmc_invmaxev, g_spinor_field[DUM_MATRIX+3], VOLUME/2);
   return;
@@ -399,7 +399,7 @@ void Qsw_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_charm);
   Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_strange);
 
-  assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3], 
+  assign_mul_one_sw_pm_imu_eps(EE, g_spinor_field[DUM_MATRIX+3], g_spinor_field[DUM_MATRIX+2], 
 			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], -g_mubar, g_epsbar);
   clover_inv_nd(EE, g_spinor_field[DUM_MATRIX+2], g_spinor_field[DUM_MATRIX+3]);
 
@@ -410,7 +410,7 @@ void Qsw_tau1_sub_const_ndpsi(spinor * const l_strange, spinor * const l_charm,
   clover_gamma5_nd(OO, g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], 
   		   k_charm, k_strange,
   		   l_strange, l_charm,
-  		   g_mubar, -g_epsbar);
+  		   -g_mubar, -g_epsbar);
 
   /* At the end, the normalisation by the max. eigenvalue  */
   mul_r(l_strange, phmc_invmaxev, g_spinor_field[DUM_MATRIX], VOLUME/2);
@@ -541,9 +541,9 @@ void H_eo_sw_ndpsi(spinor * const l_strange, spinor * const l_charm,
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX], k_strange);
   Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX+1], k_charm);
   
-  assign_mul_one_sw_pm_imu_eps(EE, l_strange, l_charm, 
+  assign_mul_one_sw_pm_imu_eps(EE, l_charm, l_strange,
 			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], 
-			       -g_mubar, g_epsbar);
+			       g_mubar, g_epsbar);
 
   clover_inv_nd(EE, l_strange, l_charm);
 
diff --git a/update_tm.c b/update_tm.c
index 804a44348..09bccfa85 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -363,6 +363,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
         if(monomial_list[ Integrator.mnls_per_ts[i][j] ].type != GAUGE
 	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != SFGAUGE 
 	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDPOLY
+	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDCLOVER
 	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERTRLOG ) {
           fprintf(datafile,"%d %d ",  monomial_list[ Integrator.mnls_per_ts[i][j] ].iter0, 
                   monomial_list[ Integrator.mnls_per_ts[i][j] ].iter1);

From 9306db130b85aff7deb1d40451df3a94521b9158 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 19:13:30 +0200
Subject: [PATCH 044/110] included gamma5 in sw_spinor

---
 clover_leaf.c             | 11 +++++------
 cloverdet_monomial.c      |  2 --
 cloverdetratio_monomial.c |  6 ------
 su3.h                     | 12 ++++++++++++
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index 254156fd6..c726fd5d4 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -869,7 +869,6 @@ void sw_deriv(const int ieo, const double mu) {
 // with insertion matrix at site x
 // see equation (22) of hep-lat/9603008                  
 // result is again stored in swm and swp                 
-// additional gamma_5 needed for one of the input vectors
 
 void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll) {
 #ifdef OMP
@@ -905,11 +904,11 @@ void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll)
     _vector_tensor_vector(v1,(*r).s0,(*s).s1);
     _vector_tensor_vector(v2,(*r).s1,(*s).s1);
     _vector_tensor_vector(v3,(*r).s1,(*s).s0);
-    
-    _vector_tensor_vector(u0,(*r).s2,(*s).s2);
-    _vector_tensor_vector(u1,(*r).s2,(*s).s3);
-    _vector_tensor_vector(u2,(*r).s3,(*s).s3);
-    _vector_tensor_vector(u3,(*r).s3,(*s).s2);
+    // mvector takes g5 into account
+    _mvector_tensor_vector(u0,(*r).s2,(*s).s2);
+    _mvector_tensor_vector(u1,(*r).s2,(*s).s3);
+    _mvector_tensor_vector(u2,(*r).s3,(*s).s3);
+    _mvector_tensor_vector(u3,(*r).s3,(*s).s2);
     
     /* compute the insertion matrix */
     _su3_plus_su3(lswp[0],u0,v0);
diff --git a/cloverdet_monomial.c b/cloverdet_monomial.c
index 39ce501cd..d1a8a389c 100644
--- a/cloverdet_monomial.c
+++ b/cloverdet_monomial.c
@@ -108,11 +108,9 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
   sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
   sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
   
   // compute the contribution for the det-part
diff --git a/cloverdetratio_monomial.c b/cloverdetratio_monomial.c
index 1b08a7edf..2f5cda93f 100644
--- a/cloverdetratio_monomial.c
+++ b/cloverdetratio_monomial.c
@@ -110,11 +110,9 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
-  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
   sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
   sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
 
   g_mu3 = mnl->rho2; // rho2
@@ -138,11 +136,9 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
   sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
   sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
 
   sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
@@ -221,11 +217,9 @@ void cloverdetratio_derivative(const int no, hamiltonian_field_t * const hf) {
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
-  gamma5(mnl->w_fields[2], mnl->w_fields[2], VOLUME/2);
   sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  gamma5(mnl->w_fields[0], mnl->w_fields[0], VOLUME/2);
   sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
 
   sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
diff --git a/su3.h b/su3.h
index 38261d89e..686a36d16 100644
--- a/su3.h
+++ b/su3.h
@@ -653,6 +653,18 @@ _sse_store_up(r);
   (t).c21 = (u).c2 * conj((v).c1);	\
   (t).c22 = (u).c2 * conj((v).c2);
 
+#define _mvector_tensor_vector(t,u,v)	\
+  (t).c00 = -(u).c0 * conj((v).c0);	\
+  (t).c01 = -(u).c0 * conj((v).c1);	\
+  (t).c02 = -(u).c0 * conj((v).c2);	\
+  (t).c10 = -(u).c1 * conj((v).c0);	\
+  (t).c11 = -(u).c1 * conj((v).c1);	\
+  (t).c12 = -(u).c1 * conj((v).c2);	\
+  (t).c20 = -(u).c2 * conj((v).c0);	\
+  (t).c21 = -(u).c2 * conj((v).c1);	\
+  (t).c22 = -(u).c2 * conj((v).c2);
+
+
 #define _vector_tensor_vector_add(t, u, v, w, z) \
   (t).c00 = (u).c0 * conj((v).c0) + (w).c0 * conj((z).c0) ;	\
   (t).c01 = (u).c0 * conj((v).c1) + (w).c0 * conj((z).c1);	\

From 11f672e536cf2bbc761a35cca8fbb8ae038a5aa9 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 7 Oct 2012 19:29:51 +0200
Subject: [PATCH 045/110] sw_spinor added to NDCLOVER, not tested yet, trlog
 derivative and trlog itself still to be coded

---
 clover_leaf.c           |  3 ++-
 cloverndpoly_monomial.c | 23 +++++++++++++++++++----
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index c726fd5d4..e6c704f76 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -869,6 +869,7 @@ void sw_deriv(const int ieo, const double mu) {
 // with insertion matrix at site x
 // see equation (22) of hep-lat/9603008                  
 // result is again stored in swm and swp                 
+// includes a gamma5 multiplication for kk
 
 void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll) {
 #ifdef OMP
@@ -921,7 +922,7 @@ void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll)
     _su3_minus_su3(lswm[2],u2,v2);
     _su3_minus_su3(lswm[3],u3,v3);
     
-    /* add up the swm[0] and swp[0] */
+    /* add up to swm[0] and swp[0] */
     _su3_acc(swm[x][0], lswm[0]);
     _su3_acc(swm[x][1], lswm[1]);
     _su3_acc(swm[x][2], lswm[2]);
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 6d778477f..a11b859e7 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -118,15 +118,30 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
     deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);      /* UP */
     deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);    /* DN */
-    
+
     /* Get the even parts of the  (2N-j)-th  chi_spinors */
-    H_eo_sw_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
+    H_eo_sw_ndpsi(mnl->w_fields[2], mnl->w_fields[3], 
 		  g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
     
     /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
-    deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[0], hf, mnl->forcefactor);
-    deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[1], hf, mnl->forcefactor);
+    deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[2], hf, mnl->forcefactor);
+    deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[3], hf, mnl->forcefactor);
+
+    // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
+    sw_spinor(EO, g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_up_spinor_field[j-1]);
+    // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
+    sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[2]);
+
+    // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
+    sw_spinor(EO, g_chi_dn_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[j-1]);
+    // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
+    sw_spinor(OE, mnl->w_fields[1], mnl->w_fields[3]);
+
   }
+  //to be coded
+  //sw_deriv(EE, mnl->mu);
+  sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
+
   return;
 }
 

From 1732593987ce22a2887f7866f0898a8c7cf15ad3 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Mon, 8 Oct 2012 16:27:13 +0200
Subject: [PATCH 046/110] fixed order in sw_spinor

---
 cloverndpoly_monomial.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index a11b859e7..25310e90f 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -116,8 +116,8 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 		  g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
     
     /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
-    deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);      /* UP */
-    deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);    /* DN */
+    deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);/* UP */
+    deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);/* DN */
 
     /* Get the even parts of the  (2N-j)-th  chi_spinors */
     H_eo_sw_ndpsi(mnl->w_fields[2], mnl->w_fields[3], 
@@ -128,14 +128,14 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[3], hf, mnl->forcefactor);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(EO, g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_up_spinor_field[j-1]);
+    sw_spinor(OO, g_chi_up_spinor_field[j-1], g_chi_up_spinor_field[mnl->MDPolyDegree]);
     // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-    sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[2]);
+    sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[0]);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(EO, g_chi_dn_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[j-1]);
+    sw_spinor(OO, g_chi_dn_spinor_field[j-1], g_chi_dn_spinor_field[mnl->MDPolyDegree]);
     // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-    sw_spinor(OE, mnl->w_fields[1], mnl->w_fields[3]);
+    sw_spinor(EE, mnl->w_fields[3], mnl->w_fields[1]);
 
   }
   //to be coded

From 1470aa661ba913b6a5c47b27d69464708559ba63 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Mon, 8 Oct 2012 16:27:48 +0200
Subject: [PATCH 047/110] for cosmetic reasons replace EE with EO and some
 more (no influence on code)

---
 cloverdet_monomial.c      |  8 ++++----
 cloverdetratio_monomial.c | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/cloverdet_monomial.c b/cloverdet_monomial.c
index d1a8a389c..2236742da 100644
--- a/cloverdet_monomial.c
+++ b/cloverdet_monomial.c
@@ -94,12 +94,12 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
   
   // apply Hopping Matrix M_{eo}
   // to get the even sites of X_e
-  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -mnl->mu);
   // \delta Q sandwitched by Y_o^\dagger and X_e
   deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
   
   // to get the even sites of Y_e
-  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, mnl->mu);
   // \delta Q sandwitched by Y_e^\dagger and X_o
   // uses the gauge field in hf and changes the derivative fields in hf
   deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor);
@@ -108,10 +108,10 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
+  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
+  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1]);
   
   // compute the contribution for the det-part
   // we again compute only the insertion matrices for S_det
diff --git a/cloverdetratio_monomial.c b/cloverdetratio_monomial.c
index 2f5cda93f..8bbebd8b9 100644
--- a/cloverdetratio_monomial.c
+++ b/cloverdetratio_monomial.c
@@ -97,12 +97,12 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   
   /* apply Hopping Matrix M_{eo} */
   /* to get the even sites of X */
-  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
   deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
   
   /* to get the even sites of Y */
-  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
   deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor); 
 
@@ -110,10 +110,10 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
-  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
+  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
+  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1]);
 
   g_mu3 = mnl->rho2; // rho2
   
@@ -123,12 +123,12 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   
   /* apply Hopping Matrix M_{eo} */
   /* to get the even sites of X */
-  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EE, -mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[2], mnl->w_fields[1], EO, -mnl->mu);
   /* \delta Q sandwitched by Y_o^\dagger and X_e */
   deriv_Sb(OE, mnl->w_fields[0], mnl->w_fields[2], hf, mnl->forcefactor); 
   
   /* to get the even sites of Y */
-  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EE, mnl->mu);
+  H_eo_sw_inv_psi(mnl->w_fields[3], mnl->w_fields[0], EO, mnl->mu);
   /* \delta Q sandwitched by Y_e^\dagger and X_o */
   deriv_Sb(EO, mnl->w_fields[3], mnl->w_fields[1], hf, mnl->forcefactor);
 
@@ -136,10 +136,10 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
+  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3]);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
+  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1]);
 
   sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
   

From f36bf6b562069cb8683752ccb7c0e624a67b06e9 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Mon, 8 Oct 2012 18:40:12 +0200
Subject: [PATCH 048/110] eigenvalue computation for polynomial now done by
 monomial

---
 Ptilde_nd.c               | 56 +++------------------------------------
 chebyshev_polynomial_nd.c | 13 +++------
 cloverndpoly_monomial.c   | 23 +++++-----------
 global.h                  |  1 -
 hamiltonian_field.h       |  1 +
 hmc_tm.c                  |  9 +++----
 linsolve.c                |  4 +--
 monomial.c                |  1 +
 monomial.h                |  1 +
 ndpoly_monomial.c         |  6 +++++
 phmc.c                    | 42 ++++++++++++++++++-----------
 phmc.h                    |  6 +++--
 read_input.l              |  5 ++--
 update_tm.c               |  6 +++--
 update_tm.h               |  3 ++-
 15 files changed, 66 insertions(+), 111 deletions(-)

diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index ae9e690f6..e103e17e3 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -71,11 +71,7 @@ void Ptilde_cheb_coefs(double aa, double bb, double dd[], int n, double exponent
 
   inv_n=1./(double)n;
   f=calloc(n,sizeof(double));/*vector(0,n-1);*/
-  if(g_proc_id == g_stdio_proc && g_debug_level > 2){
-    printf("PHMC: PTILDE-chebyshev_polynomial\n");
-    printf("PHMC: n= %d inv_n=%e \n",n,inv_n);
-    printf("PHMC: allocation !!!\n");
-  }
+
   fflush(stdout);
   bma=0.5*(bb-aa);
   bpa=0.5*(bb+aa);
@@ -117,7 +113,6 @@ void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n,
     *aux3c_=NULL, *aux3c=NULL;
   
   
-#if ( defined SSE || defined SSE2 )
   svs_  = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
   svs   = (spinor *)(((unsigned long int)(svs_)+ALIGN_BASE)&~ALIGN_BASE);
   ds_   = calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
@@ -142,32 +137,6 @@ void Ptilde_ndpsi(spinor *R_s, spinor *R_c, double *dd, int n,
   aux2c = (spinor *)(((unsigned long int)(aux2c_)+ALIGN_BASE)&~ALIGN_BASE);
   aux3c_= calloc(VOLUMEPLUSRAND+1, sizeof(spinor));
   aux3c = (spinor *)(((unsigned long int)(aux3c_)+ALIGN_BASE)&~ALIGN_BASE);
-#else
-  svs_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  svs = svs_;
-  ds_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  ds = ds_;
-  dds_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  dds = dds_;
-  auxs_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  auxs = auxs_;
-  aux2s_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  aux2s = aux2s_;
-  aux3s_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  aux3s = aux3s_;
-  svc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  svc = svc_;
-  dc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  dc = dc_;
-  ddc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  ddc = ddc_;
-  auxc_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  auxc = auxc_;
-  aux2c_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  aux2c = aux2c_;
-  aux3c_=calloc(VOLUMEPLUSRAND, sizeof(spinor));
-  aux3c = aux3c_;
-#endif
   
   fact1=4/(phmc_cheb_evmax-phmc_cheb_evmin);
   fact2=-2*(phmc_cheb_evmax+phmc_cheb_evmin)/(phmc_cheb_evmax-phmc_cheb_evmin);
@@ -256,7 +225,7 @@ double chebtilde_eval(int M, double *dd, double s){
  *
  * The externally accessible function is
  *
- *   void degree_of_Ptilde(void)
+ *   void degree_of_Ptilde
  *     Computation of (QdaggerQ)^1/4
  *     by using the chebyshev approximation for the function ()^1/4  
  *
@@ -281,7 +250,6 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
 
   *coefs = calloc(phmc_max_ptilde_degree, sizeof(double)); 
 
-#if ( defined SSE || defined SSE2 || defined SSE3)
   ss_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
   auxs_ = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
   aux2s_= calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
@@ -296,15 +264,6 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
   auxc  = (spinor *)(((unsigned long int)(auxc_)+ALIGN_BASE)&~ALIGN_BASE);
   aux2c = (spinor *)(((unsigned long int)(aux2c_)+ALIGN_BASE)&~ALIGN_BASE);
 
-#else
-  ss   =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  auxs =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  aux2s=calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  sc   =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  auxc =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  aux2c=calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-#endif
-
   Ptilde_cheb_coefs(EVMin, EVMax, *coefs, phmc_max_ptilde_degree, -1.0); 
 
   if(g_proc_id == g_stdio_proc && g_debug_level > 0){
@@ -368,7 +327,7 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
     }
     /* || (Ptilde P S P Ptilde - 1)X ||^2 / || 2X ||^2 */
     if(g_proc_id == g_stdio_proc) {
-      printf("# NDPOLY Acceptance Polynomial: relative squared accuracy in components:\n UP=%e  DN=%e \n", temp, temp2);
+      printf("# NDPOLY Acceptance Polynomial: relative squared accuracy in components:\n# UP=%e  DN=%e \n", temp, temp2);
     }
 
     temp = chebtilde_eval(degree, *coefs, EVMin);
@@ -386,20 +345,11 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
   }
 
   *_degree = degree;
-#if ( defined SSE || defined SSE2 || defined SSE3)
   free(ss_);
   free(auxs_);
   free(aux2s_);
   free(sc_);
   free(auxc_);
   free(aux2c_);
-#else
-  free(ss);
-  free(auxs);
-  free(aux2s);
-  free(sc);
-  free(auxc);
-  free(aux2c);
-#endif
   return;
 }
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index f9d83601a..aa9ae5546 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -51,11 +51,6 @@ void chebyshev_coefs(double aa, double bb, double c[], int n, double exponent){
 
   inv_n=1./(double)n;
   f=calloc(n,sizeof(double));/*vector(0,n-1);*/
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-    printf("PHMC: chebyshev_polynomial\n");
-    printf("PHMC: n= %d inv_n=%e \n",n,inv_n);
-    printf("PHMC: allocation !!!\n");
-  }
   fflush(stdout);
   bma=0.5*(bb-aa);
   bpa=0.5*(bb+aa);
@@ -154,8 +149,8 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   random_spinor_field(sc,VOLUME/2, 1);
 
   if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)){
-    printf("NDPOLY MD Polynomial: EVmin = %e  EVmax = %e  \n", EVMin, EVMax);
-    printf("NDPOLY MD Polynomial: the degree was set to: %d\n", degree_of_p);
+    printf("# NDPOLY MD Polynomial: EVmin = %e  EVmax = %e  \n", EVMin, EVMax);
+    printf("# NDPOLY MD Polynomial: the degree was set to: %d\n", degree_of_p);
     fflush(stdout);
   }
 
@@ -177,7 +172,7 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   if(g_proc_id == g_stdio_proc && g_debug_level > 0){
     /* this is || (P S P - 1)X ||^2 /|| 2X ||^2 */
     /* where X is a random spinor field         */
-    printf("NDPOLY MD Polynomial: relative squared accuracy in components:\n UP=%e  DN=%e \n", temp, temp2);
+    printf("# NDPOLY MD Polynomial: relative squared accuracy in components:\n# UP=%e  DN=%e \n", temp, temp2);
     /*     printf("NDPOLY: Sum remaining | c_n | = %e \n", sum); */
     fflush(stdout);
   }
@@ -188,7 +183,7 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
     temp *= cheb_eval(degree_of_p, *coefs, EVMin);
     temp = 0.5*fabs(temp - 1);
     if(g_proc_id == g_stdio_proc) {
-      printf("PHMC: Delta_IR at s=%f:    | P s_low P - 1 |/2 = %e \n", EVMin, temp);
+      printf("# PHMC: Delta_IR at s=%f:    | P s_low P - 1 |/2 = %e \n", EVMin, temp);
     }
   }
   /* RECALL THAT WE NEED AN EVEN DEGREE !!!! */
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 25310e90f..a6c791b98 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -156,6 +156,11 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   init_sw_fields();
   sw_term((const su3**)hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
+  
+  // measure EVs before the trajectory, every rec_ev-th one; rec_ev == 0 disables (no modulo by 0)
+  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
+    phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
+  }
 
   mnl->energy0 = 0.;
   random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
@@ -164,13 +169,6 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, mnl->rngrepro);
   mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
 
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-    printf("PHMC: Here comes the computation of H_old with \n \n");
-    printf("PHMC: First: random spinors and their norm  \n ");
-    printf("PHMC: OLD Ennergy UP %e \n", mnl->energy0);
-    printf("PHMC: OLD Energy  DN + UP %e \n\n", mnl->energy0);
-  }
-
   Qsw_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 	    g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
   
@@ -189,16 +187,9 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
   
   temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-    printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n ");
-    printf("PHMC: Norm of BHB up squared %e \n", temp);
-  }
 
   temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
 
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)){
-    printf("PHMC: Norm of BHB up + BHB dn squared %e \n\n", temp);
-  }
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called cloverndpoly_heatbath for id %d with g_running_phmc = %d\n", id, g_running_phmc);
   }
@@ -257,7 +248,7 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   temp = square_norm(g_chi_dn_spinor_field[ij], VOLUME/2, 1);
   Ener[ij] += temp;
   
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 20)) {
     printf("PHMC: Here comes the computation of H_new with \n \n");
     
     printf("PHMC: At j=%d  PHMC Final Energy %e \n", ij, mnl->energy1+Ener[ij]);
@@ -319,7 +310,7 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     }
   }
   mnl->energy1 += Ener[ij];  /* this is quite sticky */
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 20)) {
     printf("PHMC: At j = %d  P=%e +HMC Final Energy %e \n\n", ij, Ener[ij], mnl->energy1);
   }
   
diff --git a/global.h b/global.h
index 85000027e..1b94ca869 100644
--- a/global.h
+++ b/global.h
@@ -210,7 +210,6 @@ EXTERN int g_sf_inc_wrap_sq;
 /*************************/
 
 /* Parameters for non-degenrate case */
-EXTERN int g_rec_ev;
 EXTERN double g_mubar, g_epsbar;
 EXTERN int g_use_clover_flag;
 
diff --git a/hamiltonian_field.h b/hamiltonian_field.h
index df897abc8..b5cecad11 100644
--- a/hamiltonian_field.h
+++ b/hamiltonian_field.h
@@ -32,6 +32,7 @@ typedef struct {
   int update_gauge_copy;
   int update_gauge_energy;
   int update_rectangle_energy;
+  int traj_counter;
 } hamiltonian_field_t;
 
 
diff --git a/hmc_tm.c b/hmc_tm.c
index 02b979f1b..7c15a9e28 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -441,6 +441,7 @@ int main(int argc,char *argv[]) {
     fclose(countfile);
   }
 
+
   /* Loop for measurements */
   for(j = 0; j < Nmeas; j++) {
     if(g_proc_id == 0) {
@@ -449,7 +450,8 @@ int main(int argc,char *argv[]) {
 
     return_check = return_check_flag && (trajectory_counter%return_check_interval == 0);
 
-    accept = update_tm(&plaquette_energy, &rectangle_energy, datafilename, return_check, Ntherm<trajectory_counter);
+    accept = update_tm(&plaquette_energy, &rectangle_energy, datafilename, 
+		       return_check, Ntherm<trajectory_counter, trajectory_counter);
     Rate += accept;
 
     /* Save gauge configuration all Nsave times */
@@ -529,11 +531,6 @@ int main(int argc,char *argv[]) {
       }
     }
 
-    if((g_rec_ev !=0) && (trajectory_counter%g_rec_ev == 0) && (g_running_phmc)) {
-      phmc_compute_ev(trajectory_counter, plaquette_energy);
-    }
-
-
     if(g_proc_id == 0) {
       verbose = 1;
     }
diff --git a/linsolve.c b/linsolve.c
index ec8314f97..b547e44ee 100644
--- a/linsolve.c
+++ b/linsolve.c
@@ -149,8 +149,8 @@ int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_pr
   /* 2*1320.0 because the linalg is over VOLUME/2 */
   flops = (2*(2*1320.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1320.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
   if(g_proc_id==0 && g_debug_level > 0) {
-    printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); 
-    printf("CG: flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
+    printf("# CG(linsolve): iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); 
+    printf("# CG(linsolve): flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
 	   etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
   }
   g_sloppy_precision = save_sloppy;
diff --git a/monomial.c b/monomial.c
index 70bc0f527..b8ad86d95 100644
--- a/monomial.c
+++ b/monomial.c
@@ -104,6 +104,7 @@ int add_monomial(const int type) {
   monomial_list[no_monomials].c1tts = _default_g_C1tts; 
   monomial_list[no_monomials].rngrepro = _default_reproduce_randomnumber_flag;
   /* poly monomial */
+  monomial_list[no_monomials].rec_ev = _default_g_rec_ev;
   monomial_list[no_monomials].MDPolyDegree = _default_MDPolyDegree;
   monomial_list[no_monomials].MDPolyLmin = _default_MDPolyLmin;
   monomial_list[no_monomials].MDPolyLmax = _default_MDPolyLmax;
diff --git a/monomial.h b/monomial.h
index acf6ee520..03a712016 100644
--- a/monomial.h
+++ b/monomial.h
@@ -83,6 +83,7 @@ typedef struct {
   /* second one needed for ND monomials */
   spinor * pf, * pf2;
   /* parameters for the POLY Monomial*/
+  int rec_ev;
   int MDPolyDegree, MaxPtildeDegree, PtildeDegree;
   double MDPolyLmin, MDPolyLmax;
   char MDPolyRootsFile[256];
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 1054d1f67..47cd7ebfe 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -163,6 +163,12 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
 
   ndpoly_set_global_parameter(mnl, phmc_exact_poly);
+
+  // measure EVs before the trajectory, every rec_ev-th one; rec_ev == 0 disables (no modulo by 0)
+  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
+    phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
+  }
+
   mnl->energy0 = 0.;
   random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
diff --git a/phmc.c b/phmc.c
index f11f08b20..103022965 100644
--- a/phmc.c
+++ b/phmc.c
@@ -37,6 +37,7 @@
 #include "tm_operators_nd.h"
 #include "phmc.h"
 #include "monomial.h"
+#include "solver/matrix_mult_typedef_bi.h"
 #include "gettime.h"
 
 //                                          --> in  monomial
@@ -76,20 +77,20 @@ void init_phmc() {
 
   phmc_invmaxev=1.0;
 
-  if(phmc_compute_evs != 0) {
+  if(phmc_compute_evs != -1) {
     g_mu = g_mu1;
     max_iter_ev = 1000;
     
     no_eigenvalues = 10;   /* Number of lowest eigenvalues to be computed */
     if(g_epsbar!=0.0)
-      phmc_cheb_evmin = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, &Qtm_pm_ndbipsi);
+      phmc_cheb_evmin = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, &Qsw_pm_ndbipsi);
     else {
       phmc_cheb_evmin = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, 0, nstore, even_odd_flag);
     }
 
     no_eigenvalues = 4;   /* Number of highest eigenvalues to be computed */
     if(g_epsbar!=0.0)
-      phmc_cheb_evmax = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, &Qtm_pm_ndbipsi);
+      phmc_cheb_evmax = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, &Qsw_pm_ndbipsi);
     else
       phmc_cheb_evmax = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, 0, nstore, even_odd_flag);
        
@@ -203,12 +204,15 @@ void init_phmc() {
 
 
 void phmc_compute_ev(const int trajectory_counter,
-		     const double plaquette_energy) {
+		     const int id,
+		     matrix_mult_bi Qsq) {
   double atime, etime, temp=0., temp2=0.;
   int max_iter_ev, no_eigenvalues;
-  char * phmcfilename = "phmc.data";
+  char buf[100];
+  char * phmcfilename = buf;
   FILE * countfile;
 
+  sprintf(phmcfilename,"monomial-%.2d.data", id);
   atime = gettime();
   
   max_iter_ev = 1000;
@@ -220,31 +224,37 @@ void phmc_compute_ev(const int trajectory_counter,
 
   no_eigenvalues = 1;
 
-  if(g_epsbar!=0.0)
-    temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, &Qtm_pm_ndbipsi);
-  else
-    temp = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, 0, nstore, even_odd_flag);
+  //if(g_epsbar!=0.0)
+  temp = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, Qsq);
+  //else
+  //temp = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 0, 0, nstore, even_odd_flag);
   
   no_eigenvalues = 1;
-  if(g_epsbar!=0.0)
-    temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, &Qtm_pm_ndbipsi);
-  else
-    temp2 = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, 0, nstore, even_odd_flag);
+  //  if(g_epsbar!=0.0)
+  temp2 = eigenvalues_bi(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, Qsq);
+  //  else
+  //    temp2 = eigenvalues(&no_eigenvalues, max_iter_ev, eigenvalue_precision, 1, 0, nstore, even_odd_flag);
   
   if((g_proc_id == 0) && (g_debug_level > 0)) {
     printf("# PHMC: lowest eigenvalue end of trajectory %d = %e\n", 
 	   trajectory_counter, temp);
     printf("# PHMC: maximal eigenvalue end of trajectory %d = %e\n", 
 	   trajectory_counter, temp2);
+    if(temp2 > 1.) {
+      fprintf(stderr, "\nWarning: largest eigenvalue larger than upper bound!\n\n");
+    }
+    if(temp < stilde_min/stilde_max) {
+      fprintf(stderr, "\nWarning: smallest eigenvalue smaller than lower bound!\n\n");
+    }
   }
   if(g_proc_id == 0) {
     countfile = fopen(phmcfilename, "a");
-    fprintf(countfile, "%d %1.12f %1.5e %1.5e %1.5e %1.5e\n", 
-	    trajectory_counter, plaquette_energy/(6.*VOLUME*g_nproc), temp, temp2, stilde_min, stilde_max);
+    fprintf(countfile, "%.8d %1.5e %1.5e %1.5e %1.5e\n", 
+	    trajectory_counter, temp, temp2, stilde_min/stilde_max, 1.);
     fclose(countfile);
   }
   etime = gettime();
-  if((g_proc_id == 0)) {
+  if((g_proc_id == 0) && g_debug_level > 4) {
     printf("# PHMC: time/s for eigenvalue computation %e\n", etime-atime);
   }
 }
diff --git a/phmc.h b/phmc.h
index 10aa53528..f5dfa73aa 100644
--- a/phmc.h
+++ b/phmc.h
@@ -20,6 +20,8 @@
 #ifndef _PHMC_H
 #define _PHMC_H
 
+#include "solver/matrix_mult_typedef_bi.h"
+
 /* the normalisation constant appearing in the product representation of */
 /* the polynomial */
 extern double phmc_Cpol;
@@ -54,7 +56,7 @@ extern phmc_vars *phmc_var_stack;
 void pushPhmcVars();
 void popPhmcVars();
 
-void phmc_compute_ev(const int trajectory_counter,
-		     const double plaquette_energy);
+void phmc_compute_ev(const int trajectory_counter, const int id,
+		     matrix_mult_bi Qsq);
 
 #endif
diff --git a/read_input.l b/read_input.l
index 020fcc3cb..dc460847b 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1148,8 +1148,8 @@ inline void rmQuotes(char *str){
   }
   {SPC}*ComputeEVFreq{EQL}{DIGIT}+ {
     sscanf(yytext, " %[a-zA-Z] = %d", name, &a);
-    g_rec_ev = a;
-    if(myverbose!=0) printf("  Frequency for computing EV's set to %d in line %d monomial %d\n", g_rec_ev, line_of_file, current_monomial);
+    mnl->rec_ev = a;
+    if(myverbose!=0) printf("  Frequency for computing EV's set to %d in line %d monomial %d\n", mnl->rec_ev, line_of_file, current_monomial);
   }
   {SPC}*ComputeOnlyEVs{EQL}yes {
     phmc_compute_evs=1;
@@ -2023,7 +2023,6 @@ int read_input(char * conf_file){
   dfl_poly_iter = 20;
 
   g_kappa = _default_g_kappa;
-  g_rec_ev = _default_g_rec_ev;
   g_mubar = _default_g_mubar;
   g_epsbar = _default_g_epsbar;
   g_mu = _default_g_mu;
diff --git a/update_tm.c b/update_tm.c
index 09bccfa85..8e3085aae 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -65,7 +65,8 @@
 extern su3 ** g_gauge_field_saved;
 
 int update_tm(double *plaquette_energy, double *rectangle_energy, 
-              char * filename, const int return_check, const int acctest) {
+              char * filename, const int return_check, const int acctest, 
+	      const int traj_counter) {
 
   su3 *v, *w;
   static int ini_g_tmp = 0;
@@ -95,6 +96,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
   hf.update_gauge_copy = g_update_gauge_copy;
   hf.update_gauge_energy = g_update_gauge_energy;
   hf.update_rectangle_energy = g_update_rectangle_energy;
+  hf.traj_counter = traj_counter;
   integrator_set_fields(&hf);
 
   strcpy(tmp_filename, ".conf.tmp");
@@ -355,7 +357,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
   if(g_proc_id==0) {
     datafile = fopen(filename, "a");
     if (!bc_flag) { /* if Periodic Boundary Conditions */
-      fprintf(datafile, "%14.12f %14.12f %e ",
+      fprintf(datafile, "%.8d %14.12f %14.12f %e ", traj_counter,
               (*plaquette_energy)/(6.*VOLUME*g_nproc), dh, expmdh);
     }
     for(i = 0; i < Integrator.no_timescales; i++) {
diff --git a/update_tm.h b/update_tm.h
index 95c5efc59..e26456c5d 100644
--- a/update_tm.h
+++ b/update_tm.h
@@ -20,6 +20,7 @@
 #define _UPDATE_TM_H
 
 int update_tm(double *plaquette_energy, double *rectangle_energy, 
-	      char * filename, const int return_check, const int acctest);
+	      char * filename, const int return_check, const int acctest, 
+	      const int traj_counter);
 
 #endif

From f584fc2de14a9b769493429d825fe627e842dee9 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Mon, 8 Oct 2012 19:03:49 +0200
Subject: [PATCH 049/110] sample input file for NDCLOVER monomial

---
 sample-input/clover_roots.dat      | 97 ++++++++++++++++++++++++++++++
 sample-input/sample-ndclover.input | 68 +++++++++++++++++++++
 2 files changed, 165 insertions(+)
 create mode 100644 sample-input/clover_roots.dat
 create mode 100644 sample-input/sample-ndclover.input

diff --git a/sample-input/clover_roots.dat b/sample-input/clover_roots.dat
new file mode 100644
index 000000000..2999e6084
--- /dev/null
+++ b/sample-input/clover_roots.dat
@@ -0,0 +1,97 @@
+Nr.           Re            Im
+0 -8.9098604765327671e-01 4.5074023389346268e-02
+1 -7.0521755425263163e-02 6.0986181422472217e-02
+2 8.5940433190103771e-01 5.0319968469774365e-02
+3 1.0043512422763268e+00 3.1636826180314933e-03
+4 -5.4886289787580045e-01 7.7329052278625937e-02
+5 4.9397115089376387e-01 7.9385556139429547e-02
+6 -9.7968537134226641e-01 2.1906208061652683e-02
+7 -3.1854163755834597e-01 8.1675780066108220e-02
+8 6.9938103374781302e-01 6.8159933702328160e-02
+9 9.6335510933106017e-01 2.7960365396897370e-02
+10 -7.4412011327798677e-01 6.4234828280787096e-02
+11 2.5742534856994170e-01 8.0679880062335896e-02
+12 -9.4306964853078168e-01 3.3861735312366539e-02
+13 -1.9555476022692905e-01 7.8209886572080278e-02
+14 7.8581782322697336e-01 5.9931944976320019e-02
+15 9.9199268902477511e-01 1.5732777929054854e-02
+16 -6.5178876086130810e-01 7.1674463081838988e-02
+17 3.7854117416401190e-01 8.1660806016135512e-02
+18 -1.0002260243442995e+00 9.4739108135040214e-03
+19 -4.3711111468617142e-01 8.0856014824308473e-02
+20 6.0154461274290694e-01 7.4743652461674512e-02
+21 9.1891318465105021e-01 3.9577188890777028e-02
+22 -8.2429947903305645e-01 5.5283143607108863e-02
+23 1.3332587028590551e-01 7.3063488817650540e-02
+24 -9.1891318465105021e-01 3.9577188890777028e-02
+25 -1.3332587028590551e-01 7.3063488817650540e-02
+26 8.2429947903305645e-01 5.5283143607108863e-02
+27 1.0002260243442995e+00 9.4739108135040214e-03
+28 -6.0154461274290694e-01 7.4743652461674512e-02
+29 4.3711111468617142e-01 8.0856014824308473e-02
+30 -9.9199268902477511e-01 1.5732777929054854e-02
+31 -3.7854117416401190e-01 8.1660806016135512e-02
+32 6.5178876086130810e-01 7.1674463081838988e-02
+33 9.4306964853078168e-01 3.3861735312366539e-02
+34 -7.8581782322697336e-01 5.9931944976320019e-02
+35 1.9555476022692905e-01 7.8209886572080278e-02
+36 -9.6335510933106017e-01 2.7960365396897370e-02
+37 -2.5742534856994170e-01 8.0679880062335896e-02
+38 7.4412011327798677e-01 6.4234828280787096e-02
+39 9.7968537134226641e-01 2.1906208061652683e-02
+40 -6.9938103374781302e-01 6.8159933702328160e-02
+41 3.1854163755834597e-01 8.1675780066108220e-02
+42 -1.0043512422763268e+00 3.1636826180314933e-03
+43 -4.9397115089376387e-01 7.9385556139429547e-02
+44 5.4886289787580045e-01 7.7329052278625937e-02
+45 8.9098604765327671e-01 4.5074023389346268e-02
+46 -8.5940433190103771e-01 5.0319968469774365e-02
+47 7.0521755425263163e-02 6.0986181422472217e-02
+48 7.0521755425263163e-02 -6.0986181422472217e-02
+49 -8.5940433190103771e-01 -5.0319968469774365e-02
+50 8.9098604765327671e-01 -4.5074023389346268e-02
+51 5.4886289787580045e-01 -7.7329052278625937e-02
+52 -4.9397115089376387e-01 -7.9385556139429547e-02
+53 -1.0043512422763268e+00 -3.1636826180314933e-03
+54 3.1854163755834597e-01 -8.1675780066108220e-02
+55 -6.9938103374781302e-01 -6.8159933702328160e-02
+56 9.7968537134226641e-01 -2.1906208061652683e-02
+57 7.4412011327798677e-01 -6.4234828280787096e-02
+58 -2.5742534856994170e-01 -8.0679880062335896e-02
+59 -9.6335510933106017e-01 -2.7960365396897370e-02
+60 1.9555476022692905e-01 -7.8209886572080278e-02
+61 -7.8581782322697336e-01 -5.9931944976320019e-02
+62 9.4306964853078168e-01 -3.3861735312366539e-02
+63 6.5178876086130810e-01 -7.1674463081838988e-02
+64 -3.7854117416401190e-01 -8.1660806016135512e-02
+65 -9.9199268902477511e-01 -1.5732777929054854e-02
+66 4.3711111468617142e-01 -8.0856014824308473e-02
+67 -6.0154461274290694e-01 -7.4743652461674512e-02
+68 1.0002260243442995e+00 -9.4739108135040214e-03
+69 8.2429947903305645e-01 -5.5283143607108863e-02
+70 -1.3332587028590551e-01 -7.3063488817650540e-02
+71 -9.1891318465105021e-01 -3.9577188890777028e-02
+72 1.3332587028590551e-01 -7.3063488817650540e-02
+73 -8.2429947903305645e-01 -5.5283143607108863e-02
+74 9.1891318465105021e-01 -3.9577188890777028e-02
+75 6.0154461274290694e-01 -7.4743652461674512e-02
+76 -4.3711111468617142e-01 -8.0856014824308473e-02
+77 -1.0002260243442995e+00 -9.4739108135040214e-03
+78 3.7854117416401190e-01 -8.1660806016135512e-02
+79 -6.5178876086130810e-01 -7.1674463081838988e-02
+80 9.9199268902477511e-01 -1.5732777929054854e-02
+81 7.8581782322697336e-01 -5.9931944976320019e-02
+82 -1.9555476022692905e-01 -7.8209886572080278e-02
+83 -9.4306964853078168e-01 -3.3861735312366539e-02
+84 2.5742534856994170e-01 -8.0679880062335896e-02
+85 -7.4412011327798677e-01 -6.4234828280787096e-02
+86 9.6335510933106017e-01 -2.7960365396897370e-02
+87 6.9938103374781302e-01 -6.8159933702328160e-02
+88 -3.1854163755834597e-01 -8.1675780066108220e-02
+89 -9.7968537134226641e-01 -2.1906208061652683e-02
+90 4.9397115089376387e-01 -7.9385556139429547e-02
+91 -5.4886289787580045e-01 -7.7329052278625937e-02
+92 1.0043512422763268e+00 -3.1636826180314933e-03
+93 8.5940433190103771e-01 -5.0319968469774365e-02
+94 -7.0521755425263163e-02 -6.0986181422472217e-02
+95 -8.9098604765327671e-01 -4.5074023389346268e-02
diff --git a/sample-input/sample-ndclover.input b/sample-input/sample-ndclover.input
new file mode 100644
index 000000000..526c769db
--- /dev/null
+++ b/sample-input/sample-ndclover.input
@@ -0,0 +1,68 @@
+# this sample corresponds to a 2+1+1 test case
+# roots and the normalisation are in Square_root_BR_roots.dat
+# and normierungLocal.dat in this directory
+# they were generated using the chebyRoot.H file, which can also
+# be found in this directory
+L=4
+T=4
+Measurements = 1
+StartCondition = hot
+2KappaMu = 0.01
+2Kappamubar = 0.1105
+2Kappaepsbar = 0.0935
+kappa = 0.170
+NSave = 500000
+ThetaT = 1
+BCGstabMaxIter = 0
+CGMaxIter = 1000
+GaugeConfigInputFile = conf.save
+UseEvenOdd = yes
+ReversibilityCheck = yes
+ReversibilityCheckIntervall = 100
+DebugLevel = 1
+
+BeginMeasurement CORRELATORS
+  Frequency = 1
+EndMeasurement
+
+BeginMonomial GAUGE
+  Type = tlsym
+  beta = 3.30
+  Timescale = 0
+EndMonomial
+
+BeginMonomial DET
+  Timescale = 1
+  2KappaMu = 0.01
+  kappa = 0.170
+  AcceptancePrecision =  1e-20
+  ForcePrecision = 1e-12
+  Name = det
+  Solver = CG
+EndMonomial
+
+BeginMonomial NDCLOVER
+  Timescale = 1
+  StildeMin = 0.01225
+  StildeMax = 3.5
+  LocNormConst = 3.3775885577830275786
+  PrecisionPtilde = 1e-05
+  DegreeOfMDPolynomial = 48
+  PrecisionHfinal = 1e-10
+  ComputeEVFreq = 1
+  2KappaEpsBar = 0.0935
+  2Kappamubar = 0.1105
+  kappa = 0.170
+  CSW = 1.0
+  RootsFile = "clover_roots.dat"
+EndMonomial
+
+BeginIntegrator 
+  Type0 = 2MN
+  Type1 = 2MN
+  IntegrationSteps0 = 2
+  IntegrationSteps1 = 12
+  Tau = 1
+  Lambda0 = 0.19
+  NumberOfTimescales = 2
+EndIntegrator

From 7b7ba5846508b2ccdbaf92e6041442b7d6e17df7 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Mon, 8 Oct 2012 21:50:50 +0200
Subject: [PATCH 050/110] removed some inconsistencies and made initialisation
 depending on monomial type

---
 cloverndpoly_monomial.c | 109 ++++------------------------------------
 ndpoly_monomial.c       |  16 +++++-
 tm_operators_nd.c       |  10 ++--
 tm_operators_nd.h       |   4 +-
 4 files changed, 32 insertions(+), 107 deletions(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index a6c791b98..e36ea0957 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -75,9 +75,6 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   // we invert it for the even sites only
   sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
 
-
-  /* This factor 2 a missing factor 2 in trace_lambda */
-  ndpoly_set_global_parameter(mnl, 0);
   mnl->forcefactor = -phmc_Cpol*mnl->EVMaxInv;
 
   /* Recall:  The GAMMA_5 left of  delta M_eo  is done in  deriv_Sb !!! */
@@ -113,7 +110,7 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     
     /* Get the even parts of the  (j-1)th  chi_spinors */
     H_eo_sw_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
-		  g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
+		  g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
     
     /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
     deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[mnl->MDPolyDegree], hf, mnl->forcefactor);/* UP */
@@ -121,22 +118,21 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
     /* Get the even parts of the  (2N-j)-th  chi_spinors */
     H_eo_sw_ndpsi(mnl->w_fields[2], mnl->w_fields[3], 
-		  g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree], EO);
+		  g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree]);
     
     /* \delta M_oe sandwitched by  chi[j-1]_o^\dagger  and  chi[2N-j]_e */
     deriv_Sb(OE, g_chi_up_spinor_field[j-1], mnl->w_fields[2], hf, mnl->forcefactor);
     deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[3], hf, mnl->forcefactor);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(OO, g_chi_up_spinor_field[j-1], g_chi_up_spinor_field[mnl->MDPolyDegree]);
-    // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
     sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[0]);
+    // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
+    sw_spinor(OO, g_chi_up_spinor_field[j-1], g_chi_up_spinor_field[mnl->MDPolyDegree]);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(OO, g_chi_dn_spinor_field[j-1], g_chi_dn_spinor_field[mnl->MDPolyDegree]);
-    // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
     sw_spinor(EE, mnl->w_fields[3], mnl->w_fields[1]);
-
+    // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
+    sw_spinor(OO, g_chi_dn_spinor_field[j-1], g_chi_dn_spinor_field[mnl->MDPolyDegree]);
   }
   //to be coded
   //sw_deriv(EE, mnl->mu);
@@ -148,7 +144,6 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
 void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   int j;
-  double temp;
   monomial * mnl = &monomial_list[id];
 
   ndpoly_set_global_parameter(mnl, 0);
@@ -186,12 +181,8 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
   assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
   
-  temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
-
-  temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
-
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called cloverndpoly_heatbath for id %d with g_running_phmc = %d\n", id, g_running_phmc);
+    printf("called cloverndpoly_heatbath for id %d\n", id);
   }
   return;
 }
@@ -199,25 +190,16 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 
 double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   int j, ij=0;
-  double temp, sgn, fact, Diff;
-  double Ener[8];
-  double factor[8];
   monomial * mnl = &monomial_list[id];
   spinor *up0, *dn0, *up1, *dn1, *dummy;
 
   ndpoly_set_global_parameter(mnl, 0);
   g_mu3 = 0.;
-  init_sw_fields();
   sw_term((const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
 
   mnl->energy1 = 0.;
-  Ener[0] = 0;
-  factor[0] = 1.0;
-  for(j = 1; j < 8; j++){
-    factor[j] = j*factor[j-1];
-    Ener[j] = 0;
-  }
+
   /* IF PHMC */
   up0 = g_chi_up_spinor_field[0];
   up1 = g_chi_up_spinor_field[1];
@@ -242,79 +224,10 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     assign(g_chi_dn_spinor_field[ij], dn0, VOLUME/2);
   }
   
-  temp = square_norm(g_chi_up_spinor_field[ij], VOLUME/2, 1);
-  Ener[ij] = temp;
+  mnl->energy1 = square_norm(g_chi_up_spinor_field[ij], VOLUME/2, 1);
+  mnl->energy1 += square_norm(g_chi_dn_spinor_field[ij], VOLUME/2, 1);
   
-  temp = square_norm(g_chi_dn_spinor_field[ij], VOLUME/2, 1);
-  Ener[ij] += temp;
-  
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 20)) {
-    printf("PHMC: Here comes the computation of H_new with \n \n");
-    
-    printf("PHMC: At j=%d  PHMC Final Energy %e \n", ij, mnl->energy1+Ener[ij]);
-    printf("PHMC: At j=%d  PHMC Only Final Energy %e \n", ij, Ener[ij]);
-  }
-  
-  /* Here comes the loop for the evaluation of A, A^2, ...  */
-  for(j = 1; j < 1; j++){ /* To omit corrections just set  j<1 */
-    
-    if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
-      Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		   mnl->PtildeCoefs, mnl->PtildeDegree, 
-		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], &Qsw_pm_ndpsi);
-      Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		   mnl->MDPolyCoefs, mnl->MDPolyDegree, 
-		   g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], &Qsw_pm_ndpsi);
-      Qsw_dagger_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		       g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
-    }
-    else { /*  Chi[j] = ( Ptilde P Q ) Chi[j-1]  */ 
-      Qsw_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1]);
-      Ptilde_ndpsi(g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], 
-		   mnl->MDPolyCoefs, mnl->MDPolyDegree, g_chi_up_spinor_field[j], 
-		   g_chi_dn_spinor_field[j], &Qsw_pm_ndpsi);
-      Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
-		   mnl->PtildeCoefs, mnl->PtildeDegree, 
-		   g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], &Qsw_pm_ndpsi);
-    }
-    
-    Ener[j] = Ener[j-1] + Ener[0];
-    sgn = -1.0;
-    for(ij = 1; ij < j; ij++){
-      fact = factor[j] / (factor[ij] * factor[j-ij]);
-      if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-	printf("PHMC: Here  j=%d  and  ij=%d   sign=%f  fact=%f \n", j ,ij, sgn, fact);
-      }
-      Ener[j] += sgn*fact*Ener[ij];
-      sgn = -sgn;
-    }
-    temp = square_norm(g_chi_up_spinor_field[j], VOLUME/2, 1);
-    temp += square_norm(g_chi_dn_spinor_field[j], VOLUME/2, 1);
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-      printf("PHMC: Here  j=%d   sign=%f  temp=%e \n", j, sgn, temp);
-    }
-    
-    Ener[j] += sgn*temp;
-    
-    Diff = fabs(Ener[j] - Ener[j-1]);
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)) {
-      printf("PHMC: Correction aftern %d steps: %e \n", j, Diff);
-    }
-    
-    if(Diff < mnl->PrecisionHfinal) {
-      if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-	printf("PHMC: At j = %d  PHMC Only Final Energy %e \n", j, Ener[j]);
-      }
-      break;
-    }
-  }
-  mnl->energy1 += Ener[ij];  /* this is quite sticky */
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 20)) {
-    printf("PHMC: At j = %d  P=%e +HMC Final Energy %e \n\n", ij, Ener[ij], mnl->energy1);
-  }
-  
-  if(g_proc_id == 0 && g_debug_level > 3) {
+  if(g_proc_id == 0 && g_debug_level > 0) {
     printf("called cloverndpoly_acc for id %d %d dH = %1.4e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 47cd7ebfe..0f63e6fb8 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -47,6 +47,9 @@
 #include "boundary.h"
 #include "phmc.h"
 #include "init_chi_spinor_field.h"
+#include "solver/matrix_mult_typedef_nd.h"
+#include "clover_leaf.h"
+#include "clovertm_operators.h"
 #include "ndpoly_monomial.h"
 
 extern int phmc_exact_poly;
@@ -453,11 +456,20 @@ int init_ndpoly_monomial(const int id) {
   FILE * ifs;
   double *phmc_darray;
   char title[100];
+  matrix_mult_nd Qsq = &Qtm_pm_ndpsi;
+
+  if(mnl->type == NDCLOVER) {
+    Qsq = &Qsw_pm_ndpsi;
+    init_sw_fields();
+    sw_term((const su3 **)g_gauge_field, mnl->kappa, mnl->c_sw); 
+    sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
+  }
 
   phmc_invmaxev = 1.0;
   g_mubar = mnl->mubar;
   g_epsbar = mnl->epsbar;
   g_kappa = mnl->kappa;
+  g_c_sw = mnl->c_sw;
   boundary(g_kappa);
   if (g_epsbar!=0.0 || phmc_exact_poly==0){
     phmc_Cpol = sqrt(mnl->MDPolyLocNormConst);
@@ -482,7 +494,7 @@ int init_ndpoly_monomial(const int id) {
   /* Here we prepare the less precise MD polynomial first   */
   degree_of_polynomial_nd(&mnl->MDPolyDegree, &mnl->MDPolyCoefs,
 			  mnl->EVMin, mnl->EVMax,
-			  Qtm_pm_ndpsi);
+			  Qsq);
   phmc_dop_n_cheby = mnl->MDPolyDegree;
   phmc_dop_cheby_coef = mnl->MDPolyCoefs;
   if((g_proc_id == 0) && (g_debug_level > 1)) {
@@ -503,7 +515,7 @@ int init_ndpoly_monomial(const int id) {
   /* Here we prepare the precise polynomial Ptilde */
   degree_of_Ptilde(&mnl->PtildeDegree, &mnl->PtildeCoefs, 
 		   mnl->EVMin, mnl->EVMax, mnl->MDPolyDegree, 
-		   mnl->PrecisionPtilde, &Qtm_pm_ndpsi);
+		   mnl->PrecisionPtilde, Qsq);
   phmc_ptilde_cheby_coef = mnl->PtildeCoefs;
   phmc_ptilde_n_cheby = mnl->PtildeDegree;
 
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index 73750395f..ad131d06c 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -535,16 +535,16 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
 }
 
 void H_eo_sw_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		   spinor * const k_strange, spinor * const k_charm, 
-		   const int ieo) {
+		   spinor * const k_strange, spinor * const k_charm) {
+
   /* recall:   strange <-> up    while    charm <-> dn   */
-  Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX], k_strange);
-  Hopping_Matrix(ieo, g_spinor_field[DUM_MATRIX+1], k_charm);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX], k_strange);
+  Hopping_Matrix(EO, g_spinor_field[DUM_MATRIX+1], k_charm);
   
   assign_mul_one_sw_pm_imu_eps(EE, l_charm, l_strange,
 			       g_spinor_field[DUM_MATRIX], g_spinor_field[DUM_MATRIX+1], 
 			       g_mubar, g_epsbar);
-
+  // here the order doesn't matter
   clover_inv_nd(EE, l_strange, l_charm);
 
   return;
diff --git a/tm_operators_nd.h b/tm_operators_nd.h
index 38e3983be..2edde5c9c 100644
--- a/tm_operators_nd.h
+++ b/tm_operators_nd.h
@@ -55,8 +55,8 @@ void H_eo_tm_ndpsi(spinor * const l_strange, spinor * const l_charm,
              spinor * const k_strange, spinor * const k_charm, 
 	     const int ieo);
 void H_eo_sw_ndpsi(spinor * const l_strange, spinor * const l_charm, 
-		   spinor * const k_strange, spinor * const k_charm, 
-		   const int ieo);
+		   spinor * const k_strange, spinor * const k_charm);
+
 
 void M_ee_inv_ndpsi(spinor * const l_strange, spinor * const l_charm, 
 		    spinor * const k_strange, spinor * const k_charm,

From 71efad77b7d4ada805629afe51e060f76ee7da7c Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Mon, 8 Oct 2012 22:06:39 +0200
Subject: [PATCH 051/110] bug fixed in clover derivative, seems to be working
 now, trlog part still missing

---
 cloverndpoly_monomial.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index e36ea0957..5b4eab791 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -125,14 +125,14 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[3], hf, mnl->forcefactor);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[0]);
+    sw_spinor(EE, mnl->w_fields[3], mnl->w_fields[0]);
     // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-    sw_spinor(OO, g_chi_up_spinor_field[j-1], g_chi_up_spinor_field[mnl->MDPolyDegree]);
+    sw_spinor(OO, g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[mnl->MDPolyDegree]);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(EE, mnl->w_fields[3], mnl->w_fields[1]);
+    sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[1]);
     // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-    sw_spinor(OO, g_chi_dn_spinor_field[j-1], g_chi_dn_spinor_field[mnl->MDPolyDegree]);
+    sw_spinor(OO, g_chi_dn_spinor_field[j-1], g_chi_up_spinor_field[mnl->MDPolyDegree]);
   }
   //to be coded
   //sw_deriv(EE, mnl->mu);

From d3e5de2cf9b6473045a350eb5c0ec2484fabf846 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 10 Oct 2012 13:30:02 +0200
Subject: [PATCH 052/110] added a trlog monomial for ndclover

---
 Makefile.in               |  1 +
 clovernd_trlog_monomial.c | 74 +++++++++++++++++++++++++++++++++++++++
 clovernd_trlog_monomial.h | 30 ++++++++++++++++
 monomial.c                | 29 +++++++++++++++
 monomial.h                |  2 ++
 5 files changed, 136 insertions(+)
 create mode 100644 clovernd_trlog_monomial.c
 create mode 100644 clovernd_trlog_monomial.h

diff --git a/Makefile.in b/Makefile.in
index bbd4323b0..3cb70cfb9 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -64,6 +64,7 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	monomial det_monomial detratio_monomial update_momenta \
 	integrator gauge_monomial ndpoly_monomial phmc \
 	clover_trlog_monomial cloverdet_monomial cloverdetratio_monomial \
+	clovernd_trlog_monomial \
 	little_D block Dov_psi operator poly_monomial measurements pion_norm Dov_proj \
 	xchange_field_tslice temporalgauge spinor_fft X_psi P_M_eta \
 	xchange_jacobi jacobi init_jacobi_field cloverndpoly_monomial \
diff --git a/clovernd_trlog_monomial.c b/clovernd_trlog_monomial.c
new file mode 100644
index 000000000..2fa893d5c
--- /dev/null
+++ b/clovernd_trlog_monomial.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+ *
+ * Copyright (C) 2012 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include "global.h"
+#include "su3.h"
+#include "su3adj.h"
+#include "su3spinor.h"
+#include "clovertm_operators.h"
+#include "clover_leaf.h"
+#include "monomial.h"
+#include "Hopping_Matrix.h"
+#include "clovernd_trlog_monomial.h"
+
+void clovernd_trlog_derivative(const int id, hamiltonian_field_t * const hf) {
+  //monomial * mnl = &monomial_list[id];
+  /* this term has no derivative */
+  /* so a dummy function         */
+  if(g_proc_id == 0 && g_debug_level > 4) {
+    printf("called clovernd_trlog_derivative for id %d, which is a dummy function\n", id);
+  }
+  return;
+}
+
+
+void clovernd_trlog_heatbath(const int id, hamiltonian_field_t * const hf) {
+  monomial * mnl = &monomial_list[id];
+  mnl->energy0 = 0.;
+
+  init_sw_fields();
+  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
+  /*compute the contribution from the clover trlog term */
+  mnl->energy0 = -sw_trace_nd(EE, mnl->mubar, mnl->epsbar);
+  if(g_proc_id == 0 && g_debug_level > 3) {
+    printf("called clovernd_trlog_heatbath for id %d E = %e\n", id, mnl->energy0);
+  }
+  return;
+}
+
+double clovernd_trlog_acc(const int id, hamiltonian_field_t * const hf) {
+  monomial * mnl = &monomial_list[id];
+  mnl->energy1 = 0.;
+  sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
+  /*compute the contribution from the clover trlog term */
+  mnl->energy1 = -sw_trace_nd(EE, mnl->mubar, mnl->epsbar);
+  if(g_proc_id == 0 && g_debug_level > 3) {
+    printf("called clover_trlog_acc for id %d dH = %1.4e\n", 
+	   id, mnl->energy1 - mnl->energy0);
+  }
+  return(0.*(mnl->energy1 - mnl->energy0));
+}
diff --git a/clovernd_trlog_monomial.h b/clovernd_trlog_monomial.h
new file mode 100644
index 000000000..69c50b0c6
--- /dev/null
+++ b/clovernd_trlog_monomial.h
@@ -0,0 +1,30 @@
+/***********************************************************************
+ *
+ * Copyright (C) 2012 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifndef _CLOVERND_TRLOG_MONOMIAL_H
+#define _CLOVERND_TRLOG_MONOMIAL_H
+
+#include "hamiltonian_field.h"
+
+void clovernd_trlog_derivative(const int id, hamiltonian_field_t * const hf);
+void clovernd_trlog_heatbath(const int id, hamiltonian_field_t * const hf);
+double clovernd_trlog_acc(const int id, hamiltonian_field_t * const hf);
+
+#endif
diff --git a/monomial.c b/monomial.c
index b8ad86d95..f3911cdb5 100644
--- a/monomial.c
+++ b/monomial.c
@@ -47,6 +47,7 @@ monomial monomial_list[max_no_monomials];
 int no_monomials = 0;
 int no_gauge_monomials = 0;
 int clover_trlog_monomial = 0;
+int clovernd_trlog_monomial = 0;
 static spinor * _pf;
 spinor ** w_fields;
 const int no_wfields = 4;
@@ -134,6 +135,7 @@ int init_monomials(const int V, const int even_odd_flag) {
   int retval;
   spinor * __pf = NULL;
   double sw_mu=0., sw_k=0., sw_c=0.;
+  double swn_mubar=0., swn_epsbar = 0., swn_k=0., swn_c=0.;
   for(int i = 0; i < no_monomials; i++) {
     if((monomial_list[i].type != GAUGE) && (monomial_list[i].type != SFGAUGE)) no++;
     /* non-degenerate monomials need two pseudo fermion fields */
@@ -237,6 +239,11 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].derivativefunction = &cloverndpoly_derivative;
 	monomial_list[i].pf2 = __pf+no*V;
 	monomial_list[i].even_odd_flag = 1;
+	clovernd_trlog_monomial = 1;
+	swn_c = monomial_list[i].c_sw;
+	swn_k = monomial_list[i].kappa;
+	swn_mubar = monomial_list[i].mubar;
+	swn_epsbar = monomial_list[i].epsbar;
 	//monomial_list[i].Qsq = &Qsw_pm_ndpsi;
 	//monomial_list[i].Qp = &Qsw_ndpsi;
 	//monomial_list[i].Qm = &Qsw_dagger_ndpsi;
@@ -296,6 +303,28 @@ int init_monomials(const int V, const int even_odd_flag) {
       printf("# Initialised clover_trlog_monomial, no_monomials= %d\n", no_monomials);
     }
   }
+  if(clovernd_trlog_monomial && even_odd_flag) {
+    monomial_list[no_monomials].type = CLOVERNDTRLOG;
+    strcpy( monomial_list[no_monomials].name, "CLOVERNDTRLOG");
+    add_monomial(CLOVERNDTRLOG);
+    monomial_list[no_monomials-1].pf = NULL;
+    monomial_list[no_monomials-1].id = no_monomials-1;
+    // set the parameters according to cloverdet monomial
+    // this need alltogether a more general approach
+    monomial_list[no_monomials-1].c_sw = swn_c;
+    monomial_list[no_monomials-1].mubar = swn_mubar;
+    monomial_list[no_monomials-1].epsbar = swn_epsbar;
+    monomial_list[no_monomials-1].kappa = swn_k;
+    monomial_list[no_monomials-1].hbfunction = &clovernd_trlog_heatbath;
+    monomial_list[no_monomials-1].accfunction = &clovernd_trlog_acc;
+    monomial_list[no_monomials-1].derivativefunction = NULL;
+    monomial_list[no_monomials-1].timescale = 0;
+    monomial_list[no_monomials-1].even_odd_flag = 1;
+    if(g_proc_id == 0) {
+      printf("# Initialised clovernd_trlog_monomial, no_monomials= %d\n", no_monomials);
+    }
+  }
+
   return(0);
 }
 
diff --git a/monomial.h b/monomial.h
index 03a712016..78b9d1f66 100644
--- a/monomial.h
+++ b/monomial.h
@@ -38,6 +38,7 @@
 #define CLOVERDET 9
 #define CLOVERDETRATIO 10
 #define NDCLOVER 11
+#define CLOVERNDTRLOG 12
 
 #define max_no_monomials 20
 
@@ -119,6 +120,7 @@ typedef struct {
 #include "gauge_monomial.h"
 #include "sf_gauge_monomial.h"
 #include "clover_trlog_monomial.h"
+#include "clovernd_trlog_monomial.h"
 #include "cloverdet_monomial.h"
 #include "cloverdetratio_monomial.h"
 #include "cloverndpoly_monomial.h"

From 13a5db67b7127b1511b29c088c67d39f42770fbd Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 10 Oct 2012 17:41:10 +0200
Subject: [PATCH 053/110] added docu for nd tracelog derivative

---
 doc/eo_pre.tex | 62 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 9 deletions(-)

diff --git a/doc/eo_pre.tex b/doc/eo_pre.tex
index cf53ca63d..726a3ea07 100644
--- a/doc/eo_pre.tex
+++ b/doc/eo_pre.tex
@@ -499,22 +499,66 @@ \subsubsection{Combining Clover and Nondegenerate Twisted mass term}
 (M_{oe}^h\ (M_{ee}^h)^{-1}\ M_{eo}^h)\,,
 \]
 with
-\[
-M_{oo|ee}^h = 1+T_{oo|ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1\,.
-\]
+\begin{equation}
+  M_{oo|ee}^h = 1+T_{oo|ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1\,.
+\end{equation}
 Because $1+T_{oo|ee}$ is hermitian, we can invert $M_{ee}^h$ by
-\[
-(1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1} =
-\frac{1}{(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}
-(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)\,.
-\]
-{\bf check!}\\
+\begin{equation}
+  \label{eq:ndSdet}
+  (1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1} =
+  \frac{(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)}
+  {(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}\,.
+\end{equation}
 Therefore, the determinant we have to compute is
 \[
 \det(Q^h) =
 \det[\gamma_5(1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)]\
 \det[\hat Q^h_{oo}]
 \]
+For the variation of the first term we have to compute now
+\begin{equation}
+  \label{eq:ndtrdiracdet}
+  \tr_\mathrm{dirac, flavour}[\ i\sigma_{\mu\nu}(1+T_{ee}(x) +
+  i\bar\mu\gamma_5\tau^3 - \bar\epsilon\tau^1)^{-1}\ ]\, ,
+\end{equation}
+which is equal to
+\begin{equation}
+ \tr_\mathrm{dirac,flavour}\left[\ i\sigma_{\mu\nu}
+   \frac{(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)}
+  {(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}\ \right]\,.
+\end{equation}
+%{\bf is it correct that we take the flvour trace as well??}
+The trace in flavour simplifies the computation to
+\begin{equation}
+ \tr_\mathrm{dirac}\left[\ i\sigma_{\mu\nu}
+   \frac{2(1+T_{ee})}
+  {(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}\ \right]\,.
+\end{equation}
+This can be treated analogously to the degenerate case described
+above.
+%This is unfortunately a rather large matrix in Dirac and flavour
+%space. However, $[(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2]^{-1}$
+%are two $6\times6$ block matrices, with off diagonal $6\times6$ blocks
+%being identically zero. Therefore, the matrix $T_\mathrm{nddet}$ in
+%2-flavour Dirac space has the form (suppressing colour)
+%\[
+%T_\mathrm{nddet}=
+%\begin{pmatrix}
+%  u_0 & u_1 & 0 & 0 & a_0 & a_1 & 0 & 0\\
+%  u_3 & u_2 & 0 & 0 & a_3 & a_2 & 0 & 0\\
+%  0 & 0 & l_0 & l_1 & 0 & 0 & b_0 & b_1\\
+%  0 & 0 & l_3 & l_2 & 0 & 0 & b_3 & b_2\\
+%  a_0 & a_1 & 0 & 0 & u'_0 & u'_1 & 0 & 0\\
+%  a_3 & a_2 & 0 & 0 & u'_3 & u'_2 & 0 & 0\\
+%  0 & 0 & b_0 & b_1 & 0 & 0 & l'_0 & l'_1\\
+%  0 & 0 & b_3 & b_2 & 0 & 0 & l'_3 & l'_2\\
+%\end{pmatrix}\,.
+%\]
+%Actually, $u_i$ and $u'_i$ and $l_i$ and $l'_i$ differ by $\pm\mu$, so
+%this $\mu$-dependence should cancel in the sum for derivative (the
+%$\mu$-dependence in the denominator of eq.~(\ref{eq:ndSdet}) of course
+%remains). The off-diagonal $4\times4$ blocks are of course simply
+%$\epsilon [(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2]^{-1}$.
 
 \subsection{Inversion}
 

From 8088c044ec0b4d744b4f523a2fa2ebcd36125647 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 10 Oct 2012 21:10:49 +0200
Subject: [PATCH 054/110] implemented sw_deriv_nd and unified some
 normalisation factors. NDCLOVER seems to work now, but not yet finally tested

---
 clover_leaf.c             | 164 +++++++++++++++++++++++++++++++-------
 clover_leaf.h             |   3 +-
 clover_trlog_monomial.c   |   2 +-
 cloverdet_monomial.c      |   6 +-
 cloverdetratio_monomial.c |  16 ++--
 clovernd_trlog_monomial.c |   6 +-
 cloverndpoly_monomial.c   |  25 ++++--
 7 files changed, 172 insertions(+), 50 deletions(-)

diff --git a/clover_leaf.c b/clover_leaf.c
index e6c704f76..677dc71fb 100644
--- a/clover_leaf.c
+++ b/clover_leaf.c
@@ -588,6 +588,26 @@ void mult_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double
   return;
 }
 
+void add_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]) {
+
+  for(int i = 0; i < 6; i++) {
+    for(int j = 0; j < 6; j++) {
+      a[i][j] = b[i][j] + d[i][j];
+    }
+  }
+  return;
+}
+
+void sub_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]) {
+
+  for(int i = 0; i < 6; i++) {
+    for(int j = 0; j < 6; j++) {
+      a[i][j] = b[i][j] - d[i][j];
+    }
+  }
+  return;
+}
+
 void copy_6x6(_Complex double a[6][6], const _Complex double b[6][6]) {
   for(int i = 0; i < 6; i++) {
     for(int j = 0; j < 6; j++) {
@@ -813,15 +833,15 @@ void sw_deriv(const int ieo, const double mu) {
 #endif
     x = g_eo2lexic[icx];
     /* compute the insertion matrix */
-    _su3_plus_su3(lswp[0],sw_inv[icy][0][1],sw_inv[icy][0][0]);
-    _su3_plus_su3(lswp[1],sw_inv[icy][1][1],sw_inv[icy][1][0]);
-    _su3_plus_su3(lswp[2],sw_inv[icy][2][1],sw_inv[icy][2][0]);
-    _su3_plus_su3(lswp[3],sw_inv[icy][3][1],sw_inv[icy][3][0]);
-
-    _su3_minus_su3(lswm[0],sw_inv[icy][0][1],sw_inv[icy][0][0]);
-    _su3_minus_su3(lswm[1],sw_inv[icy][1][1],sw_inv[icy][1][0]);
-    _su3_minus_su3(lswm[2],sw_inv[icy][2][1],sw_inv[icy][2][0]);
-    _su3_minus_su3(lswm[3],sw_inv[icy][3][1],sw_inv[icy][3][0]);
+    _su3_plus_su3(lswp[0], sw_inv[icy][0][1], sw_inv[icy][0][0]);
+    _su3_plus_su3(lswp[1], sw_inv[icy][1][1], sw_inv[icy][1][0]);
+    _su3_plus_su3(lswp[2], sw_inv[icy][2][1], sw_inv[icy][2][0]);
+    _su3_plus_su3(lswp[3], sw_inv[icy][3][1], sw_inv[icy][3][0]);
+
+    _su3_minus_su3(lswm[0], sw_inv[icy][0][1], sw_inv[icy][0][0]);
+    _su3_minus_su3(lswm[1], sw_inv[icy][1][1], sw_inv[icy][1][0]);
+    _su3_minus_su3(lswm[2], sw_inv[icy][2][1], sw_inv[icy][2][0]);
+    _su3_minus_su3(lswm[3], sw_inv[icy][3][1], sw_inv[icy][3][0]);
     
     /* add up to swm[] and swp[] */
     _su3_refac_acc(swm[x][0], fac, lswm[0]);
@@ -834,15 +854,15 @@ void sw_deriv(const int ieo, const double mu) {
     _su3_refac_acc(swp[x][3], fac, lswp[3]);
     if(fabs(mu) > 0.) {
       /* compute the insertion matrix */
-      _su3_plus_su3(lswp[0],sw_inv[icy+VOLUME/2][0][1],sw_inv[icy+VOLUME/2][0][0]);
-      _su3_plus_su3(lswp[1],sw_inv[icy+VOLUME/2][1][1],sw_inv[icy+VOLUME/2][1][0]);
-      _su3_plus_su3(lswp[2],sw_inv[icy+VOLUME/2][2][1],sw_inv[icy+VOLUME/2][2][0]);
-      _su3_plus_su3(lswp[3],sw_inv[icy+VOLUME/2][3][1],sw_inv[icy+VOLUME/2][3][0]); 
-
-      _su3_minus_su3(lswm[0],sw_inv[icy+VOLUME/2][0][1],sw_inv[icy+VOLUME/2][0][0]);
-      _su3_minus_su3(lswm[1],sw_inv[icy+VOLUME/2][1][1],sw_inv[icy+VOLUME/2][1][0]);
-      _su3_minus_su3(lswm[2],sw_inv[icy+VOLUME/2][2][1],sw_inv[icy+VOLUME/2][2][0]);
-      _su3_minus_su3(lswm[3],sw_inv[icy+VOLUME/2][3][1],sw_inv[icy+VOLUME/2][3][0]);
+      _su3_plus_su3(lswp[0], sw_inv[icy+VOLUME/2][0][1], sw_inv[icy+VOLUME/2][0][0]);
+      _su3_plus_su3(lswp[1], sw_inv[icy+VOLUME/2][1][1], sw_inv[icy+VOLUME/2][1][0]);
+      _su3_plus_su3(lswp[2], sw_inv[icy+VOLUME/2][2][1], sw_inv[icy+VOLUME/2][2][0]);
+      _su3_plus_su3(lswp[3], sw_inv[icy+VOLUME/2][3][1], sw_inv[icy+VOLUME/2][3][0]); 
+
+      _su3_minus_su3(lswm[0], sw_inv[icy+VOLUME/2][0][1], sw_inv[icy+VOLUME/2][0][0]);
+      _su3_minus_su3(lswm[1], sw_inv[icy+VOLUME/2][1][1], sw_inv[icy+VOLUME/2][1][0]);
+      _su3_minus_su3(lswm[2], sw_inv[icy+VOLUME/2][2][1], sw_inv[icy+VOLUME/2][2][0]);
+      _su3_minus_su3(lswm[3], sw_inv[icy+VOLUME/2][3][1], sw_inv[icy+VOLUME/2][3][0]);
       
       /* add up to swm[] and swp[] */
       _su3_refac_acc(swm[x][0], fac, lswm[0]);
@@ -864,6 +884,95 @@ void sw_deriv(const int ieo, const double mu) {
   return;
 }
 
+void sw_deriv_nd(const int ieo) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  int icy;
+  int ioff;
+  int x;
+  double fac = 1.0000; /* weight used in every _su3_refac_acc accumulation below */
+  su3 ALIGN lswp[4], lswm[4], v;
+  _Complex double ALIGN a0[6][6], a1[6][6], b[6][6], c[6][6];
+  /* For each of the VOLUME/2 sites selected by ieo: build 6x6 matrices from sw and sw_inv and add the resulting 3x3 blocks to swm[x][0..3] and swp[x][0..3]. */
+  /* convention: Tr clover-leaf times insertion; ieo == 0 starts at even-odd index 0, any other value at (VOLUME+RAND)/2 */
+  if(ieo == 0) {
+    ioff=0;
+  } 
+  else {
+    ioff = (VOLUME+RAND)/2;
+  }
+  /* icy is the index into sw_inv; without OMP it is a running counter that starts at 0 and is incremented at the end of each iteration */
+#ifndef OMP
+  icy = 0;
+#endif
+  /* with OMP icy is instead recomputed as icx - ioff inside every iteration */
+#ifdef OMP
+#pragma omp for
+#endif
+  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+#ifdef OMP
+    icy = icx - ioff;
+#endif
+    x = g_eo2lexic[icx];
+    /* compute the insertion matrix: fill b from sw[x][0..2][0]; the block at offsets (3,0) is the dagger of sw[x][1][0] */
+    populate_6x6_matrix(b, &sw[x][0][0], 0, 0);
+    populate_6x6_matrix(b, &sw[x][1][0], 0, 3);
+    _su3_dagger(v, sw[x][1][0]); 
+    populate_6x6_matrix(b, &v, 3, 0);
+    populate_6x6_matrix(b, &sw[x][2][0], 3, 3);
+    /* fill c from the four su3 blocks sw_inv[icy][0..3][0] */
+    populate_6x6_matrix(c, &sw_inv[icy][0][0], 0, 0);
+    populate_6x6_matrix(c, &sw_inv[icy][1][0], 0, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][2][0], 3, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][3][0], 3, 0);
+    /* a0 is the mult_6x6 result of b and c -- NOTE(review): presumably a0 = b * c, confirm operand order in mult_6x6 */
+    mult_6x6(a0, b, c);
+    /* same construction with last index 1 of sw and sw_inv; b is refilled here */
+    populate_6x6_matrix(b, &sw[x][0][1], 0, 0);
+    populate_6x6_matrix(b, &sw[x][1][1], 0, 3);
+    _su3_dagger(v, sw[x][1][1]); 
+    populate_6x6_matrix(b, &v, 3, 0);
+    populate_6x6_matrix(b, &sw[x][2][1], 3, 3);
+    /* refill c from sw_inv[icy][0..3][1] */
+    populate_6x6_matrix(c, &sw_inv[icy][0][1], 0, 0);
+    populate_6x6_matrix(c, &sw_inv[icy][1][1], 0, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][2][1], 3, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][3][1], 3, 0);
+    /* a1 from the index-1 data; b is then reused as scratch: lswp[] takes the blocks of add_6x6(b, a1, a0), presumably a1 + a0 -- confirm */
+    mult_6x6(a1, b, c);
+    add_6x6(b, a1, a0);
+    get_3x3_block_matrix(&lswp[0], b, 0, 0);
+    get_3x3_block_matrix(&lswp[1], b, 0, 3);
+    get_3x3_block_matrix(&lswp[2], b, 3, 3);
+    get_3x3_block_matrix(&lswp[3], b, 3, 0);
+    /* lswm[] takes the blocks of sub_6x6(b, a1, a0), presumably a1 - a0 -- confirm */
+    sub_6x6(b, a1, a0);
+    get_3x3_block_matrix(&lswm[0], b, 0, 0);
+    get_3x3_block_matrix(&lswm[1], b, 0, 3);
+    get_3x3_block_matrix(&lswm[2], b, 3, 3);
+    get_3x3_block_matrix(&lswm[3], b, 3, 0);
+    
+    /* add up to swm[] and swp[]: accumulate, do not overwrite, so earlier contributions at site x are kept */
+    _su3_refac_acc(swm[x][0], fac, lswm[0]);
+    _su3_refac_acc(swm[x][1], fac, lswm[1]);
+    _su3_refac_acc(swm[x][2], fac, lswm[2]);
+    _su3_refac_acc(swm[x][3], fac, lswm[3]);
+    _su3_refac_acc(swp[x][0], fac, lswp[0]);
+    _su3_refac_acc(swp[x][1], fac, lswp[1]);
+    _su3_refac_acc(swp[x][2], fac, lswp[2]);
+    _su3_refac_acc(swp[x][3], fac, lswp[3]);
+#ifndef OMP
+    ++icy;
+#endif
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
 
 // direct product of Y_e(o) and X_e(o) in colour space   
 // with insertion matrix at site x
@@ -871,7 +980,8 @@ void sw_deriv(const int ieo, const double mu) {
 // result is again stored in swm and swp                 
 // includes a gamma5 multiplication for kk
 
-void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll) {
+void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll, 
+	       const double fac) {
 #ifdef OMP
 #pragma omp parallel
   {
@@ -923,14 +1033,14 @@ void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll)
     _su3_minus_su3(lswm[3],u3,v3);
     
     /* add up to swm[0] and swp[0] */
-    _su3_acc(swm[x][0], lswm[0]);
-    _su3_acc(swm[x][1], lswm[1]);
-    _su3_acc(swm[x][2], lswm[2]);
-    _su3_acc(swm[x][3], lswm[3]);
-    _su3_acc(swp[x][0], lswp[0]);
-    _su3_acc(swp[x][1], lswp[1]);
-    _su3_acc(swp[x][2], lswp[2]);
-    _su3_acc(swp[x][3], lswp[3]);
+    _su3_refac_acc(swm[x][0], fac, lswm[0]);
+    _su3_refac_acc(swm[x][1], fac, lswm[1]);
+    _su3_refac_acc(swm[x][2], fac, lswm[2]);
+    _su3_refac_acc(swm[x][3], fac, lswm[3]);
+    _su3_refac_acc(swp[x][0], fac, lswp[0]);
+    _su3_refac_acc(swp[x][1], fac, lswp[1]);
+    _su3_refac_acc(swp[x][2], fac, lswp[2]);
+    _su3_refac_acc(swp[x][3], fac, lswp[3]);
   }
 #ifdef OMP
   } /* OpenMP closing brace */
diff --git a/clover_leaf.h b/clover_leaf.h
index 71881e4ad..3171d9cc6 100644
--- a/clover_leaf.h
+++ b/clover_leaf.h
@@ -32,7 +32,8 @@ double sw_trace_nd(const int ieo, const double mu, const double eps);
 void sw_invert(const int ieo, const double mu);
 void sw_invert_nd(const double mshift);
 void sw_deriv(const int ieo, const double mu);
-void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll);
+void sw_deriv_nd(const int ieo);
+void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll, const double fac);
 void sw_all(hamiltonian_field_t * const hf, const double kappa, const double c_sw);
 int init_swpm(const int V);
 
diff --git a/clover_trlog_monomial.c b/clover_trlog_monomial.c
index 93f7cc891..ac116334b 100644
--- a/clover_trlog_monomial.c
+++ b/clover_trlog_monomial.c
@@ -66,7 +66,7 @@ double clover_trlog_acc(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   /*compute the contribution from the clover trlog term */
   mnl->energy1 = -sw_trace(EO, mnl->mu);
-  if(g_proc_id == 0 && g_debug_level > 3) {
+  if(g_proc_id == 0 && g_debug_level > 0) {
     printf("called clover_trlog_acc for id %d dH = %1.4e\n", 
 	   id, mnl->energy1 - mnl->energy0);
   }
diff --git a/cloverdet_monomial.c b/cloverdet_monomial.c
index 2236742da..6a4a54330 100644
--- a/cloverdet_monomial.c
+++ b/cloverdet_monomial.c
@@ -108,10 +108,10 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3]);
+  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3], mnl->forcefactor);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1]);
+  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1], mnl->forcefactor);
   
   // compute the contribution for the det-part
   // we again compute only the insertion matrices for S_det
@@ -123,7 +123,7 @@ void cloverdet_derivative(const int id, hamiltonian_field_t * const hf) {
   // finally, using the insertion matrices stored in swm and swp
   // we compute the terms F^{det} and F^{sw} at once
   // uses the gaugefields in hf and changes the derivative field in hf
-  sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
+  sw_all(hf, mnl->kappa, mnl->c_sw);
 
   g_mu = g_mu1;
   g_mu3 = 0.;
diff --git a/cloverdetratio_monomial.c b/cloverdetratio_monomial.c
index 8bbebd8b9..13d53b682 100644
--- a/cloverdetratio_monomial.c
+++ b/cloverdetratio_monomial.c
@@ -110,10 +110,10 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
-  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3]);
+  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3], mnl->forcefactor);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1]);
+  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1], mnl->forcefactor);
 
   g_mu3 = mnl->rho2; // rho2
   
@@ -136,12 +136,12 @@ void cloverdetratio_derivative_orig(const int no, hamiltonian_field_t * const hf
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3]);
+  sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[3], mnl->forcefactor);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1]);
+  sw_spinor(OO, mnl->w_fields[0], mnl->w_fields[1], mnl->forcefactor);
 
-  sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
+  sw_all(hf, mnl->kappa, mnl->c_sw);
   
   g_mu = g_mu1;
   g_mu3 = 0.;
@@ -217,12 +217,12 @@ void cloverdetratio_derivative(const int no, hamiltonian_field_t * const hf) {
   // computes the insertion matrices for S_eff
   // result is written to swp and swm
   // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e  
-  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3]);
+  sw_spinor(EO, mnl->w_fields[2], mnl->w_fields[3], mnl->forcefactor);
   
   // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1]);
+  sw_spinor(OE, mnl->w_fields[0], mnl->w_fields[1], mnl->forcefactor);
 
-  sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
+  sw_all(hf, mnl->kappa, mnl->c_sw);
   
   g_mu = g_mu1;
   g_mu3 = 0.;
diff --git a/clovernd_trlog_monomial.c b/clovernd_trlog_monomial.c
index 2fa893d5c..ae937090e 100644
--- a/clovernd_trlog_monomial.c
+++ b/clovernd_trlog_monomial.c
@@ -66,9 +66,9 @@ double clovernd_trlog_acc(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   /*compute the contribution from the clover trlog term */
   mnl->energy1 = -sw_trace_nd(EE, mnl->mubar, mnl->epsbar);
-  if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called clover_trlog_acc for id %d dH = %1.4e\n", 
+  if(g_proc_id == 0 && g_debug_level > 0) {
+    printf("called clovernd_trlog_acc for id %d dH = %1.4e\n", 
 	   id, mnl->energy1 - mnl->energy0);
   }
-  return(0.*(mnl->energy1 - mnl->energy0));
+  return(mnl->energy1 - mnl->energy0);
 }
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 5b4eab791..f23b6edde 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -125,18 +125,29 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
     deriv_Sb(OE, g_chi_dn_spinor_field[j-1], mnl->w_fields[3], hf, mnl->forcefactor);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(EE, mnl->w_fields[3], mnl->w_fields[0]);
+    sw_spinor(EE, mnl->w_fields[3], mnl->w_fields[0], mnl->forcefactor);
     // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-    sw_spinor(OO, g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[mnl->MDPolyDegree]);
+    sw_spinor(OO, g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], mnl->forcefactor);
 
     // even/even sites sandwiched by gamma_5 Y_e and gamma_5 X_e
-    sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[1]);
+    sw_spinor(EE, mnl->w_fields[2], mnl->w_fields[1], mnl->forcefactor);
     // odd/odd sites sandwiched by gamma_5 Y_o and gamma_5 X_o
-    sw_spinor(OO, g_chi_dn_spinor_field[j-1], g_chi_up_spinor_field[mnl->MDPolyDegree]);
+    sw_spinor(OO, g_chi_dn_spinor_field[j-1], g_chi_up_spinor_field[mnl->MDPolyDegree], mnl->forcefactor);
   }
-  //to be coded
-  //sw_deriv(EE, mnl->mu);
-  sw_all(hf, mnl->kappa*mnl->forcefactor, mnl->c_sw);
+  // trlog part does not depend on the normalisation of the polynomial
+  sw_deriv_nd(EE);
+  sw_all(hf, mnl->kappa, mnl->c_sw);
+
+  for(int i = 0; i < VOLUME; i++) { 
+    for(int mu = 0; mu < 4; mu++) { 
+      _su3_zero(swm[i][mu]);
+      _su3_zero(swp[i][mu]);
+    }
+  }
+
+  //sw_invert(EE, mnl->mubar);
+  //sw_deriv(EE, mnl->mubar);
+  //sw_all(hf, mnl->kappa, mnl->c_sw);
 
   return;
 }

From da63adaad2c7b984f5e1863e16a1d1b18860d96d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 10 Oct 2012 21:13:58 +0200
Subject: [PATCH 055/110] reduced verbosity again

---
 clovernd_trlog_monomial.c | 2 +-
 cloverndpoly_monomial.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clovernd_trlog_monomial.c b/clovernd_trlog_monomial.c
index ae937090e..e1f21f93d 100644
--- a/clovernd_trlog_monomial.c
+++ b/clovernd_trlog_monomial.c
@@ -66,7 +66,7 @@ double clovernd_trlog_acc(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   /*compute the contribution from the clover trlog term */
   mnl->energy1 = -sw_trace_nd(EE, mnl->mubar, mnl->epsbar);
-  if(g_proc_id == 0 && g_debug_level > 0) {
+  if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called clovernd_trlog_acc for id %d dH = %1.4e\n", 
 	   id, mnl->energy1 - mnl->energy0);
   }
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index f23b6edde..8385f8e77 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -238,7 +238,7 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   mnl->energy1 = square_norm(g_chi_up_spinor_field[ij], VOLUME/2, 1);
   mnl->energy1 += square_norm(g_chi_dn_spinor_field[ij], VOLUME/2, 1);
   
-  if(g_proc_id == 0 && g_debug_level > 0) {
+  if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called cloverndpoly_acc for id %d %d dH = %1.4e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);

From 730bf0e13effbbcf8ed5268295b03a236c0edee2 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 10 Oct 2012 21:38:00 +0200
Subject: [PATCH 056/110] typo fixed that let the ndclover compute eigenvalues
 always

---
 cloverndpoly_monomial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 8385f8e77..0ba16dbef 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -164,7 +164,7 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   sw_invert_nd(mnl->mubar*mnl->mubar - mnl->epsbar*mnl->epsbar);
   
   // we measure before trajectory!
-  if((mnl->rec_ev != 0) || (hf->traj_counter%mnl->rec_ev == 0)) {
+  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
     phmc_compute_ev(hf->traj_counter-1, id, &Qsw_pm_ndbipsi);
   }
 

From 5fed5e9227a9a2e66a414fe7b5c32d96668afb94 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 12 Oct 2012 10:21:07 +0200
Subject: [PATCH 057/110] some docu added

---
 doc/eo_pre.tex | 43 +++++++++++++++++--------------------------
 1 file changed, 17 insertions(+), 26 deletions(-)

diff --git a/doc/eo_pre.tex b/doc/eo_pre.tex
index 726a3ea07..5393b24e3 100644
--- a/doc/eo_pre.tex
+++ b/doc/eo_pre.tex
@@ -502,14 +502,29 @@ \subsubsection{Combining Clover and Nondegenerate Twisted mass term}
 \begin{equation}
   M_{oo|ee}^h = 1+T_{oo|ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1\,.
 \end{equation}
-Because $1+T_{oo|ee}$ is hermitian, we can invert $M_{ee}^h$ by
+Because $1+T_{ee}$ is hermitian, we can invert $M_{ee}^h$ by
 \begin{equation}
   \label{eq:ndSdet}
   (1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)^{-1} =
   \frac{(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)}
   {(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}\,.
 \end{equation}
-Therefore, the determinant we have to compute is
+In practice we compute $((1+T_{ee})^2 + \bar\mu^2 -
+\bar\epsilon^2)^{-1}$ and store the result in the first {\ttfamily
+  VOLUME/2} elements of the array {\ttfamily sw\_inv}. Wherever the
+clover term needs to be applied we then multiply with $((1+T_{ee})^2
++ \bar\mu^2 - \bar\epsilon^2)^{-1}$ and then with the numerator in
+eq.~(\ref{eq:ndSdet}). One could save computing time here for the
+price of using more memory by storing the full inverse. Actually, it
+would be only slightly more than in the two flavour case: in addition
+we would only have to store $\bar\epsilon((1+T_{ee})^2
++ \bar\mu^2 - \bar\epsilon^2)^{-1}$. This would also allow us to re-use a
+lot of the $N_f=2$ flavour implementation.
+
+The clover part $1+T_{ee}$ is identical to the one in the $N_f=2$
+flavour case and stored in the array {\ttfamily sw}. 
+
+The determinant we have to compute is
 \[
 \det(Q^h) =
 \det[\gamma_5(1+T_{ee}+i\bar\mu\gamma_5\tau^3-\bar\epsilon\tau^1)]\
@@ -527,7 +542,6 @@ \subsubsection{Combining Clover and Nondegenerate Twisted mass term}
    \frac{(1+T_{ee}-i\bar\mu\gamma_5\tau^3+\bar\epsilon\tau^1)}
   {(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2}\ \right]\,.
 \end{equation}
-%{\bf is it correct that we take the flvour trace as well??}
 The trace in flavour simplifies the computation to
 \begin{equation}
  \tr_\mathrm{dirac}\left[\ i\sigma_{\mu\nu}
@@ -536,29 +550,6 @@ \subsubsection{Combining Clover and Nondegenerate Twisted mass term}
 \end{equation}
 This can be treated analogously to the degenerate case described
 above.
-%This is unfortunately a rather large matrix in Dirac and flavour
-%space. However, $[(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2]^{-1}$
-%are two $6\times6$ block matrices, with off diagonal $6\times6$ blocks
-%being identically zero. Therefore, the matrix $T_\mathrm{nddet}$ in
-%2-flavour Dirac space has the form (suppressing colour)
-%\[
-%T_\mathrm{nddet}=
-%\begin{pmatrix}
-%  u_0 & u_1 & 0 & 0 & a_0 & a_1 & 0 & 0\\
-%  u_3 & u_2 & 0 & 0 & a_3 & a_2 & 0 & 0\\
-%  0 & 0 & l_0 & l_1 & 0 & 0 & b_0 & b_1\\
-%  0 & 0 & l_3 & l_2 & 0 & 0 & b_3 & b_2\\
-%  a_0 & a_1 & 0 & 0 & u'_0 & u'_1 & 0 & 0\\
-%  a_3 & a_2 & 0 & 0 & u'_3 & u'_2 & 0 & 0\\
-%  0 & 0 & b_0 & b_1 & 0 & 0 & l'_0 & l'_1\\
-%  0 & 0 & b_3 & b_2 & 0 & 0 & l'_3 & l'_2\\
-%\end{pmatrix}\,.
-%\]
-%Actually, $u_i$ and $u'_i$ and $l_i$ and $l'_i$ differ by $\pm\mu$, so
-%this $\mu$-dependence should cancel in the sum for derivative (the
-%$\mu$-dependence in the denominator of eq.~(\ref{eq:ndSdet}) of course
-%remains). The off-diagonal $4\times4$ blocks are of course simply
-%$\epsilon [(1+T_{ee})^2 + \bar\mu^2 - \bar\epsilon^2]^{-1}$.
 
 \subsection{Inversion}
 

From 64be3a685d3856870c9e57f602cdbafc5b4681a2 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 12 Oct 2012 18:12:30 +0200
Subject: [PATCH 058/110] typo fixed

---
 ndpoly_monomial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 0f63e6fb8..809aad6dd 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -168,7 +168,7 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   ndpoly_set_global_parameter(mnl, phmc_exact_poly);
 
   // we measure before trajectory!
-  if((mnl->rec_ev != 0) || (hf->traj_counter%mnl->rec_ev == 0)) {
+  if((mnl->rec_ev != 0) && (hf->traj_counter%mnl->rec_ev == 0)) {
     phmc_compute_ev(hf->traj_counter-1, id, &Qtm_pm_ndbipsi);
   }
 

From 618287d867b253deff4188f74fee4bc90fbb5d05 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 12 Oct 2012 19:02:11 +0200
Subject: [PATCH 059/110] some output sorted and ifdefs for SSE removed

---
 clovertm_operators.c    |   5 --
 init_chi_spinor_field.c |   5 --
 monomial.c              | 118 ++++++++++++++++++++++++----------------
 3 files changed, 70 insertions(+), 58 deletions(-)

diff --git a/clovertm_operators.c b/clovertm_operators.c
index b7b110b0d..38c750cf9 100644
--- a/clovertm_operators.c
+++ b/clovertm_operators.c
@@ -1044,13 +1044,8 @@ void init_sw_fields() {
       sw[i] = sw[i-1]+3;
       sw_inv[i] = sw_inv[i-1]+4;
     }
-#    if (defined SSE || defined SSE2 || defined SSE3)
     sw[0][0] = (su3*)(((unsigned long int)(_sw)+ALIGN_BASE)&~ALIGN_BASE);
     sw_inv[0][0] = (su3*)(((unsigned long int)(_sw_inv)+ALIGN_BASE)&~ALIGN_BASE);
-#    else
-    sw[0][0] = _sw;
-    sw_inv[0][0] = _sw_inv;
-#    endif
     tmp = sw[0][0];
     for(int i = 0; i < V; i++) {
       for(int j = 0; j < 3; j++) {
diff --git a/init_chi_spinor_field.c b/init_chi_spinor_field.c
index d88526e1e..9d1ff5f83 100644
--- a/init_chi_spinor_field.c
+++ b/init_chi_spinor_field.c
@@ -61,13 +61,8 @@ int init_chi_spinor_field(const int V, const int nr) {
       errno = 0;
       return(2);
     }
-#if ( defined SSE || defined SSE2 || defined SSE3)
     g_chi_up_spinor_field[0] = (spinor*)(((unsigned long int)(sp_up)+ALIGN_BASE)&~ALIGN_BASE);
     g_chi_dn_spinor_field[0] = (spinor*)(((unsigned long int)(sp_dn)+ALIGN_BASE)&~ALIGN_BASE);
-#else
-    g_chi_up_spinor_field[0] = sp_up;
-    g_chi_dn_spinor_field[0] = sp_dn;
-#endif
     
     for(i = 1; i < nr; i++){
       g_chi_up_spinor_field[i] = g_chi_up_spinor_field[i-1]+V;
diff --git a/monomial.c b/monomial.c
index f3911cdb5..2aba1245d 100644
--- a/monomial.c
+++ b/monomial.c
@@ -177,6 +177,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].hbfunction = &det_heatbath;
 	monomial_list[i].accfunction = &det_acc;
 	monomial_list[i].derivativefunction = &det_derivative;
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type DET, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == CLOVERDET) {
 	monomial_list[i].hbfunction = &cloverdet_heatbath;
@@ -192,6 +195,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	sw_mu = monomial_list[i].mu;
 	sw_k = monomial_list[i].kappa;
 	sw_c = monomial_list[i].c_sw;
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type CLOVERDET, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == CLOVERDETRATIO) {
 	monomial_list[i].hbfunction = &cloverdetratio_heatbath;
@@ -202,18 +208,29 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].Qp = &Qsw_plus_psi;
 	monomial_list[i].Qm = &Qsw_minus_psi;
 	init_swpm(VOLUME);
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type CLOVERDETRATIO, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == DETRATIO) {
 	monomial_list[i].hbfunction = &detratio_heatbath;
 	monomial_list[i].accfunction = &detratio_acc;
 	monomial_list[i].derivativefunction = &detratio_derivative;
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type DETRATIO, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == POLY) {
 	monomial_list[i].hbfunction = &poly_heatbath;
 	monomial_list[i].accfunction = &poly_acc;
 	monomial_list[i].derivativefunction = &poly_derivative;
 	retval=init_poly_monomial(V,i);
-	if(retval!=0) return retval;
+	if(retval != 0) {
+	  return retval;
+	}
+      	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type POLY, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == POLYDETRATIO) {
 	monomial_list[i].hbfunction = &poly_heatbath;
@@ -222,6 +239,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].MDPolyDetRatio = 1;
 	retval=init_poly_monomial(V,i);
 	if(retval!=0) return retval;
+      	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type POLYDETRATIO, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == NDPOLY) {
 	monomial_list[i].hbfunction = &ndpoly_heatbath;
@@ -231,6 +251,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].pf2 = __pf+no*V;
 	no++;
 	retval = init_ndpoly_monomial(i);
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type NDPOLY, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == NDCLOVER) {
 	init_swpm(VOLUME);
@@ -249,6 +272,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	//monomial_list[i].Qm = &Qsw_dagger_ndpsi;
 	no++;
 	retval = init_ndpoly_monomial(i);
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type NDCLOVER, no_monomials= %d\n", no_monomials);
+	}
       }
       else if(monomial_list[i].type == NDDETRATIO) {
 	monomial_list[i].hbfunction = &dummy_heatbath;
@@ -257,6 +283,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].pf2 = __pf+no*V;
 	monomial_list[i].timescale = -5;
 	no++;
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type NDDETRATIO, no_monomials= %d\n", no_monomials);
+	}
       }
     }
     else {
@@ -278,6 +307,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	g_rgi_C1 = monomial_list[i].c1;
 	monomial_list[i].c0 = 1. - 8.*monomial_list[i].c1;
 	g_rgi_C0 = monomial_list[i].c0;
+	if(g_proc_id == 0 && g_debug_level > 1) {
+	  printf("# Initialised monomial of type GAUGE, no_monomials= %d\n", no_monomials);
+	}
       }
     }
     monomial_list[i].id = i;
@@ -299,7 +331,7 @@ int init_monomials(const int V, const int even_odd_flag) {
     monomial_list[no_monomials-1].derivativefunction = NULL;
     monomial_list[no_monomials-1].timescale = 0;
     monomial_list[no_monomials-1].even_odd_flag = even_odd_flag;
-    if(g_proc_id == 0) {
+    if(g_proc_id == 0 && g_debug_level > 1) {
       printf("# Initialised clover_trlog_monomial, no_monomials= %d\n", no_monomials);
     }
   }
@@ -320,7 +352,7 @@ int init_monomials(const int V, const int even_odd_flag) {
     monomial_list[no_monomials-1].derivativefunction = NULL;
     monomial_list[no_monomials-1].timescale = 0;
     monomial_list[no_monomials-1].even_odd_flag = 1;
-    if(g_proc_id == 0) {
+    if(g_proc_id == 0 && g_debug_level > 1) {
       printf("# Initialised clovernd_trlog_monomial, no_monomials= %d\n", no_monomials);
     }
   }
@@ -336,7 +368,7 @@ void free_monomials() {
 
 
 int init_poly_monomial(const int V, const int id){
-
+  
   monomial * mnl = &monomial_list[id];
   int i,j,k;
   FILE* rootsFile=NULL;
@@ -347,32 +379,25 @@ int init_poly_monomial(const int V, const int id){
   double eps;
 
   spinor *_pf=(spinor*)NULL;
-
+  
   if((void*)(_pf = (spinor*)calloc((mnl->MDPolyDegree/2+2)*V+1, sizeof(spinor))) == NULL) {
-      printf ("malloc errno in init_poly_monomial pf fields: %d\n",errno); 
-      errno = 0;
-      return(1);
-    }
-
-    if((void*)(mnl->MDPoly_chi_spinor_fields=(spinor**)calloc(mnl->MDPolyDegree/2+2,sizeof(spinor*))) ==NULL ){
-      printf ("malloc errno in init_poly_monomial pf fields: %d\n",errno); 
-      errno = 0;
-      return(2);
-    }
-
-#if ( defined SSE || defined SSE2 || defined SSE3)
-      (mnl->MDPoly_chi_spinor_fields)[0] = (spinor*)(((unsigned long int)(_pf)+ALIGN_BASE)&~ALIGN_BASE);
-#else
-      (mnl->MDPoly_chi_spinor_fields)[0] = _pf;
-#endif
-
-
+    printf ("malloc errno in init_poly_monomial pf fields: %d\n",errno); 
+    errno = 0;
+    return(1);
+  }
+  
+  if((void*)(mnl->MDPoly_chi_spinor_fields=(spinor**)calloc(mnl->MDPolyDegree/2+2,sizeof(spinor*))) ==NULL ){
+    printf ("malloc errno in init_poly_monomial pf fields: %d\n",errno); 
+    errno = 0;
+    return(2);
+  }
+  
+  (mnl->MDPoly_chi_spinor_fields)[0] = (spinor*)(((unsigned long int)(_pf)+ALIGN_BASE)&~ALIGN_BASE);
+  
   for(i = 1; i < (mnl->MDPolyDegree/2+2); i++){
     mnl->MDPoly_chi_spinor_fields[i] = mnl->MDPoly_chi_spinor_fields[i-1]+V;
   }
-
-
-
+  
   if(strlen(monomial_list[id].MDPolyRootsFile)==0){
     sprintf(monomial_list[id].MDPolyRootsFile,
 	    "%s_deg_%d_eps_%1.16e.roots",
@@ -398,35 +423,33 @@ int init_poly_monomial(const int V, const int id){
     } 
     else {
       fprintf(stderr,"Reading local normalization from file FAILED\n Borting Ab\n");
-      #ifdef MPI
-         MPI_Finalize();
-      #endif
+#ifdef MPI
+      MPI_Finalize();
+#endif
       exit(6);
     }
   }
-
-
+  
   /* read in the roots from the given file */
-
+  
   if((void*)(mnl->MDPolyRoots=(_Complex double*)calloc(mnl->MDPolyDegree,sizeof(_Complex double))) ==NULL ){
     printf ("malloc errno in init_poly_monomial roots array: %d\n",errno); 
     errno = 0;
     return(3);
   }
-
-
+  
+  printf("reading roots...!\n");
   if((rootsFile=fopen(mnl->MDPolyRootsFile,"r")) != (FILE*)NULL) {
-    if (fgets(title, 100, rootsFile) == NULL)
-    {
+    if (fgets(title, 100, rootsFile) == NULL) {
       fprintf(stderr, "Cant read Roots file: %s Aborting...\n", mnl->MDPolyRootsFile);
-      #ifdef MPI
-         MPI_Finalize();
-      #endif
+#ifdef MPI
+      MPI_Finalize();
+#endif
       exit(6);
     }
     
     /* Here we read in the 2n roots needed for the polinomial in sqrt(s) */
-    for(j=0; j<(mnl->MDPolyDegree); j++){
+    for(j = 0; j < (mnl->MDPolyDegree); j++) {
       errcode = fscanf(rootsFile," %d %lf %lf \n", &k, (double*)&(mnl->MDPolyRoots[j]), (double*)&(mnl->MDPolyRoots[j]) + 1);
     }
     fclose(rootsFile);
@@ -438,17 +461,16 @@ int init_poly_monomial(const int V, const int id){
 #endif
     exit(6);
   }
-
-
-
-  printf("Here come the roots\n");
-
+  
+  if(g_proc_id == 0 && g_debug_level > 2) {
+    printf("# the root are:\n");
     for(j=0; j<(mnl->MDPolyDegree); j++){
-      printf("%lf %lf\n",  creal(mnl->MDPolyRoots[j]), cimag(mnl->MDPolyRoots[j]));
+      printf("# %lf %lf\n",  creal(mnl->MDPolyRoots[j]), cimag(mnl->MDPolyRoots[j]));
     }
-
+  }
+  
   return 0;
-
+  
 }
 
 void dummy_derivative(const int id, hamiltonian_field_t * const hf) {

From faf8c645c9aab29a226a1d1130016311696ff9d7 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 12 Oct 2012 19:26:37 +0200
Subject: [PATCH 060/110] removed some not needed assigns...

---
 cloverndpoly_monomial.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 0ba16dbef..40829c6e0 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -156,6 +156,7 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   int j;
   monomial * mnl = &monomial_list[id];
+  spinor *up0, *dn0, *up1, *dn1, *dummy;
 
   ndpoly_set_global_parameter(mnl, 0);
   g_mu3 = 0.;
@@ -177,20 +178,24 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 
   Qsw_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
 	    g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0]);
+
+  up0 = g_chi_up_spinor_field[0];
+  up1 = g_chi_up_spinor_field[1];
+  dn0 = g_chi_dn_spinor_field[0];
+  dn1 = g_chi_dn_spinor_field[1];
   
   for(j = 1; j < (mnl->MDPolyDegree); j++){
-    assign(g_chi_up_spinor_field[0], g_chi_up_spinor_field[1], VOLUME/2);
-    assign(g_chi_dn_spinor_field[0], g_chi_dn_spinor_field[1], VOLUME/2);
-    
-    Qsw_tau1_sub_const_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
-			     g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], 
+    Qsw_tau1_sub_const_ndpsi(up0, dn0,
+			     up1, dn1, 
 			     mnl->MDPolyRoots[mnl->MDPolyDegree-2+j]);
+    dummy = up1; up1 = up0; up0 = dummy;
+    dummy = dn1; dn1 = dn0; dn0 = dummy;
   }
-  Ptilde_ndpsi(g_chi_up_spinor_field[0], g_chi_dn_spinor_field[0], mnl->PtildeCoefs, 
-	       mnl->PtildeDegree, g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], &Qsw_pm_ndpsi);
+  Ptilde_ndpsi(up0, dn0, mnl->PtildeCoefs, 
+	       mnl->PtildeDegree, up1, dn1, &Qsw_pm_ndpsi);
   
-  assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
-  assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
+  assign(mnl->pf, up0, VOLUME/2);
+  assign(mnl->pf2, dn0, VOLUME/2);
   
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called cloverndpoly_heatbath for id %d\n", id);
@@ -200,7 +205,7 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
 
 
 double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
-  int j, ij=0;
+  int j;
   monomial * mnl = &monomial_list[id];
   spinor *up0, *dn0, *up1, *dn1, *dummy;
 
@@ -211,7 +216,6 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
 
   mnl->energy1 = 0.;
 
-  /* IF PHMC */
   up0 = g_chi_up_spinor_field[0];
   up1 = g_chi_up_spinor_field[1];
   dn0 = g_chi_dn_spinor_field[0];
@@ -221,7 +225,6 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   assign(dn0, mnl->pf2, VOLUME/2);
 
   for(j = 1; j <= (mnl->MDPolyDegree-1); j++) {
-    /* Change this name !!*/
     Qsw_tau1_sub_const_ndpsi(up1, dn1, up0, dn0, mnl->MDPolyRoots[j-1]);
     
     dummy = up1; up1 = up0; up0 = dummy;
@@ -229,14 +232,8 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     /* result always in up0 and dn0 */
   }
   
-  ij=0;
-  if(up0 != g_chi_up_spinor_field[ij]) {
-    assign(g_chi_up_spinor_field[ij], up0, VOLUME/2);
-    assign(g_chi_dn_spinor_field[ij], dn0, VOLUME/2);
-  }
-  
-  mnl->energy1 = square_norm(g_chi_up_spinor_field[ij], VOLUME/2, 1);
-  mnl->energy1 += square_norm(g_chi_dn_spinor_field[ij], VOLUME/2, 1);
+  mnl->energy1 = square_norm(up0, VOLUME/2, 1);
+  mnl->energy1 += square_norm(dn0, VOLUME/2, 1);
   
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called cloverndpoly_acc for id %d %d dH = %1.4e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);

From 7f787f7ecf343572c0ea9415ba1abd22d6a815ac Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 12 Oct 2012 19:58:22 +0200
Subject: [PATCH 061/110] output cleaned up

---
 clover_trlog_monomial.c |  2 +-
 cloverndpoly_monomial.c | 13 +--------
 ndpoly_monomial.c       | 65 ++++++++++-------------------------------
 3 files changed, 18 insertions(+), 62 deletions(-)

diff --git a/clover_trlog_monomial.c b/clover_trlog_monomial.c
index ac116334b..93f7cc891 100644
--- a/clover_trlog_monomial.c
+++ b/clover_trlog_monomial.c
@@ -66,7 +66,7 @@ double clover_trlog_acc(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   /*compute the contribution from the clover trlog term */
   mnl->energy1 = -sw_trace(EO, mnl->mu);
-  if(g_proc_id == 0 && g_debug_level > 0) {
+  if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called clover_trlog_acc for id %d dH = %1.4e\n", 
 	   id, mnl->energy1 - mnl->energy0);
   }
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index 40829c6e0..a638855d5 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -100,7 +100,7 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   assign(g_chi_up_spinor_field[mnl->MDPolyDegree], g_chi_up_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
   assign(g_chi_dn_spinor_field[mnl->MDPolyDegree], g_chi_dn_spinor_field[mnl->MDPolyDegree-2], VOLUME/2);
   
-  for(j=(mnl->MDPolyDegree-1); j>=1; j--) {
+  for(j = (mnl->MDPolyDegree-1); j > 0; j--) {
     assign(g_chi_up_spinor_field[mnl->MDPolyDegree-1], g_chi_up_spinor_field[mnl->MDPolyDegree], VOLUME/2);
     assign(g_chi_dn_spinor_field[mnl->MDPolyDegree-1], g_chi_dn_spinor_field[mnl->MDPolyDegree], VOLUME/2);
     
@@ -138,17 +138,6 @@ void cloverndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
   sw_deriv_nd(EE);
   sw_all(hf, mnl->kappa, mnl->c_sw);
 
-  for(int i = 0; i < VOLUME; i++) { 
-    for(int mu = 0; mu < 4; mu++) { 
-      _su3_zero(swm[i][mu]);
-      _su3_zero(swp[i][mu]);
-    }
-  }
-
-  //sw_invert(EE, mnl->mubar);
-  //sw_deriv(EE, mnl->mubar);
-  //sw_all(hf, mnl->kappa, mnl->c_sw);
-
   return;
 }
 
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index 809aad6dd..c500e4457 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -104,8 +104,8 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 		    g_chi_up_spinor_field[j-1], g_chi_dn_spinor_field[j-1], EO);
       
       /* \delta M_eo sandwitched by  chi[j-1]_e^\dagger  and  chi[2N-j]_o */
-      deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);      /* UP */
-      deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);    /* DN */
+      deriv_Sb(EO, mnl->w_fields[0], g_chi_up_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);/* UP */
+      deriv_Sb(EO, mnl->w_fields[1], g_chi_dn_spinor_field[phmc_dop_n_cheby], hf, mnl->forcefactor);/* DN */
       
       /* Get the even parts of the  (2N-j)-th  chi_spinors */
       H_eo_tm_ndpsi(mnl->w_fields[0], mnl->w_fields[1], 
@@ -184,11 +184,8 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
     zero_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2);
   }
 
-  if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-    printf("PHMC: Here comes the computation of H_old with \n \n");
-    printf("PHMC: First: random spinors and their norm  \n ");
-    printf("PHMC: OLD Ennergy UP %e \n", mnl->energy0);
-    printf("PHMC: OLD Energy  DN + UP %e \n\n", mnl->energy0);
+  if((g_proc_id == g_stdio_proc) && (g_debug_level > 5)) {
+    printf("# NDPOLY: OLD Energy  DN + UP %e \n\n", mnl->energy0);
   }
 
   if(phmc_exact_poly==0){
@@ -254,20 +251,6 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   assign(mnl->pf, g_chi_up_spinor_field[0], VOLUME/2);
   assign(mnl->pf2, g_chi_dn_spinor_field[0], VOLUME/2);
 
-  if(g_debug_level > 2) {
-    temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
-    if(g_proc_id == g_stdio_proc) {
-      printf("PHMC: Then: evaluate Norm of pseudofermion heatbath BHB \n ");
-      printf("PHMC: Norm of BHB up squared %e \n", temp);
-    }
-
-    if(g_epsbar!=0.0 || phmc_exact_poly==0) 
-      temp += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
-
-    if(g_proc_id == g_stdio_proc){
-      printf("PHMC: Norm of BHB up + BHB dn squared %e \n\n", temp);
-    }
-  }
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called ndpoly_heatbath for id %d \n", id);
   }
@@ -322,15 +305,12 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     temp = square_norm(g_chi_dn_spinor_field[ij], VOLUME/2, 1);
     Ener[ij] += temp;
 
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-      printf("PHMC: Here comes the computation of H_new with \n \n");
-
-      printf("PHMC: At j=%d  PHMC Final Energy %e \n", ij, mnl->energy1+Ener[ij]);
-      printf("PHMC: At j=%d  PHMC Only Final Energy %e \n", ij, Ener[ij]);
+    if((g_proc_id == g_stdio_proc) && (g_debug_level > 4)) {
+      printf("# NDPOLY: At j=%d H before H-correction %e \n", ij, Ener[ij]);
     }
     
     /* Here comes the loop for the evaluation of A, A^2, ...  */
-    for(j = 1; j < 1; j++){ /* To omit corrections just set  j<1 */
+    for(j = 1; j < 8; j++){ /* To omit corrections just set  j<1 */
       
       if(j % 2){ /*  Chi[j] = ( Qdag P  Ptilde ) Chi[j-1]  */ 
 	Ptilde_ndpsi(g_chi_up_spinor_field[j], g_chi_dn_spinor_field[j], 
@@ -358,29 +338,26 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
       sgn = -1.0;
       for(ij = 1; ij < j; ij++){
 	fact = factor[j] / (factor[ij] * factor[j-ij]);
-	if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-	  printf("PHMC: Here  j=%d  and  ij=%d   sign=%f  fact=%f \n", j ,ij, sgn, fact);
+	if((g_proc_id == g_stdio_proc) && (g_debug_level > 4)) {
+	  printf("# NDPOLY: Here  j=%d  and  ij=%d   sign=%f  fact=%f \n", j ,ij, sgn, fact);
 	}
 	Ener[j] += sgn*fact*Ener[ij];
 	sgn = -sgn;
       }
       temp = square_norm(g_chi_up_spinor_field[j], VOLUME/2, 1);
       temp += square_norm(g_chi_dn_spinor_field[j], VOLUME/2, 1);
-      if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-	printf("PHMC: Here  j=%d   sign=%f  temp=%e \n", j, sgn, temp);
+      if((g_proc_id == g_stdio_proc) && (g_debug_level > 4)) {
+	printf("# NDPOLY: Here  j=%d   sign=%f  temp=%e \n", j, sgn, temp);
       }
 
       Ener[j] += sgn*temp;
 
       Diff = fabs(Ener[j] - Ener[j-1]);
       if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)) {
-	printf("PHMC: Correction aftern %d steps: %e \n", j, Diff);
+	printf("# NDPOLY: H-Correction after %d steps: %e \n", j, Diff);
       }
 
       if(Diff < mnl->PrecisionHfinal) {
-	if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-	  printf("PHMC: At j = %d  PHMC Only Final Energy %e \n", j, Ener[j]);
-	}
 	break;
       }
     }
@@ -406,17 +383,12 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     temp = square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
     Ener[0] += temp;
 
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
+    if((g_proc_id == g_stdio_proc) && (g_debug_level > 4)) {
       ij=0;
-      printf("PHMC: Here comes the computation of H_new with \n \n");
-      printf("PHMC: At j=%d  P+HMC Final Energy %e \n", ij, mnl->energy1+Ener[0]);
-      printf("PHMC: At j=%d  PHMC Only Final Energy %e \n", ij, Ener[0]);
+      printf("# NDPOLY: At j=%d  PHMC Only Final Energy %e \n", ij, Ener[0]);
     }
 
     mnl->energy1 += Ener[0];
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-      printf("PHMC: At j = %d  P=%e +HMC Final Energy %e \n\n", ij, Ener[0], mnl->energy1);
-    }
   } 
   else if(phmc_exact_poly == 1 && g_epsbar == 0.0) {
     for(j = 1; j < (mnl->MDPolyDegree); j++) {
@@ -430,16 +402,11 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
     temp = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
     Ener[0] = temp;
 
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-      printf("PHMC: Here comes the computation of H_new with \n \n");
-      printf("PHMC: At j=%d  P+HMC Final Energy %e \n", ij, mnl->energy1+Ener[0]);
-      printf("PHMC: At j=%d  PHMC Only Final Energy %e \n", ij, Ener[0]);
+    if((g_proc_id == g_stdio_proc) && (g_debug_level > 4)) {
+      printf("# NDPOLY: At j=%d  PHMC Only Final Energy %e \n", ij, Ener[0]);
     }
 
     mnl->energy1 += Ener[0];
-    if((g_proc_id == g_stdio_proc) && (g_debug_level > 2)) {
-      printf("PHMC: At j = %d  P=%e +HMC Final Energy %e \n\n", ij, Ener[0], mnl->energy1);
-    }
   }
 
   if(g_proc_id == 0 && g_debug_level > 3) {

From fbe0ec16fa5028e7b84186494495f527277837fc Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sat, 13 Oct 2012 13:22:36 +0200
Subject: [PATCH 062/110] removed linsolve everywhere and replaced with cg_her,
 which is faster now

---
 Makefile.in               |   2 +-
 chebyshev_polynomial.c    |   4 +-
 chebyshev_polynomial_nd.c |  25 --
 cloverdetratio_monomial.c |   7 -
 cloverndpoly_monomial.c   |   9 -
 det_monomial.c            |  36 +--
 detratio_monomial.c       |  53 ++--
 integrator.c              |   9 -
 invert_clover_eo.c        |   1 -
 invert_doublet_eo.c       |   1 -
 invert_eo.c               |   4 -
 linsolve.c                | 495 --------------------------------------
 linsolve.h                |  25 --
 monomial.c                |   6 +
 nddetratio_monomial.c     |   1 -
 ndpoly_monomial.c         |   3 -
 poly_monomial.c           |  15 +-
 reweighting_factor_nd.c   |   1 -
 tm_operators_nd.c         |   1 -
 19 files changed, 40 insertions(+), 658 deletions(-)
 delete mode 100644 linsolve.c
 delete mode 100644 linsolve.h

diff --git a/Makefile.in b/Makefile.in
index 3cb70cfb9..bb6902b56 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -50,7 +50,7 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	invert_eo invert_doublet_eo update_gauge \
 	polyakov_loop getopt sighandler reweighting_factor \
 	source_generation boundary update_tm ranlxd  \
-	mpi_init linsolve deriv_Sb deriv_Sb_D_psi ranlxs \
+	mpi_init deriv_Sb deriv_Sb_D_psi ranlxs \
 	xchange_deri geometry_eo invert_overlap \
 	init_moment_field init_gauge_tmp \
 	xchange_field xchange_gauge prepare_source \
diff --git a/chebyshev_polynomial.c b/chebyshev_polynomial.c
index f81a53d9a..1bae10f16 100644
--- a/chebyshev_polynomial.c
+++ b/chebyshev_polynomial.c
@@ -26,13 +26,11 @@
 #include <stdio.h>
 #include <math.h>
 #include "global.h"
-#include "linsolve.h"
 #include "linalg_eo.h"
 #include "start.h"
 #include "tm_operators.h"
-#include "chebyshev_polynomial.h"
 #include "tm_operators_nd.h"
-
+#include "chebyshev_polynomial.h"
 
 #define PI 3.141592653589793
 
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index aa9ae5546..5756be9f4 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -25,7 +25,6 @@
 #include <stdio.h>
 #include <math.h>
 #include "global.h"
-#include "linsolve.h"
 #include "linalg_eo.h"
 #include "start.h"
 #include "tm_operators.h"
@@ -106,19 +105,15 @@ double cheb_eval(int M, double *c, double s){
 void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
 			     const double EVMin, const double EVMax,
 			     matrix_mult_nd Qsq) { 
-  int j;
   double temp, temp2;
   int degree_of_p = *_degree_of_p + 1;
 
-  double sum=0.0;
-
   spinor *ss=NULL, *ss_=NULL, *sc=NULL, *sc_=NULL;
   spinor *auxs=NULL, *auxs_=NULL, *auxc=NULL, *auxc_=NULL;
   spinor *aux2s=NULL, *aux2s_=NULL, *aux2c=NULL, *aux2c_=NULL;
 
   *coefs = calloc(degree_of_p, sizeof(double));
 
-#if ( defined SSE || defined SSE2 || defined SSE3)
   ss_   = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
   auxs_ = calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
   aux2s_= calloc(VOLUMEPLUSRAND/2+1, sizeof(spinor));
@@ -133,16 +128,6 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   auxc  = (spinor *)(((unsigned long int)(auxc_)+ALIGN_BASE)&~ALIGN_BASE);
   aux2c = (spinor *)(((unsigned long int)(aux2c_)+ALIGN_BASE)&~ALIGN_BASE);
   
-#else
-  ss   =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  auxs =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  aux2s=calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  sc   =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  auxc =calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-  aux2c=calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
-#endif
-  
-  
   chebyshev_coefs(EVMin, EVMax, *coefs, degree_of_p, -0.5);
 
   random_spinor_field(ss,VOLUME/2, 1);
@@ -173,7 +158,6 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
     /* this is || (P S P - 1)X ||^2 /|| 2X ||^2 */
     /* where X is a random spinor field         */
     printf("# NDPOLY MD Polynomial: relative squared accuracy in components:\n# UP=%e  DN=%e \n", temp, temp2);
-    /*     printf("NDPOLY: Sum remaining | c_n | = %e \n", sum); */
     fflush(stdout);
   }
 
@@ -189,20 +173,11 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   /* RECALL THAT WE NEED AN EVEN DEGREE !!!! */
   *_degree_of_p = degree_of_p;
 
-#if ( defined SSE || defined SSE2 || defined SSE3)
   free(ss_);   
   free(auxs_); 
   free(aux2s_);
   free(sc_);   
   free(auxc_); 
   free(aux2c_);
-#else
-  free(ss);   
-  free(auxs); 
-  free(aux2s);
-  free(sc);   
-  free(auxc); 
-  free(aux2c);
-#endif
   return;
 }
diff --git a/cloverdetratio_monomial.c b/cloverdetratio_monomial.c
index 13d53b682..073c6d28a 100644
--- a/cloverdetratio_monomial.c
+++ b/cloverdetratio_monomial.c
@@ -28,24 +28,17 @@
 #include <time.h>
 #include "global.h"
 #include "su3.h"
-#include "su3adj.h"
-#include "su3spinor.h"
-#include "ranlxd.h"
 #include "start.h"
 #include "linalg_eo.h"
-#include "linsolve.h"
 #include "deriv_Sb.h"
 #include "gamma.h"
 #include "tm_operators.h"
-#include "hybrid_update.h"
 #include "Hopping_Matrix.h"
 #include "solver/chrono_guess.h"
 #include "solver/solver.h"
 #include "read_input.h"
-#include "smearing/stout.h"
 #include "clovertm_operators.h"
 #include "clover_leaf.h"
-
 #include "monomial.h"
 #include "boundary.h"
 #include "cloverdetratio_monomial.h"
diff --git a/cloverndpoly_monomial.c b/cloverndpoly_monomial.c
index a638855d5..fee570d71 100644
--- a/cloverndpoly_monomial.c
+++ b/cloverndpoly_monomial.c
@@ -27,29 +27,20 @@
 #include <time.h>
 #include "global.h"
 #include "su3.h"
-#include "su3adj.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "linsolve.h"
 #include "solver/solver.h"
 #include "deriv_Sb.h"
 #include "tm_operators.h"
-#include "chebyshev_polynomial.h"
 #include "tm_operators_nd.h"
 #include "Hopping_Matrix.h"
 #include "phmc.h"
-#include "tm_operators_nd.h"
-#include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
-#include "reweighting_factor_nd.h"
 #include "monomial.h"
 #include "hamiltonian_field.h"
 #include "boundary.h"
-#include "phmc.h"
-#include "init_chi_spinor_field.h"
 #include "clovertm_operators.h"
 #include "clover_leaf.h"
-
 #include "cloverndpoly_monomial.h"
 
 /********************************************
diff --git a/det_monomial.c b/det_monomial.c
index 4e8c62ff4..7d39e22ae 100644
--- a/det_monomial.c
+++ b/det_monomial.c
@@ -26,21 +26,14 @@
 #include <math.h>
 #include "global.h"
 #include "su3.h"
-#include "su3adj.h"
-#include "su3spinor.h"
-#include "ranlxd.h"
-#include "sse.h"
 #include "start.h"
 #include "linalg_eo.h"
-#include "linsolve.h"
 #include "deriv_Sb.h"
 #include "deriv_Sb_D_psi.h"
-#include "gamma.h"
 #include "tm_operators.h"
 #include "hybrid_update.h"
 #include "Hopping_Matrix.h"
 #include "solver/chrono_guess.h"
-#include "solver/bicgstab_complex.h"
 #include "solver/solver.h"
 #include "read_input.h"
 #include "hamiltonian_field.h"
@@ -48,9 +41,6 @@
 #include "monomial.h"
 #include "det_monomial.h"
 
-extern int ITER_MAX_BCG;
-extern int ITER_MAX_CG;
-
 /* think about chronological solver ! */
 
 void det_derivative(const int id, hamiltonian_field_t * const hf) {
@@ -77,14 +67,14 @@ void det_derivative(const int id, hamiltonian_field_t * const hf) {
     /* Invert Q_{+} Q_{-} */
     /* X_o -> w_fields[1] */
     chrono_guess(mnl->w_fields[1], mnl->pf, mnl->csg_field, mnl->csg_index_array,
-		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_pm_psi);
+		 mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
     mnl->iter1 += cg_her(mnl->w_fields[1], mnl->pf, mnl->maxiter, mnl->forceprec, 
-			 g_relative_precision_flag, VOLUME/2, &Qtm_pm_psi);
+			 g_relative_precision_flag, VOLUME/2, mnl->Qsq);
     chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
     
     /* Y_o -> w_fields[0]  */
-    Qtm_minus_psi(mnl->w_fields[0], mnl->w_fields[1]);
+    mnl->Qm(mnl->w_fields[0], mnl->w_fields[1]);
     
     /* apply Hopping Matrix M_{eo} */
     /* to get the even sites of X_e */
@@ -128,7 +118,7 @@ void det_derivative(const int id, hamiltonian_field_t * const hf) {
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
       mnl->iter1 += bicgstab_complex(mnl->w_fields[0], mnl->pf, 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
-				     VOLUME,  Q_plus_psi);
+				     VOLUME, &Q_plus_psi);
       chrono_add_solution(mnl->w_fields[0], mnl->csg_field, mnl->csg_index_array,
 			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
       
@@ -139,7 +129,7 @@ void det_derivative(const int id, hamiltonian_field_t * const hf) {
 		   mnl->csg_index_array2, mnl->csg_N2, mnl->csg_n2, VOLUME/2, &Q_minus_psi);
       mnl->iter1 += bicgstab_complex(mnl->w_fields[1], mnl->w_fields[0], 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
-				     VOLUME, Q_minus_psi);
+				     VOLUME, &Q_minus_psi);
       chrono_add_solution(mnl->w_fields[1], mnl->csg_field2, mnl->csg_index_array2,
 			  mnl->csg_N2, &mnl->csg_n2, VOLUME/2);
       g_mu = -g_mu;   
@@ -169,7 +159,7 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
     random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
-    Qtm_plus_psi(mnl->pf, mnl->w_fields[0]);
+    mnl->Qp(mnl->pf, mnl->w_fields[0]);
     chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
     if(mnl->solver != CG) {
@@ -200,7 +190,6 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
 
 double det_acc(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
-  int save_iter = ITER_MAX_BCG;
   int save_sloppy = g_sloppy_precision_flag;
 
   g_mu = mnl->mu;
@@ -211,12 +200,14 @@ double det_acc(const int id, hamiltonian_field_t * const hf) {
       ITER_MAX_BCG = 0;
     }
     chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
-		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
+    	 mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
     g_sloppy_precision_flag = 0;
-    mnl->iter0 = bicg(mnl->w_fields[0], mnl->pf, mnl->accprec, g_relative_precision_flag);
+    mnl->iter0 = cg_her(mnl->w_fields[0], mnl->pf, mnl->maxiter, mnl->accprec, g_relative_precision_flag,
+    			VOLUME/2, mnl->Qsq);
+    mnl->Qm(mnl->w_fields[1], mnl->w_fields[0]);
     g_sloppy_precision_flag = save_sloppy;
     /* Compute the energy contr. from first field */
-    mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
+    mnl->energy1 = square_norm(mnl->w_fields[1], VOLUME/2, 1);
   }
   else {
     if(mnl->solver == CG) {
@@ -224,7 +215,7 @@ double det_acc(const int id, hamiltonian_field_t * const hf) {
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_pm_psi);
       mnl->iter0 = cg_her(mnl->w_fields[1], mnl->pf, 
 			  mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
-			  VOLUME, Q_pm_psi);
+			  VOLUME, &Q_pm_psi);
       Q_minus_psi(mnl->w_fields[0], mnl->w_fields[1]);
       /* Compute the energy contr. from first field */
       mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME, 1);
@@ -234,7 +225,7 @@ double det_acc(const int id, hamiltonian_field_t * const hf) {
 		   mnl->csg_N, mnl->csg_n, VOLUME/2, &Q_plus_psi);
       mnl->iter0 += bicgstab_complex(mnl->w_fields[0], mnl->pf, 
 				     mnl->maxiter, mnl->forceprec, g_relative_precision_flag, 
-				     VOLUME,  Q_plus_psi);
+				     VOLUME,  &Q_plus_psi);
       mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME, 1);
     }
   }
@@ -244,6 +235,5 @@ double det_acc(const int id, hamiltonian_field_t * const hf) {
     printf("called det_acc for id %d %d dH = %1.4e\n", 
 	   id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0);
   }
-  ITER_MAX_BCG = save_iter;
   return(mnl->energy1 - mnl->energy0);
 }
diff --git a/detratio_monomial.c b/detratio_monomial.c
index aa93f013f..bb24ac24a 100644
--- a/detratio_monomial.c
+++ b/detratio_monomial.c
@@ -27,36 +27,23 @@
 #include <time.h>
 #include "global.h"
 #include "su3.h"
-#include "su3adj.h"
-#include "su3spinor.h"
-#include "ranlxd.h"
 #include "start.h"
 #include "linalg_eo.h"
-#include "linsolve.h"
 #include "deriv_Sb.h"
 #include "deriv_Sb_D_psi.h"
-#include "gamma.h"
 #include "tm_operators.h"
-#include "hybrid_update.h"
 #include "Hopping_Matrix.h"
 #include "solver/chrono_guess.h"
-#include "solver/bicgstab_complex.h"
 #include "solver/solver.h"
 #include "read_input.h"
-#include "smearing/stout.h"
-#include "clover_leaf.h"
-
+#include "gamma.h"
 #include "monomial.h"
 #include "boundary.h"
 #include "detratio_monomial.h"
 
-extern int ITER_MAX_BCG;
-extern int ITER_MAX_CG;
-
 /* think about chronological solver ! */
 
 void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
-  int saveiter = ITER_MAX_BCG;
 
   monomial * mnl = &monomial_list[no];
 
@@ -197,13 +184,11 @@ void detratio_derivative(const int no, hamiltonian_field_t * const hf) {
   g_mu = g_mu1;
   boundary(g_kappa);
 
-  ITER_MAX_BCG = saveiter;
   return;
 }
 
 
 void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
-  int saveiter = ITER_MAX_BCG;
   monomial * mnl = &monomial_list[id];
 
   g_mu = mnl->mu;
@@ -216,20 +201,15 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
     random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
     mnl->energy0  = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
-    Qtm_plus_psi(mnl->w_fields[1], mnl->w_fields[0]);
+    mnl->Qp(mnl->w_fields[1], mnl->w_fields[0]);
     g_mu = mnl->mu2;
     boundary(mnl->kappa2);
-    zero_spinor_field(mnl->pf,VOLUME/2);
-    if(mnl->solver == CG) ITER_MAX_BCG = 0;
-    ITER_MAX_CG = mnl->maxiter;
-    mnl->iter0 += bicg(mnl->pf, mnl->w_fields[1], mnl->accprec, g_relative_precision_flag);
-
-    chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
+    zero_spinor_field(mnl->w_fields[0], VOLUME/2);
+    mnl->iter0 = cg_her(mnl->w_fields[0], mnl->w_fields[1], mnl->maxiter, mnl->accprec, g_relative_precision_flag,
+    			VOLUME/2, mnl->Qsq);
+    mnl->Qm(mnl->pf, mnl->w_fields[0]);
+    chrono_add_solution(mnl->w_fields[0], mnl->csg_field, mnl->csg_index_array,
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
-    if(mnl->solver != CG) {
-      chrono_add_solution(mnl->pf, mnl->csg_field2, mnl->csg_index_array2,
-			  mnl->csg_N2, &mnl->csg_n2, VOLUME/2);
-    }
   }
   else {
     random_spinor_field(mnl->w_fields[0], VOLUME, mnl->rngrepro);
@@ -253,31 +233,28 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
   g_mu = g_mu1;
   boundary(g_kappa);
-  ITER_MAX_BCG = saveiter;
   return;
 }
 
 double detratio_acc(const int id, hamiltonian_field_t * const hf) {
   monomial * mnl = &monomial_list[id];
-  int saveiter = ITER_MAX_BCG;
   int save_sloppy = g_sloppy_precision_flag;
 
   g_mu = mnl->mu2;
   boundary(mnl->kappa2);
   if(even_odd_flag) {
-    Qtm_plus_psi(mnl->w_fields[1], mnl->pf);
+    mnl->Qp(mnl->w_fields[1], mnl->pf);
     g_mu = mnl->mu;
     boundary(mnl->kappa);
-    if(mnl->solver == CG) ITER_MAX_BCG = 0;
-    ITER_MAX_CG = mnl->maxiter;
     chrono_guess(mnl->w_fields[0], mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array, 
-		 mnl->csg_N, mnl->csg_n, VOLUME/2, &Qtm_plus_psi);
-    g_sloppy_precision_flag = 0;    
-    mnl->iter0 += bicg(mnl->w_fields[0], mnl->w_fields[1], mnl->accprec, g_relative_precision_flag); 
+		 mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
+    g_sloppy_precision_flag = 0;
+    mnl->iter0 += cg_her(mnl->w_fields[0], mnl->w_fields[1], mnl->maxiter, mnl->accprec, g_relative_precision_flag,
+			 VOLUME/2, mnl->Qsq);
+    mnl->Qm(mnl->w_fields[1], mnl->w_fields[0]);
     g_sloppy_precision_flag = save_sloppy;
-    /*     ITER_MAX_BCG = *saveiter_max; */
     /* Compute the energy contr. from second field */
-    mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
+    mnl->energy1 = square_norm(mnl->w_fields[1], VOLUME/2, 1);
   }
   else {
     Q_plus_psi(mnl->w_fields[1], mnl->pf);
@@ -288,13 +265,11 @@ double detratio_acc(const int id, hamiltonian_field_t * const hf) {
     mnl->iter0 += bicgstab_complex(mnl->w_fields[0], mnl->w_fields[1], 
 				   mnl->maxiter, mnl->accprec, g_relative_precision_flag, 
 				   VOLUME, Q_plus_psi); 
-    /*     ITER_MAX_BCG = *saveiter_max; */
     /* Compute the energy contr. from second field */
     mnl->energy1 = square_norm(mnl->w_fields[0], VOLUME, 1);
   }
   g_mu = g_mu1;
   boundary(g_kappa);
-  ITER_MAX_BCG = saveiter;
   if(g_proc_id == 0 && g_debug_level > 3) {
     printf("called detratio_acc for id %d %d dH = %1.4e\n", 
 	   id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0);
diff --git a/integrator.c b/integrator.c
index 225a651ef..1a2d171a9 100644
--- a/integrator.c
+++ b/integrator.c
@@ -25,16 +25,7 @@
 #include <stdio.h>
 #include <math.h>
 #include <time.h>
-#include "su3.h"
-#include "su3adj.h"
-#include "expo.h"
-#include "ranlxd.h"
-#include "sse.h"
 #include "global.h"
-#include "linalg_eo.h"
-#include "start.h"
-#include "linsolve.h"
-#include "tm_operators.h"
 #include "monomial.h"
 #include "update_momenta.h"
 #include "update_gauge.h"
diff --git a/invert_clover_eo.c b/invert_clover_eo.c
index bd8a1db5d..41c836c71 100644
--- a/invert_clover_eo.c
+++ b/invert_clover_eo.c
@@ -42,7 +42,6 @@
 #include"Hopping_Matrix.h"
 #include"clovertm_operators.h"
 #include"D_psi.h"
-#include"linsolve.h"
 #include"gamma.h"
 #include"solver/solver.h"
 #include"invert_clover_eo.h"
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 206e5059b..969c173ee 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -40,7 +40,6 @@
 #include"tm_operators.h"
 #include"Hopping_Matrix.h"
 #include"D_psi.h"
-#include"linsolve.h"
 #include"gamma.h"
 #include"solver/solver.h"
 #include"read_input.h"
diff --git a/invert_eo.c b/invert_eo.c
index 45d3cbcf0..8bfbc245e 100644
--- a/invert_eo.c
+++ b/invert_eo.c
@@ -15,9 +15,6 @@
  * 
  * You should have received a copy of the GNU General Public License
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-/****************************************************************
  *
  * invert_eo makes an inversion with EO preconditioned
  * tm Operator
@@ -43,7 +40,6 @@
 #include"tm_operators.h"
 #include"Hopping_Matrix.h"
 #include"D_psi.h"
-#include"linsolve.h"
 #include"gamma.h"
 #include"solver/solver.h"
 #include"read_input.h"
diff --git a/linsolve.c b/linsolve.c
deleted file mode 100644
index 250dc57c4..000000000
--- a/linsolve.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/***********************************************************************
- * 
- * Copyright (C) 2001 Martin Hasenbusch
- *                    
- * some parts change by C. Urbach 2001-2007
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-
-#ifdef HAVE_CONFIG_H
-# include<config.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <math.h>
-#include <time.h>
-#ifdef MPI
-# include <mpi.h>
-#endif
-#include "global.h"
-#include "su3.h"
-#include "su3adj.h"
-#include "linalg_eo.h"
-#include "gamma.h"
-#include "start.h"
-#include "tm_operators.h"
-#include "linalg/assign_add_mul_r_add_mul.h"
-#include "linsolve.h"
-#include "gettime.h"
-
-/* k output , l input */
-int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec)
-{
-  static double normsq, pro, err, alpha_cg, beta_cg, squarenorm, sqnrm, sqnrm2;
-  int iteration = 0, i, j;
-  int save_sloppy = g_sloppy_precision;
-  double atime, etime, flops;
-  spinor *x, *delta, *y;
-  
-  /* initialize residue r and search vector p */
-  atime = gettime();
-  squarenorm = square_norm(l, VOLUME/2, 1);
-
-  if(g_sloppy_precision_flag == 1) { 
-    delta = g_spinor_field[DUM_SOLVER+3];
-    x = g_spinor_field[DUM_SOLVER+4];
-    y = g_spinor_field[DUM_SOLVER+5];
-    assign(delta, l, VOLUME/2);
-    Qtm_pm_psi(y, k);
-    diff(delta, l, y, VOLUME/2);
-    sqnrm = square_norm(delta, VOLUME/2, 1);
-    if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
-      return(0);
-    }
-    
-    for(i = 0; i < 20; i++) {
-      g_sloppy_precision = 1;
-      /* main CG loop in lower precision */
-      zero_spinor_field(x, VOLUME/2);
-      assign(g_spinor_field[DUM_SOLVER+1], delta, VOLUME/2);
-      assign(g_spinor_field[DUM_SOLVER+2], delta, VOLUME/2);
-      sqnrm2 = sqnrm;
-      for(j = 0; j <= ITER_MAX_CG; j++) {
-	Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
-	pro = scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
-	alpha_cg = sqnrm2 / pro;
-	assign_add_mul_r(x, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);
-	
-	assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);
-	err = square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
-	
-	if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
-	  printf("inner CG: %d res^2 %g\n", iteration+j+1, err);
-	  fflush(stdout);
-	}
-	
-	if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
-	  break;
-	}
-	beta_cg = err / sqnrm2;
-	assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
-	assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
-	sqnrm2 = err;
-      }
-      /* end main CG loop */
-      iteration += j;
-      g_sloppy_precision = 0;
-      add(k, k, x, VOLUME/2);
-      
-      Qtm_pm_psi(y, x);
-      diff(delta, delta, y, VOLUME/2);
-      sqnrm = square_norm(delta, VOLUME/2, 1);
-      if(g_debug_level > 0 && g_proc_id == g_stdio_proc) {
-	printf("mixed CG(linsolve): true residue %d\t%g\t\n",iteration, sqnrm); fflush( stdout);
-      }
-      
-      if(((sqnrm <= eps_sq) && (rel_prec == 0)) || ((sqnrm <= eps_sq*squarenorm) && (rel_prec == 1))) {
-	break;
-      }
-      iteration++;
-    }
-  }
-  else {
-    Qtm_pm_psi(g_spinor_field[DUM_SOLVER], k); 
-    
-    diff(g_spinor_field[DUM_SOLVER+1], l, g_spinor_field[DUM_SOLVER], VOLUME/2);
-    assign(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2);
-    normsq=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
-    
-    /* main loop */
-    for(iteration = 1; iteration <= ITER_MAX_CG; iteration++) {
-      Qtm_pm_psi(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2]);
-      pro=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
-      alpha_cg=normsq/pro;
-      assign_add_mul_r(k, g_spinor_field[DUM_SOLVER+2], alpha_cg, VOLUME/2);
-      
-      assign_mul_add_r(g_spinor_field[DUM_SOLVER], -alpha_cg, g_spinor_field[DUM_SOLVER+1], VOLUME/2);
-      err=square_norm(g_spinor_field[DUM_SOLVER], VOLUME/2, 1);
-      
-      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
-	printf("CG (linsolve): iterations: %d res^2 %e\n", iteration, err);
-	fflush(stdout);
-      }
-      
-      if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
-	break;
-      }
-      beta_cg = err/normsq;
-      assign_mul_add_r(g_spinor_field[DUM_SOLVER+2], beta_cg, g_spinor_field[DUM_SOLVER], VOLUME/2);
-      assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER], VOLUME/2);
-      normsq=err;
-    }
-  }
-  etime = gettime();
-  /* 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */
-  /* 2*1608.0 because the linalg is over VOLUME/2 */
-  flops = (2*(2*1608.0+2*3*4) + 2*3*4 + iteration*(2.*(2*1608.0+2*3*4) + 10*3*4))*VOLUME/2/1.0e6f;
-  if(g_proc_id==0 && g_debug_level > 0) {
-    printf("# CG(linsolve): iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); 
-    printf("# CG(linsolve): flopcount: t/s: %1.4e mflops_local: %.1f mflops: %.1f\n", 
-	   etime-atime, flops/(etime-atime), g_nproc*flops/(etime-atime));
-  }
-  g_sloppy_precision = save_sloppy;
-  return(iteration);
-}
-
-
-/* k output , l input */
-int bicg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec) {
-
-  double err, d1, squarenorm=0.;
-  _Complex double rho0, rho1, omega, alpha, beta;
-  int iteration, N=VOLUME/2;
-  spinor * r, * p, * v, *hatr, * s, * t, * P, * Q;
-  
-
-  if(ITER_MAX_BCG > 0) {
-
-
-
-    hatr = g_spinor_field[DUM_SOLVER];
-    r = g_spinor_field[DUM_SOLVER+1];
-    v = g_spinor_field[DUM_SOLVER+2];
-    p = g_spinor_field[DUM_SOLVER+3];
-    s = g_spinor_field[DUM_SOLVER+4];
-    t = g_spinor_field[DUM_SOLVER+5];
-    P = k;
-    Q = l;
-
-    squarenorm = square_norm(Q, VOLUME/2, 1);
-    
-    Mtm_plus_psi(r, P);
-    gamma5(g_spinor_field[DUM_SOLVER], l, VOLUME/2);
-    diff(p, hatr, r, N);
-    assign(r, p, N);
-    assign(hatr, p, N);
-    rho0 = scalar_prod(hatr, r, N, 1);
-    
-    for(iteration = 0; iteration < ITER_MAX_BCG; iteration++){
-      err = square_norm(r, N, 1);
-      if(g_proc_id == g_stdio_proc && g_debug_level > 1) {
-	printf("BiCGstab: iterations: %d res^2 %e\n", iteration, err);
-	fflush(stdout);
-      }
-      if (((err <= eps_sq) && (rel_prec == 0)) 
-	  || ((err <= eps_sq*squarenorm) && (rel_prec == 1))){
-	break;
-      }
-      Mtm_plus_psi(v, p);
-      alpha = rho0 / scalar_prod(hatr, v, N, 1);
-      assign(s, r, N);
-      assign_diff_mul(s, v, alpha, N);
-      Mtm_plus_psi(t, s);
-      omega = scalar_prod(t,s, N, 1);
-      d1 = square_norm(t, N, 1);
-      omega /= d1;
-      assign_add_mul_add_mul(P, p, s, alpha, omega, N);
-      assign(r, s, N);
-      assign_diff_mul(r, t, omega, N);
-      rho1 = scalar_prod(hatr, r, N, 1);
-      beta = -(alpha * rho1) / (omega * rho0);
-      assign_mul_bra_add_mul_ket_add(p, v, r, omega, beta, N);
-      rho0 = rho1;
-    }
-    
-    if(g_proc_id==0 && g_debug_level > 0) {
-      printf("BiCGstab: iterations: %d eps_sq: %1.4e\n", iteration, eps_sq); 
-    }
-  }
-  else{
-    iteration = ITER_MAX_BCG;
-    gamma5(k, l, VOLUME/2);
-  }
-
-  /* if bicg fails, redo with conjugate gradient */
-  if(iteration>=ITER_MAX_BCG){
-    iteration = solve_cg(k,l,eps_sq, rel_prec);
-    /* Save the solution for reuse! not needed since Chronological inverter is there */
-    /*     assign(g_spinor_field[DUM_DERI+6], k, VOLUME/2); */
-    Qtm_minus_psi(k, k);;
-  }
-  return iteration;
-}
-
-#ifdef _USE_NOT_USED_NOR_TESTED
-
-
-/*lambda: smallest eigenvalue, k eigenvector */
-int eva(double *rz, int k, double q_off, double eps_sq) {
-  static double ritz,norm0,normg,normg0,beta_cg;
-  static double costh,sinth,cosd,sind,aaa,normp,xxx;
-  static double xs1,xs2,xs3;
-  int iteration;
-  /* Initialize k to be gaussian */
-  random_spinor_field(g_spinor_field[k], VOLUME/2);
-  norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); 
-  /*normalize k */
-  assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2);
-  Q_psi(DUM_SOLVER,k,q_off);
-  Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
-  /*compute the ritz functional */
-  /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
-  ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); 
-  zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
-  assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 1., -ritz, VOLUME/2);
-  assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2);
-  normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
-  
-  /* main loop */
-  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
-    if(normg0 <= eps_sq) break;
-    Q_psi(DUM_SOLVER+2,DUM_SOLVER+1,q_off);
-    Q_psi(DUM_SOLVER+2,DUM_SOLVER+2,q_off);
-    /*   compute costh and sinth */
-    normp=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
-    xxx=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
-    
-    xs1=0.5*(ritz+xxx/normp);
-    xs2=0.5*(ritz-xxx/normp);
-    normp=sqrt(normp);
-    xs3=normg0/normp;
-    aaa=sqrt(xs2*xs2+xs3*xs3);
-    cosd=xs2/aaa;
-    sind=xs3/aaa;
-    
-    if(cosd<=0.) { 
-      costh=sqrt(0.5*(1.-cosd));
-      sinth=-0.5*sind/costh;
-    }
-    else {
-      sinth=-sqrt(0.5*(1.+cosd));
-      costh=-0.5*sind/sinth;
-    } 
-    ritz=ritz-2.*aaa*sinth*sinth;
-    
-    assign_add_mul_r_add_mul(g_spinor_field[k],g_spinor_field[k], g_spinor_field[DUM_SOLVER +1], costh-1., sinth/normp, VOLUME/2);
-    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2],
-			     costh-1., sinth/normp, VOLUME/2);
-    
-    /*   compute g */
-    zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
-    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
-			     1., -ritz, VOLUME/2);
-    
-    /*   calculate the norm of g' and beta_cg=costh g'^2/g^2 */
-    normg=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
-    beta_cg=costh*normg/normg0;
-    if(beta_cg*costh*normp>20.*sqrt(normg))  beta_cg=0.;
-    normg0=normg;    
-    /*   compute the new value of p */
-    assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2), VOLUME/2, 1);
-    assign_mul_add_r(g_spinor_field[DUM_SOLVER+1],beta_cg, g_spinor_field[DUM_SOLVER+2], VOLUME/2);
-    if(iteration%20==0) {
-      /* readjust x */
-      xxx=sqrt(square_norm(g_spinor_field[k], VOLUME/2), 1);
-      assign_mul_bra_add_mul_r( g_spinor_field[k], 1./xxx,0., g_spinor_field[k], VOLUME/2);
-      Q_psi(DUM_SOLVER,k,q_off);
-      Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
-      /*compute the ritz functional */
-      ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1);
-      /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
-      zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
-      assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 
-			       1., -ritz, VOLUME/2);
-      normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
-      /*subtract a linear combination of x and g from p to
-	insure (x,p)=0 and (p,g)=(g,g) */
-      cosd=scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
-      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -cosd, VOLUME/2);
-      cosd=scalar_prod_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1)-normg0;
-      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], -cosd/sqrt(normg0), VOLUME/2);
-    }
-  }
-  *rz=ritz;
-  return iteration;
-}
-
-/*lambda: largest eigenvalue, k eigenvector */
-int evamax(double *rz, int k, double q_off, double eps_sq) {
-  static double ritz,norm0,normg,normg0,beta_cg;
-  static double costh,sinth,cosd,sind,aaa,normp,xxx;
-  static double xs1,xs2,xs3;
-  int iteration;
-  /* Initialize k to be gaussian */
-  random_spinor_field(g_spinor_field[k], VOLUME/2);
-  norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); 
-  /*normalize k */
-  assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2);
-  Q_psi(DUM_SOLVER,k,q_off);
-  Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
-  /*compute the ritz functional */
-  /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
-  ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1); 
-  zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
-  assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
-			   1., -ritz, VOLUME/2);
-  assign(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2);
-  normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
-  
-  /* main loop */
-  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
-    if(normg0 <= eps_sq) break;
-    Q_psi(DUM_SOLVER+2,DUM_SOLVER+1,q_off);
-    Q_psi(DUM_SOLVER+2,DUM_SOLVER+2,q_off);
-    /*   compute costh and sinth */
-    normp=square_norm(g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
-    xxx=scalar_prod_r(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
-    
-    xs1=0.5*(ritz+xxx/normp);
-    xs2=0.5*(ritz-xxx/normp);
-    normp=sqrt(normp);
-    xs3=normg0/normp;
-    aaa=sqrt(xs2*xs2+xs3*xs3);
-    cosd=xs2/aaa;
-    sind=xs3/aaa;
-    
-    if(cosd>=0.) { 
-      costh=sqrt(0.5*(1.+cosd));
-      sinth=0.5*sind/costh;
-    }
-    else {
-      sinth=sqrt(0.5*(1.-cosd));
-      costh=0.5*sind/sinth;
-    } 
-    ritz=xs1+aaa;
-    
-    assign_add_mul_r_add_mul(g_spinor_field[k], g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], 
-			     costh-1., sinth/normp, VOLUME/2);
-    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+2],
-			     costh-1., sinth/normp, VOLUME/2);
-    
-    /*   compute g */
-    zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
-    assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k], 
-			     1., -ritz, VOLUME/2);
-    
-    /*   calculate the norm of g' and beta_cg=costh g'^2/g^2 */
-    normg=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
-    beta_cg=costh*normg/normg0;
-    if(beta_cg*costh*normp>20.*sqrt(normg))  beta_cg=0.;
-    normg0=normg;    
-    /*   compute the new value of p */
-    assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2), VOLUME/2, 1);
-    assign_mul_add_r(g_spinor_field[DUM_SOLVER+1],beta_cg, g_spinor_field[DUM_SOLVER+2], VOLUME/2);
-    /*   restore the state of the iteration */
-    if(iteration%20==0) {
-      /* readjust x */
-      xxx=sqrt(square_norm(g_spinor_field[k], VOLUME/2), 1);
-      assign_mul_bra_add_mul_r( g_spinor_field[k], 1./xxx,0., g_spinor_field[k], VOLUME/2);
-      Q_psi(DUM_SOLVER,k,q_off);
-      Q_psi(DUM_SOLVER,DUM_SOLVER,q_off);
-      /*compute the ritz functional */
-      ritz=scalar_prod_r(g_spinor_field[DUM_SOLVER], g_spinor_field[k], VOLUME/2, 1);
-      /*put g on DUM_SOLVER+2 and p on DUM_SOLVER+1*/
-      zero_spinor_field(g_spinor_field[DUM_SOLVER+2],VOLUME/2);
-      assign_add_mul_r_add_mul(g_spinor_field[DUM_SOLVER+2], g_spinor_field[DUM_SOLVER], g_spinor_field[k],
-			       1., -ritz, VOLUME/2);
-      normg0=square_norm(g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1);
-      /*subtract a linear combination of x and g from p to 
-	insure (x,p)=0 and (p,g)=(g,g) */
-      cosd=scalar_prod_r(g_spinor_field[k], g_spinor_field[DUM_SOLVER+1], VOLUME/2, 1);
-      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[k], -cosd, VOLUME/2);
-      cosd=scalar_prod_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], VOLUME/2, 1)-normg0;
-      assign_add_mul_r(g_spinor_field[DUM_SOLVER+1], g_spinor_field[DUM_SOLVER+2], -cosd/sqrt(normg0), VOLUME/2);
-    }
-  }
-  *rz=ritz;
-  return iteration;
-}
-
-/*lambda: smallest eigenvalue, k eigenvector */
-int evamax0(double *rz, int k, double q_off, double eps_sq) {
-
-  static double norm,norm0;
-  int j;
-  random_spinor_field(g_spinor_field[k], VOLUME/2);
-  norm0=square_norm(g_spinor_field[k], VOLUME/2, 1); 
-  norm=1000.;
-  assign_mul_bra_add_mul_r( g_spinor_field[k], 1./sqrt(norm0),0., g_spinor_field[k], VOLUME/2);
-  for(j=1;j<ITER_MAX_BCG;j++)
-    {
-      Q_psi(k,k,q_off);  Q_psi(k,k,q_off);
-      norm0=square_norm(g_spinor_field[k], VOLUME/2, 1);
-      norm0=sqrt(norm0);
-      assign_mul_bra_add_mul_r( g_spinor_field[k], 1./norm0,0., g_spinor_field[k], VOLUME/2);
-      if((norm-norm0)*(norm-norm0) <= eps_sq) break;
-      norm=norm0;
-    }
-  *rz=norm0;
-  return j;
-}
-
-/* this is actually the not the bicg but the geometric series 
-   The use of the geometric series avoids  in contrast to the bicg
-   reversibility problems when a reduced accuracy of the solver employed
-
-   !!! This is not tested in the current env. and should not be used !!!
-*/
-
-int bicg(spinor * const k, spinor * const l, double eps_sq) {
-  int iteration;
-  double xxx;
-  xxx=0.0;
-  gamma5(g_spinor_field[DUM_SOLVER+1], l, VOLUME/2);
-  /* main loop */
-  for(iteration=1;iteration<=ITER_MAX_BCG;iteration++) {
-    /* compute the residual*/
-    M_psi(DUM_SOLVER,k,q_off);
-    xxx=diff_and_square_norm(g_spinor_field[DUM_SOLVER], g_spinor_field[DUM_SOLVER+1], VOLUME/2);
-    /*apply the solver step for the residual*/
-    M_psi(DUM_SOLVER+2,DUM_SOLVER,q_off-(2.+2.*q_off));
-    assign_add_mul_r(k,-1./((1.+q_off)*(1.+q_off)),g_spinor_field[DUM_SOLVER+2], VOLUME/2);
-    if(xxx <= eps_sq) break;
-  }
-
-  if(g_proc_id==0) {
-    sout = fopen(solvout, "a");
-    fprintf(sout, "%d %e %f\n",iteration,xxx, g_mu);
-    fclose(sout);
-  }
-
-  /* if the geometric series fails, redo with conjugate gradient */
-  if(iteration>=ITER_MAX_BCG) {
-    if(ITER_MAX_BCG == 0) {
-      iteration = 0;
-    }
-    zero_spinor_field(k,VOLUME/2);
-    iteration += solve_cg(k,l,q_off,eps_sq);
-    Q_psi(k,k,q_off);
-    if(ITER_MAX_BCG != 0) {
-      iteration -= 1000000;
-    }
-    if(g_proc_id == 0) {
-      sout = fopen(solvout, "a");
-      fprintf(sout, "%d %e\n",iteration, g_mu);
-      fclose(sout);
-    }
-  }
-  
-  return iteration;
-}
-#endif
diff --git a/linsolve.h b/linsolve.h
deleted file mode 100644
index 836dbc546..000000000
--- a/linsolve.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2001 Carsten Urbach
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * 
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-#ifndef _LINSOLVE_H
-#define _LINSOLVE_H
-
-int solve_cg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec);
-int bicg(spinor * const k, spinor * const l, double eps_sq, const int rel_prec);
-
-#endif
diff --git a/monomial.c b/monomial.c
index 2aba1245d..ecf6fae89 100644
--- a/monomial.c
+++ b/monomial.c
@@ -177,6 +177,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].hbfunction = &det_heatbath;
 	monomial_list[i].accfunction = &det_acc;
 	monomial_list[i].derivativefunction = &det_derivative;
+	monomial_list[i].Qsq = &Qtm_pm_psi;
+	monomial_list[i].Qp = &Qtm_plus_psi;
+	monomial_list[i].Qm = &Qtm_minus_psi;
 	if(g_proc_id == 0 && g_debug_level > 1) {
 	  printf("# Initialised monomial of type DET, no_monomials= %d\n", no_monomials);
 	}
@@ -216,6 +219,9 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].hbfunction = &detratio_heatbath;
 	monomial_list[i].accfunction = &detratio_acc;
 	monomial_list[i].derivativefunction = &detratio_derivative;
+	monomial_list[i].Qsq = &Qtm_pm_psi;
+	monomial_list[i].Qp = &Qtm_plus_psi;
+	monomial_list[i].Qm = &Qtm_minus_psi;
 	if(g_proc_id == 0 && g_debug_level > 1) {
 	  printf("# Initialised monomial of type DETRATIO, no_monomials= %d\n", no_monomials);
 	}
diff --git a/nddetratio_monomial.c b/nddetratio_monomial.c
index 6ba80a1a5..59230840a 100644
--- a/nddetratio_monomial.c
+++ b/nddetratio_monomial.c
@@ -30,7 +30,6 @@
 #include "su3adj.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "linsolve.h"
 #include "solver/solver.h"
 #include "deriv_Sb.h"
 #include "tm_operators.h"
diff --git a/ndpoly_monomial.c b/ndpoly_monomial.c
index c500e4457..599fe4848 100644
--- a/ndpoly_monomial.c
+++ b/ndpoly_monomial.c
@@ -27,10 +27,8 @@
 #include <time.h>
 #include "global.h"
 #include "su3.h"
-#include "su3adj.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "linsolve.h"
 #include "solver/solver.h"
 #include "deriv_Sb.h"
 #include "tm_operators.h"
@@ -162,7 +160,6 @@ void ndpoly_derivative(const int id, hamiltonian_field_t * const hf) {
 
 void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   int j;
-  double temp;
   monomial * mnl = &monomial_list[id];
 
   ndpoly_set_global_parameter(mnl, phmc_exact_poly);
diff --git a/poly_monomial.c b/poly_monomial.c
index 4b151fceb..5f002c8ac 100644
--- a/poly_monomial.c
+++ b/poly_monomial.c
@@ -42,7 +42,6 @@
 #include "linalg/diff.h"
 #include "linalg_eo.h"
 #include "deriv_Sb.h"
-#include "linsolve.h"
 #include "tm_operators.h"
 #include "solver/solver.h"
 #include "solver/chrono_guess.h"
@@ -307,17 +306,13 @@ void poly_heatbath(const int id, hamiltonian_field_t * const hf){
       g_mu = mnl->mu2;
       boundary(mnl->kappa2);
       zero_spinor_field(mnl->pf,VOLUME/2);
-      if(mnl->solver == CG) ITER_MAX_BCG = 0;
-      ITER_MAX_CG = mnl->maxiter;
-      mnl->iter0 += bicg(mnl->pf, mnl->w_fields[0], mnl->accprec, g_relative_precision_flag);
-      
-      chrono_add_solution(mnl->pf, mnl->csg_field, mnl->csg_index_array,
+      mnl->iter0 = cg_her(mnl->w_fields[1], mnl->w_fields[0], mnl->maxiter, mnl->accprec, g_relative_precision_flag,
+			  VOLUME/2, &Qtm_pm_psi);
+      Qtm_minus_psi(mnl->pf, mnl->w_fields[1]);
+
+      chrono_add_solution(mnl->w_fields[1], mnl->csg_field, mnl->csg_index_array,
 			  mnl->csg_N, &mnl->csg_n, VOLUME/2);
       
-      if(mnl->solver != CG) {
-	chrono_add_solution(mnl->pf, mnl->csg_field2, mnl->csg_index_array2,
-			    mnl->csg_N2, &mnl->csg_n2, VOLUME/2);
-      }
     } else {
       /* store constructed phi field */
       assign(mnl->pf, mnl->w_fields[0], VOLUME/2);
diff --git a/reweighting_factor_nd.c b/reweighting_factor_nd.c
index e30fdd380..f28a08f5c 100644
--- a/reweighting_factor_nd.c
+++ b/reweighting_factor_nd.c
@@ -24,7 +24,6 @@
 #include <stdio.h>
 #include <math.h>
 #include "global.h"
-#include "linsolve.h"
 #include "linalg_eo.h"
 #include "start.h"
 #include "tm_operators.h"
diff --git a/tm_operators_nd.c b/tm_operators_nd.c
index ad131d06c..90b804d35 100644
--- a/tm_operators_nd.c
+++ b/tm_operators_nd.c
@@ -36,7 +36,6 @@
 #include "Hopping_Matrix.h"
 #include "phmc.h"
 #include "gamma.h"
-#include "linsolve.h"
 #include "linalg_eo.h"
 #include "tm_operators.h"
 #include "clovertm_operators.h"

From ad9a49426a92558898448f76253dfdd884c4731d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Thu, 18 Oct 2012 13:14:34 +0200
Subject: [PATCH 063/110] adjusted NDPOLY sample input file for all the changes

---
 sample-input/sample-hmc2.input | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sample-input/sample-hmc2.input b/sample-input/sample-hmc2.input
index 9fdda4d0c..ae97596e7 100644
--- a/sample-input/sample-hmc2.input
+++ b/sample-input/sample-hmc2.input
@@ -56,9 +56,13 @@ BeginMonomial NDPOLY
   Timescale = 1
   StildeMin = 0.013577
   StildeMax = 3.096935
+  LocNormConst = 3.3394134092406311254
   PrecisionPtilde = 1e-05
   DegreeOfMDPolynomial = 48
   PrecisionHfinal = 1e-10
+  2Kappamubar = 0.1105
+  2Kappaepsbar = 0.0935
+  kappa = 0.170
   ComputeEVFreq = 2
 EndMonomial
 

From 2ece7d6520f80c2f1b43395a432093e26c36eb0d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Thu, 18 Oct 2012 22:25:05 +0200
Subject: [PATCH 064/110] removed some leftover SF variables

---
 geometry_eo.c                  |  58 ------------------
 global.h                       |  16 -----
 io/utils_write_first_message.c |   8 ---
 monomial.c                     |   6 --
 monomial.h                     |   4 --
 read_input.l                   | 105 +--------------------------------
 6 files changed, 2 insertions(+), 195 deletions(-)

diff --git a/geometry_eo.c b/geometry_eo.c
index c81048244..68cf6eb2e 100644
--- a/geometry_eo.c
+++ b/geometry_eo.c
@@ -772,64 +772,6 @@ void geometry(){
   startvaluez = 1;
 #endif
 
-  /*** Coordinates for SFBC ***/
-
-  /* it finds which are the values of t,x,y,z (x0,x1,x2,x3) for each lattice site x (ix) */
-  /* the result is written in the global variables g_t[ix], g_x[ix], g_y[ix], g_z[ix], respectively */
-
-  g_t=(int*)malloc(sizeof(int)*VOLUME);
-
-  if(g_t==NULL) {
-    fprintf(stderr,"allocation of memory failed");
-    exit(-1);
-  }
-
-  g_x=(int*)malloc(sizeof(int)*VOLUME);
-
-  if(g_x==NULL) {
-    fprintf(stderr,"allocation of memory failed");
-    exit(-1);
-  }
-
-  g_y=(int*)malloc(sizeof(int)*VOLUME);
-
-  if(g_y==NULL) {
-    fprintf(stderr,"allocation of memory failed");
-    exit(-1);
-  }
-
-  g_z=(int*)malloc(sizeof(int)*VOLUME);
-
-  if(g_z==NULL) {
-    fprintf(stderr,"allocation of memory failed");
-    exit(-1);
-  }
-
-  
-  for (x0 = 0 ; x0 < T ; x0++) {
-    
-    for (x1 = 0 ; x1 < LX ; x1++) {
-      
-      for (x2 = 0 ; x2 < LY ; x2++) {
-	
-	for (x3 = 0 ; x3 < LZ ; x3++) {
-	  
-
-	  ix = Index(x0, x1, x2, x3);
-
-	  g_t[ix] = x0;
-	  g_x[ix] = x1;
-	  g_y[ix] = x2;
-	  g_z[ix] = x3;
-	
-  
-	}
-      }
-    }
-  }
-  
-  /*** END OF coordinates SFBC ***/
-
   /* extended for boundary slices */
   for (x0 = -startvaluet; x0 < (T+startvaluet); x0++){
     for (x1 = -startvaluex; x1 < (LX+startvaluex); x1++){
diff --git a/global.h b/global.h
index 1b94ca869..5455a0bcc 100644
--- a/global.h
+++ b/global.h
@@ -193,22 +193,6 @@ EXTERN double g_kappa, g_c_sw, g_ka_csw_8, g_beta;
 EXTERN double g_mu, g_mu1, g_mu2, g_mu3;
 EXTERN double g_rgi_C0, g_rgi_C1;
 
-/*************************/
-/* SF definitions */
-EXTERN double g_eta;                          /* background field parameter */
-EXTERN double g_Ct, g_Cs;                     /* plaquette part */
-EXTERN double g_C1ss, g_C1tss, g_C1tts;       /* rectangle part */
-EXTERN int g_Tbsf;                            /* it sets at which time slice I want to put the SF b.c. (end point)
-                                                 T = lattice time extent */
-/* variables specifying the value of t,x,y,z for each lattice site ix */
-EXTERN int* g_t;
-EXTERN int* g_x;
-EXTERN int* g_y;
-EXTERN int* g_z;
-EXTERN int g_sf_inc_wrap_sq;
-/* end of SF definitions */
-/*************************/
-
 /* Parameters for non-degenrate case */
 EXTERN double g_mubar, g_epsbar;
 EXTERN int g_use_clover_flag;
diff --git a/io/utils_write_first_message.c b/io/utils_write_first_message.c
index 1bc297ade..9b4c7ae78 100644
--- a/io/utils_write_first_message.c
+++ b/io/utils_write_first_message.c
@@ -139,10 +139,6 @@ int write_first_messages(FILE * parameterfile, const int inv) {
   if(inv != 1) {
     printf("# mu = %f\n", g_mu/2./g_kappa);
     printf("# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1);
-/*     printf("# SFBC parameters (gauge):\n"); */
-/*     printf("# g_Ct = %f, g_Cs = %f\n", g_Ct, g_Cs); */
-/*     printf("# g_C1ss = %f, g_C1tss = %f, g_C1tts = %f\n", g_C1ss, g_C1tss, g_C1tts); */
-/*     printf("# g_eta = %f\n", g_eta); */
     printf("# Using %s precision for the inversions!\n", 
 	   g_relative_precision_flag ? "relative" : "absolute");
   }
@@ -157,10 +153,6 @@ int write_first_messages(FILE * parameterfile, const int inv) {
 	    Nmeas,Nsave);
     fprintf(parameterfile, "# mu = %f\n", g_mu/2./g_kappa);
     fprintf(parameterfile, "# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1);
-    fprintf(parameterfile, "# SFBC parameters (gauge):\n");
-    fprintf(parameterfile, "# g_Ct = %f, g_Cs = %f\n", g_Ct, g_Cs);
-    fprintf(parameterfile, "# g_C1ss = %f, g_C1tss = %f, g_C1tts = %f\n", g_C1ss, g_C1tss, g_C1tts);
-    fprintf(parameterfile, "# g_eta = %f\n", g_eta);
     fprintf(parameterfile, "# Using %s precision for the inversions!\n", 
 	    g_relative_precision_flag ? "relative" : "absolute");
   }
diff --git a/monomial.c b/monomial.c
index ecf6fae89..ff9e4bae7 100644
--- a/monomial.c
+++ b/monomial.c
@@ -97,12 +97,6 @@ int add_monomial(const int type) {
   monomial_list[no_monomials].c1 = _default_g_rgi_C1;
   monomial_list[no_monomials].c0 = 1.;
   monomial_list[no_monomials].beta = _default_g_beta;
-  monomial_list[no_monomials].eta = _default_g_eta;
-  monomial_list[no_monomials].ct = _default_g_Ct; 
-  monomial_list[no_monomials].cs = _default_g_Cs;
-  monomial_list[no_monomials].c1ss = _default_g_C1ss; 
-  monomial_list[no_monomials].c1tss = _default_g_C1tss; 
-  monomial_list[no_monomials].c1tts = _default_g_C1tts; 
   monomial_list[no_monomials].rngrepro = _default_reproduce_randomnumber_flag;
   /* poly monomial */
   monomial_list[no_monomials].rec_ev = _default_g_rec_ev;
diff --git a/monomial.h b/monomial.h
index 78b9d1f66..48e739c0d 100644
--- a/monomial.h
+++ b/monomial.h
@@ -74,10 +74,6 @@ typedef struct {
   double accprec;
   /* force normalisation */
   double forcefactor;
-  /* sf */          
-  double eta;             
-  double ct, cs; 
-  double c1ss, c1tss, c1tts;
   /* some book-keeping */
   char name[100];
   /* pseudo fermion field */
diff --git a/read_input.l b/read_input.l
index dc460847b..f902ca412 100644
--- a/read_input.l
+++ b/read_input.l
@@ -205,7 +205,6 @@ inline void rmQuotes(char *str){
 %x LLX
 %x LLY
 %x LLZ
-%x TTBSF
 %x NPROCX
 %x NPROCY
 %x NPROCZ
@@ -278,7 +277,6 @@ inline void rmQuotes(char *str){
 %x CLDETMONOMIAL
 %x CLDETRATMONOMIAL
 %x GAUGEMONOMIAL
-%x SFGAUGEMONOMIAL
 %x NDPOLYMONOMIAL
 %x POLYMONOMIAL
 %x CLPOLYMONOMIAL
@@ -313,7 +311,6 @@ inline void rmQuotes(char *str){
 ^LX{EQL}                           BEGIN(LLX);
 ^LY{EQL}                           BEGIN(LLY);
 ^LZ{EQL}                           BEGIN(LLZ);
-^g_Tbsf{EQL}                       BEGIN(TTBSF);
 ^NRXProcs{EQL}                     BEGIN(NPROCX);
 ^NRYProcs{EQL}                     BEGIN(NPROCY);
 ^NRZProcs{EQL}                     BEGIN(NPROCZ);
@@ -839,11 +836,6 @@ inline void rmQuotes(char *str){
     mnl->gtype = 3;
     strcpy((*mnl).name, "GAUGE");
   }
-  else if(strcmp(yytext, "SFGAUGE")==0) {
-    mnl->type = SFGAUGE;
-    mnl->gtype = 6;
-    strcpy((*mnl).name, "SFGAUGE");
-  }
   else {
     fprintf(stderr, "Unknown monomial type %s in line %d\n", yytext, line_of_file);
     exit(1);
@@ -858,7 +850,6 @@ inline void rmQuotes(char *str){
   if(myverbose) printf("monomial has id %d\n", current_monomial);
 
   if(mnl->type == GAUGE) BEGIN(GAUGEMONOMIAL);
-  else if(mnl->type == SFGAUGE) BEGIN(SFGAUGEMONOMIAL);
   else if(mnl->type == NDPOLY) BEGIN(NDPOLYMONOMIAL);
   else if(mnl->type == NDCLOVER) BEGIN(CLPOLYMONOMIAL);
   else if(mnl->type == POLY || mnl->type == POLYDETRATIO)  {
@@ -872,7 +863,7 @@ inline void rmQuotes(char *str){
 
 
 
-<DETMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,NDPOLYMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLPOLYMONOMIAL>{
+<DETMONOMIAL,GAUGEMONOMIAL,NDPOLYMONOMIAL,POLYMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,CLPOLYMONOMIAL>{
   {SPC}*Timescale{EQL}{DIGIT}+ {
     if(mnl->type == NDDETRATIO) {
       mnl->timescale = -5;
@@ -1026,81 +1017,8 @@ inline void rmQuotes(char *str){
     g_rgi_C1 = c;
     if(myverbose) printf("  RectangleCoefficient c1  set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
   }
-  {SPC}*IncludeWrappedSquares{EQL}yes {
-    g_sf_inc_wrap_sq = 1;
-    if(myverbose) printf("  IncludeWrappedSquares set to true line %d monomial %d\n", line_of_file, current_monomial);
-  }
-  {SPC}*IncludeWrappedSquares{EQL}no {
-    g_sf_inc_wrap_sq = 0;
-    if(myverbose) printf("  IncludeWrappedSquares set to false line %d monomial %d\n", line_of_file, current_monomial);
-  }
 }
 
-<SFGAUGEMONOMIAL>{
-  {SPC}*Type{EQL} BEGIN(GTYPE);
-  {SPC}*UseRectangleStaples{EQL}yes {
-    mnl->use_rectangles = 1;
-    g_dbw2rand = 1;
-    if(myverbose) printf("  UseRectangleStaples set to true line %d monomial %d\n", line_of_file, current_monomial);
-  }
-  {SPC}*UseRectangleStaples{EQL}no {
-    mnl->use_rectangles = 0;
-    /* g_dbw2rand = 0; */
-    mnl->c1 = 0.;
-    g_rgi_C1 = 0.;
-    if(myverbose) printf("  UseRectangleStaples set to false line %d monomial %d\n", line_of_file, current_monomial);
-  }
-  {SPC}*Beta{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->beta = c;
-    g_beta = c;
-    if(myverbose) printf("  beta set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-  {SPC}*RectangleCoefficient{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->c1 = c;
-    g_rgi_C1 = c;
-    if(myverbose) printf("  RectangleCoefficient c1  set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-  {SPC}*Eta{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->eta = c;
-    g_eta = c;
-    if(myverbose) printf("  eta set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-  {SPC}*PlaquetteCoefficientT{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->ct = c;
-    g_Ct = c;
-    if(myverbose) printf("  PlaquetteCoefficientT ct set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-  {SPC}*PlaquetteCoefficientS{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->cs = c;
-    g_Cs = c;
-    if(myverbose) printf("  PlaquetteCoefficientS cs set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-  {SPC}*RectangleCoefficientSS{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->c1ss = c;
-    g_C1ss = c;
-    if(myverbose) printf("  RectangleCoefficientSS c1ss set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-  {SPC}*RectangleCoefficientTSS{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->c1tss = c;
-    g_C1tss = c;
-    if(myverbose) printf("  RectangleCoefficientTSS c1tss set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-  {SPC}*RectangleCoefficientTTS{EQL}{FLT} {
-    sscanf(yytext, " %[a-zA-Z] = %lf",name , &c);
-    mnl->c1tts = c;
-    g_C1tts = c;
-    if(myverbose) printf("  RectangleCoefficientTTS c1tts set to %e line %d monomial %d\n", c, line_of_file, current_monomial);
-  }
-}
-
-
 <NDPOLYMONOMIAL>{
   {SPC}*ExactPolynomial{EQL}yes {
     phmc_exact_poly = 1;
@@ -1266,19 +1184,6 @@ inline void rmQuotes(char *str){
     mnl->use_rectangles = 1;
     BEGIN(GAUGEMONOMIAL);
   }
-  sf_Wilson {
-    mnl->gtype = 5;
-    mnl->c1 = 0.;
-    mnl->use_rectangles = 0;
-    g_rgi_C1 = 0.;
-    /* g_dbw2rand = 0; */
-    BEGIN(SFGAUGEMONOMIAL);
-  }
-  sf_user {
-    mnl->gtype = 6;
-    BEGIN(SFGAUGEMONOMIAL);
-  }
-
 }
 
 <INITINTEGRATOR>egrator{SPC}* {
@@ -1478,10 +1383,6 @@ inline void rmQuotes(char *str){
   if(myverbose!=0) printf("LZ =%s\n", yytext);
 #endif
 }
-<TTBSF>{DIGIT}+                  {
-  g_Tbsf = atoi(yytext);
-  if(myverbose!=0) printf("g_Tbsf =%s\n", yytext);
-}
 <NPROCX>{DIGIT}+              {
 #ifndef FIXEDVOLUME
   N_PROC_X = atoi(yytext);
@@ -1951,7 +1852,7 @@ inline void rmQuotes(char *str){
 }
 
 
-<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,NDPOLYMONOMIAL,CLPOLYMONOMIAL,GAUGEMONOMIAL,SFGAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,DBCLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU>\n   {
+<INITMONOMIAL,DETMONOMIAL,CLDETMONOMIAL,CLDETRATMONOMIAL,NDPOLYMONOMIAL,CLPOLYMONOMIAL,GAUGEMONOMIAL,INTEGRATOR,INITINTEGRATOR,INITMEASUREMENT,PIONNORMMEAS,ONLINEMEAS,INITOPERATOR,TMOP,DBTMOP,OVERLAPOP,WILSONOP,CLOVEROP,DBCLOVEROP,POLYMONOMIAL,PLOOP,INITGPU,GPU>\n   {
   line_of_file++;
 }
 <*>\n                       {
@@ -2004,7 +1905,6 @@ int read_input(char * conf_file){
   LX = _default_LX;
   LY = _default_LY;
   LZ = _default_LZ;
-  g_Tbsf = _default_g_Tbsf;
   N_PROC_X = _default_N_PROC_X;
   N_PROC_Y = _default_N_PROC_Y;
   N_PROC_Z = _default_N_PROC_Z;
@@ -2033,7 +1933,6 @@ int read_input(char * conf_file){
   g_dbw2rand = 0;
   g_running_phmc = 0;
   g_beta = _default_g_beta;
-  g_eta = _default_g_eta;
   g_N_s = _default_g_N_s;
   g_dflgcr_flag = _default_g_dflgcr_flag;
   random_seed = _default_random_seed;

From 53654a549307ff3859541f37411281ee18657737 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Thu, 18 Oct 2012 22:55:57 +0200
Subject: [PATCH 065/110] removed some leftover SF variables

---
 geometry_eo.c           | 17 -----------------
 init_geometry_indices.c |  3 ---
 read_input.l            |  5 -----
 3 files changed, 25 deletions(-)

diff --git a/geometry_eo.c b/geometry_eo.c
index 68cf6eb2e..6a3297e61 100644
--- a/geometry_eo.c
+++ b/geometry_eo.c
@@ -289,7 +289,6 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
   y3 = (x3 + LZ) % LZ;
   ix = ((y0*LX + y1)*LY + y2)*LZ + y3;
   
-  y0=x0; /* ?!? */
 #if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
   if(x0 == T) {
     ix = VOLUME + y3 + LZ*y2 + LZ*LY*y1;
@@ -1459,22 +1458,6 @@ void geometry(){
 
   Hopping_Matrix_Indices();
 
-  /* This establishes the time-coordinate values. */
-  /* This should only be used for the SFBC because */
-  /* it may eventually vanish. */
-  /* This should be merged into the above. */
-  /* FIX this later, but for now */
-  /* I'm assuming a scalar machine. */
-  for( x0 = 0; x0 <  T; x0++ )
-  for( x1 = 0; x1 < LX; x1++ )
-  for( x2 = 0; x2 < LY; x2++ )
-  for( x3 = 0; x3 < LZ; x3++ )
-  {
-    ix = Index( x0, x1, x2, x3 );
-    assert( ix == g_ipt[ x0 ][ x1 ][ x2 ][ x3 ] );
-    g_t[ix] = x0;
-  }
-
   free(xeven);
 }
 
diff --git a/init_geometry_indices.c b/init_geometry_indices.c
index d673f0ef8..0edc6437c 100644
--- a/init_geometry_indices.c
+++ b/init_geometry_indices.c
@@ -128,8 +128,6 @@ int init_geometry_indices(const int V) {
   /* This should only be used for the SFBC. */
   /* This should not be used for anything other than the SFBC */
   /* because it might eventually vanish. */
-  g_t = (int*)calloc(V, sizeof(int));
-  if((void*)g_t == NULL) return(20);
 
   g_idn[0] = idn;
   g_iup[0] = iup;
@@ -184,5 +182,4 @@ void free_geometry_indices() {
   free(g_field_z_ipt_odd);
   free(g_field_z_ipt_even);
 #endif
-  free(g_t);
 }
diff --git a/read_input.l b/read_input.l
index f902ca412..16b9d299f 100644
--- a/read_input.l
+++ b/read_input.l
@@ -1956,11 +1956,6 @@ int read_input(char * conf_file){
   X2 = _default_X2;
   X3 = _default_X3;
   g_rgi_C1 = _default_g_rgi_C1;
-  g_Ct = _default_g_Ct;
-  g_Cs = _default_g_Cs;
-  g_C1ss = _default_g_C1ss;
-  g_C1tss = _default_g_C1tss;
-  g_C1tts = _default_g_C1tts;
   read_source_flag= _default_read_source_flag;
   if(SourceInfo.basename == NULL) SourceInfo.basename = (char*)malloc(100*sizeof(char));
   strcpy(SourceInfo.basename, _default_source_filename);

From fdf0d7994806835791ffa29add5a3b6728134499 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 09:34:46 +0200
Subject: [PATCH 066/110] moved monomial related files to subdirectory monomial

---
 Makefile.in                                   | 12 ++++------
 configure.in                                  |  4 ++--
 hmc_tm.c                                      |  2 +-
 init_spinor_field.c                           |  2 +-
 integrator.c                                  |  2 +-
 invert.c                                      |  2 +-
 .../clover_trlog_monomial.c                   |  2 +-
 .../clover_trlog_monomial.h                   |  0
 .../cloverdet_monomial.c                      |  2 +-
 .../cloverdet_monomial.h                      |  0
 .../cloverdetratio_monomial.c                 |  2 +-
 .../cloverdetratio_monomial.h                 |  0
 .../clovernd_trlog_monomial.c                 |  2 +-
 .../clovernd_trlog_monomial.h                 |  0
 .../cloverndpoly_monomial.c                   |  2 +-
 .../cloverndpoly_monomial.h                   |  0
 det_monomial.c => monomial/det_monomial.c     |  2 +-
 det_monomial.h => monomial/det_monomial.h     |  0
 .../detratio_monomial.c                       |  2 +-
 .../detratio_monomial.h                       |  0
 gauge_monomial.c => monomial/gauge_monomial.c |  2 +-
 gauge_monomial.h => monomial/gauge_monomial.h |  0
 monomial.c => monomial/monomial.c             |  2 +-
 monomial.h => monomial/monomial.h             | 24 +++++++++----------
 .../nddetratio_monomial.c                     |  2 +-
 .../nddetratio_monomial.h                     |  0
 .../ndpoly_monomial.c                         |  2 +-
 .../ndpoly_monomial.h                         |  0
 poly_monomial.c => monomial/poly_monomial.c   |  2 +-
 poly_monomial.h => monomial/poly_monomial.h   |  0
 .../sf_gauge_monomial.c                       |  2 +-
 .../sf_gauge_monomial.h                       |  0
 phmc.c                                        |  2 +-
 read_input.l                                  |  2 +-
 reweighting_factor.c                          |  2 +-
 solver/dirac_operator_eigenvectors.c          |  2 +-
 update_momenta.c                              |  2 +-
 update_tm.c                                   |  2 +-
 38 files changed, 42 insertions(+), 44 deletions(-)
 rename clover_trlog_monomial.c => monomial/clover_trlog_monomial.c (98%)
 rename clover_trlog_monomial.h => monomial/clover_trlog_monomial.h (100%)
 rename cloverdet_monomial.c => monomial/cloverdet_monomial.c (99%)
 rename cloverdet_monomial.h => monomial/cloverdet_monomial.h (100%)
 rename cloverdetratio_monomial.c => monomial/cloverdetratio_monomial.c (99%)
 rename cloverdetratio_monomial.h => monomial/cloverdetratio_monomial.h (100%)
 rename clovernd_trlog_monomial.c => monomial/clovernd_trlog_monomial.c (98%)
 rename clovernd_trlog_monomial.h => monomial/clovernd_trlog_monomial.h (100%)
 rename cloverndpoly_monomial.c => monomial/cloverndpoly_monomial.c (99%)
 rename cloverndpoly_monomial.h => monomial/cloverndpoly_monomial.h (100%)
 rename det_monomial.c => monomial/det_monomial.c (99%)
 rename det_monomial.h => monomial/det_monomial.h (100%)
 rename detratio_monomial.c => monomial/detratio_monomial.c (99%)
 rename detratio_monomial.h => monomial/detratio_monomial.h (100%)
 rename gauge_monomial.c => monomial/gauge_monomial.c (99%)
 rename gauge_monomial.h => monomial/gauge_monomial.h (100%)
 rename monomial.c => monomial/monomial.c (99%)
 rename monomial.h => monomial/monomial.h (89%)
 rename nddetratio_monomial.c => monomial/nddetratio_monomial.c (98%)
 rename nddetratio_monomial.h => monomial/nddetratio_monomial.h (100%)
 rename ndpoly_monomial.c => monomial/ndpoly_monomial.c (99%)
 rename ndpoly_monomial.h => monomial/ndpoly_monomial.h (100%)
 rename poly_monomial.c => monomial/poly_monomial.c (99%)
 rename poly_monomial.h => monomial/poly_monomial.h (100%)
 rename sf_gauge_monomial.c => monomial/sf_gauge_monomial.c (99%)
 rename sf_gauge_monomial.h => monomial/sf_gauge_monomial.h (100%)

diff --git a/Makefile.in b/Makefile.in
index bb6902b56..fdcb48cdf 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -56,18 +56,16 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	xchange_field xchange_gauge prepare_source \
 	init_gauge_field init_geometry_indices init_spinor_field \
 	init_dirac_halfspinor xchange_halffield \
-	tm_operators_nd nddetratio_monomial \
+	tm_operators_nd \
 	chebyshev_polynomial_nd Ptilde_nd  \
 	init_chi_spinor_field reweighting_factor_nd \
 	init_bispinor_field D_psi \
 	xchange_lexicfield xchange_2fields online_measurement \
-	monomial det_monomial detratio_monomial update_momenta \
-	integrator gauge_monomial ndpoly_monomial phmc \
-	clover_trlog_monomial cloverdet_monomial cloverdetratio_monomial \
-	clovernd_trlog_monomial \
-	little_D block Dov_psi operator poly_monomial measurements pion_norm Dov_proj \
+	 update_momenta \
+	integrator  phmc \
+	little_D block Dov_psi operator measurements pion_norm Dov_proj \
 	xchange_field_tslice temporalgauge spinor_fft X_psi P_M_eta \
-	xchange_jacobi jacobi init_jacobi_field cloverndpoly_monomial \
+	xchange_jacobi jacobi init_jacobi_field \
 	fatal_error invert_clover_eo gettime @SPI_FILES@ init_omp_accumulators
 
 ## the GPU modules (all .cu files in $GPUDIR)
diff --git a/configure.in b/configure.in
index 673682a59..a1ae72278 100644
--- a/configure.in
+++ b/configure.in
@@ -40,7 +40,7 @@ AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
 LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib"
 CCLD=${CC}
 
-USESUBDIRS="buffers cu io solver linalg"
+USESUBDIRS="buffers cu io solver linalg monomial"
 
 AC_CHECK_HEADERS([stdint.h],
 [ dnl for inttypes.h and stdint.h for uint_xxx types
@@ -897,7 +897,7 @@ if test ! -e tests/regressions; then
 fi
 
 
-LIBS="-lhmc -lsolver -llinalg -lhmc -lio $LIBS"
+LIBS="-lhmc -lmonomial -lsolver -llinalg -lhmc -lio $LIBS"
 AUTOCONF=autoconf
 
 for i in $USESUBDIRS
diff --git a/hmc_tm.c b/hmc_tm.c
index 7c15a9e28..46a35eb48 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -75,7 +75,7 @@
 #include "boundary.h"
 #include "phmc.h"
 #include "solver/solver.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "integrator.h"
 #include "sighandler.h"
 #include "measurements.h"
diff --git a/init_spinor_field.c b/init_spinor_field.c
index 628e41e88..a2c8eefcf 100644
--- a/init_spinor_field.c
+++ b/init_spinor_field.c
@@ -29,7 +29,7 @@
 #include "global.h"
 #include "su3.h"
 #include "sse.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 
 spinor * sp = NULL;
 spinor * sp_csg = NULL;
diff --git a/integrator.c b/integrator.c
index 1a2d171a9..13e97a0e2 100644
--- a/integrator.c
+++ b/integrator.c
@@ -26,7 +26,7 @@
 #include <math.h>
 #include <time.h>
 #include "global.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "update_momenta.h"
 #include "update_gauge.h"
 #include "hamiltonian_field.h"
diff --git a/invert.c b/invert.c
index bb3d293d2..28dde66d2 100644
--- a/invert.c
+++ b/invert.c
@@ -70,7 +70,7 @@
 #include "xchange_halffield.h"
 #include "smearing/stout.h"
 #include "invert_eo.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "ranlxd.h"
 #include "phmc.h"
 #include "D_psi.h"
diff --git a/clover_trlog_monomial.c b/monomial/clover_trlog_monomial.c
similarity index 98%
rename from clover_trlog_monomial.c
rename to monomial/clover_trlog_monomial.c
index 93f7cc891..7215869e4 100644
--- a/clover_trlog_monomial.c
+++ b/monomial/clover_trlog_monomial.c
@@ -31,7 +31,7 @@
 #include "su3spinor.h"
 #include "clovertm_operators.h"
 #include "clover_leaf.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "Hopping_Matrix.h"
 #include "clover_trlog_monomial.h"
 
diff --git a/clover_trlog_monomial.h b/monomial/clover_trlog_monomial.h
similarity index 100%
rename from clover_trlog_monomial.h
rename to monomial/clover_trlog_monomial.h
diff --git a/cloverdet_monomial.c b/monomial/cloverdet_monomial.c
similarity index 99%
rename from cloverdet_monomial.c
rename to monomial/cloverdet_monomial.c
index 6a4a54330..7bc86c851 100644
--- a/cloverdet_monomial.c
+++ b/monomial/cloverdet_monomial.c
@@ -42,7 +42,7 @@
 #include "read_input.h"
 #include "hamiltonian_field.h"
 #include "boundary.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "clovertm_operators.h"
 #include "cloverdet_monomial.h"
 
diff --git a/cloverdet_monomial.h b/monomial/cloverdet_monomial.h
similarity index 100%
rename from cloverdet_monomial.h
rename to monomial/cloverdet_monomial.h
diff --git a/cloverdetratio_monomial.c b/monomial/cloverdetratio_monomial.c
similarity index 99%
rename from cloverdetratio_monomial.c
rename to monomial/cloverdetratio_monomial.c
index 073c6d28a..43932630d 100644
--- a/cloverdetratio_monomial.c
+++ b/monomial/cloverdetratio_monomial.c
@@ -39,7 +39,7 @@
 #include "read_input.h"
 #include "clovertm_operators.h"
 #include "clover_leaf.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "boundary.h"
 #include "cloverdetratio_monomial.h"
 
diff --git a/cloverdetratio_monomial.h b/monomial/cloverdetratio_monomial.h
similarity index 100%
rename from cloverdetratio_monomial.h
rename to monomial/cloverdetratio_monomial.h
diff --git a/clovernd_trlog_monomial.c b/monomial/clovernd_trlog_monomial.c
similarity index 98%
rename from clovernd_trlog_monomial.c
rename to monomial/clovernd_trlog_monomial.c
index e1f21f93d..d21ada40a 100644
--- a/clovernd_trlog_monomial.c
+++ b/monomial/clovernd_trlog_monomial.c
@@ -31,7 +31,7 @@
 #include "su3spinor.h"
 #include "clovertm_operators.h"
 #include "clover_leaf.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "Hopping_Matrix.h"
 #include "clovernd_trlog_monomial.h"
 
diff --git a/clovernd_trlog_monomial.h b/monomial/clovernd_trlog_monomial.h
similarity index 100%
rename from clovernd_trlog_monomial.h
rename to monomial/clovernd_trlog_monomial.h
diff --git a/cloverndpoly_monomial.c b/monomial/cloverndpoly_monomial.c
similarity index 99%
rename from cloverndpoly_monomial.c
rename to monomial/cloverndpoly_monomial.c
index fee570d71..5f3d91a59 100644
--- a/cloverndpoly_monomial.c
+++ b/monomial/cloverndpoly_monomial.c
@@ -36,7 +36,7 @@
 #include "Hopping_Matrix.h"
 #include "phmc.h"
 #include "Ptilde_nd.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "hamiltonian_field.h"
 #include "boundary.h"
 #include "clovertm_operators.h"
diff --git a/cloverndpoly_monomial.h b/monomial/cloverndpoly_monomial.h
similarity index 100%
rename from cloverndpoly_monomial.h
rename to monomial/cloverndpoly_monomial.h
diff --git a/det_monomial.c b/monomial/det_monomial.c
similarity index 99%
rename from det_monomial.c
rename to monomial/det_monomial.c
index 7d39e22ae..6fd2938b0 100644
--- a/det_monomial.c
+++ b/monomial/det_monomial.c
@@ -38,7 +38,7 @@
 #include "read_input.h"
 #include "hamiltonian_field.h"
 #include "boundary.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "det_monomial.h"
 
 /* think about chronological solver ! */
diff --git a/det_monomial.h b/monomial/det_monomial.h
similarity index 100%
rename from det_monomial.h
rename to monomial/det_monomial.h
diff --git a/detratio_monomial.c b/monomial/detratio_monomial.c
similarity index 99%
rename from detratio_monomial.c
rename to monomial/detratio_monomial.c
index bb24ac24a..6ff36d92f 100644
--- a/detratio_monomial.c
+++ b/monomial/detratio_monomial.c
@@ -37,7 +37,7 @@
 #include "solver/solver.h"
 #include "read_input.h"
 #include "gamma.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "boundary.h"
 #include "detratio_monomial.h"
 
diff --git a/detratio_monomial.h b/monomial/detratio_monomial.h
similarity index 100%
rename from detratio_monomial.h
rename to monomial/detratio_monomial.h
diff --git a/gauge_monomial.c b/monomial/gauge_monomial.c
similarity index 99%
rename from gauge_monomial.c
rename to monomial/gauge_monomial.c
index 7ead1e05a..e39dbf0b5 100644
--- a/gauge_monomial.c
+++ b/monomial/gauge_monomial.c
@@ -39,7 +39,7 @@
 #include "read_input.h"
 #include "measure_gauge_action.h"
 #include "measure_rectangles.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "hamiltonian_field.h"
 #include "gauge_monomial.h"
 
diff --git a/gauge_monomial.h b/monomial/gauge_monomial.h
similarity index 100%
rename from gauge_monomial.h
rename to monomial/gauge_monomial.h
diff --git a/monomial.c b/monomial/monomial.c
similarity index 99%
rename from monomial.c
rename to monomial/monomial.c
index ecf6fae89..b9ef3750a 100644
--- a/monomial.c
+++ b/monomial/monomial.c
@@ -39,7 +39,7 @@
 #include "linalg_eo.h"
 #include "default_input_values.h"
 #include "read_input.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 
 
 
diff --git a/monomial.h b/monomial/monomial.h
similarity index 89%
rename from monomial.h
rename to monomial/monomial.h
index 78b9d1f66..b31aeb6bb 100644
--- a/monomial.h
+++ b/monomial/monomial.h
@@ -112,18 +112,18 @@ typedef struct {
   void (*Qm) (spinor * const, spinor * const);
 } monomial;
 
-#include "det_monomial.h"
-#include "detratio_monomial.h"
-#include "poly_monomial.h"
-#include "ndpoly_monomial.h"
-#include "nddetratio_monomial.h"
-#include "gauge_monomial.h"
-#include "sf_gauge_monomial.h"
-#include "clover_trlog_monomial.h"
-#include "clovernd_trlog_monomial.h"
-#include "cloverdet_monomial.h"
-#include "cloverdetratio_monomial.h"
-#include "cloverndpoly_monomial.h"
+#include "monomial/det_monomial.h"
+#include "monomial/detratio_monomial.h"
+#include "monomial/poly_monomial.h"
+#include "monomial/ndpoly_monomial.h"
+#include "monomial/nddetratio_monomial.h"
+#include "monomial/gauge_monomial.h"
+#include "monomial/sf_gauge_monomial.h"
+#include "monomial/clover_trlog_monomial.h"
+#include "monomial/clovernd_trlog_monomial.h"
+#include "monomial/cloverdet_monomial.h"
+#include "monomial/cloverdetratio_monomial.h"
+#include "monomial/cloverndpoly_monomial.h"
 
 /* list of all monomials */
 extern monomial monomial_list[max_no_monomials];
diff --git a/nddetratio_monomial.c b/monomial/nddetratio_monomial.c
similarity index 98%
rename from nddetratio_monomial.c
rename to monomial/nddetratio_monomial.c
index 59230840a..5264f3521 100644
--- a/nddetratio_monomial.c
+++ b/monomial/nddetratio_monomial.c
@@ -43,7 +43,7 @@
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "reweighting_factor_nd.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "hamiltonian_field.h"
 #include "nddetratio_monomial.h"
 
diff --git a/nddetratio_monomial.h b/monomial/nddetratio_monomial.h
similarity index 100%
rename from nddetratio_monomial.h
rename to monomial/nddetratio_monomial.h
diff --git a/ndpoly_monomial.c b/monomial/ndpoly_monomial.c
similarity index 99%
rename from ndpoly_monomial.c
rename to monomial/ndpoly_monomial.c
index 599fe4848..84d054742 100644
--- a/ndpoly_monomial.c
+++ b/monomial/ndpoly_monomial.c
@@ -40,7 +40,7 @@
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "reweighting_factor_nd.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "hamiltonian_field.h"
 #include "boundary.h"
 #include "phmc.h"
diff --git a/ndpoly_monomial.h b/monomial/ndpoly_monomial.h
similarity index 100%
rename from ndpoly_monomial.h
rename to monomial/ndpoly_monomial.h
diff --git a/poly_monomial.c b/monomial/poly_monomial.c
similarity index 99%
rename from poly_monomial.c
rename to monomial/poly_monomial.c
index 5f002c8ac..0b3dd5d90 100644
--- a/poly_monomial.c
+++ b/monomial/poly_monomial.c
@@ -33,7 +33,7 @@
 #include "global.h"
 #include "start.h"
 #include "read_input.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "poly_monomial.h"
 #include "boundary.h"
 #include "linalg/square_norm.h"
diff --git a/poly_monomial.h b/monomial/poly_monomial.h
similarity index 100%
rename from poly_monomial.h
rename to monomial/poly_monomial.h
diff --git a/sf_gauge_monomial.c b/monomial/sf_gauge_monomial.c
similarity index 99%
rename from sf_gauge_monomial.c
rename to monomial/sf_gauge_monomial.c
index 0c1a4ab8d..719c59eaf 100644
--- a/sf_gauge_monomial.c
+++ b/monomial/sf_gauge_monomial.c
@@ -36,7 +36,7 @@
 #include "read_input.h"
 #include "measure_gauge_action.h"
 #include "measure_rectangles.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "sf_gauge_monomial.h"
 #include "hamiltonian_field.h"
 #include "sf_utils.h"
diff --git a/sf_gauge_monomial.h b/monomial/sf_gauge_monomial.h
similarity index 100%
rename from sf_gauge_monomial.h
rename to monomial/sf_gauge_monomial.h
diff --git a/phmc.c b/phmc.c
index 103022965..3e5ae9690 100644
--- a/phmc.c
+++ b/phmc.c
@@ -36,7 +36,7 @@
 #include "Ptilde_nd.h"
 #include "tm_operators_nd.h"
 #include "phmc.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "solver/matrix_mult_typedef_bi.h"
 #include "gettime.h"
 
diff --git a/read_input.l b/read_input.l
index dc460847b..369bff91b 100644
--- a/read_input.l
+++ b/read_input.l
@@ -55,7 +55,7 @@ EQL {SPC}*={SPC}*
 #include"global.h"
 #include"read_input.h"
 #include"default_input_values.h"
-#include"monomial.h"
+#include"monomial/monomial.h"
 #include"measurements.h"
 #include"integrator.h"
 #include"operator.h"
diff --git a/reweighting_factor.c b/reweighting_factor.c
index e1e5defa3..cc3738904 100644
--- a/reweighting_factor.c
+++ b/reweighting_factor.c
@@ -27,7 +27,7 @@
 #include "global.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "hamiltonian_field.h"
 #include "reweighting_factor.h"
 
diff --git a/solver/dirac_operator_eigenvectors.c b/solver/dirac_operator_eigenvectors.c
index 84c8856c5..bdf8d9943 100644
--- a/solver/dirac_operator_eigenvectors.c
+++ b/solver/dirac_operator_eigenvectors.c
@@ -19,7 +19,7 @@
 #include "config.h"
 #include "su3.h"
 #include "sse.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include <complex.h>
 #include "dirac_operator_eigenvectors.h"
 #include "geometry_eo.h"
diff --git a/update_momenta.c b/update_momenta.c
index 88b45dab2..ba9697f4a 100644
--- a/update_momenta.c
+++ b/update_momenta.c
@@ -30,7 +30,7 @@
 #include "su3.h"
 #include "su3adj.h"
 #include "su3spinor.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "xchange_deri.h"
 #include "clover_leaf.h"
 #include "read_input.h"
diff --git a/update_tm.c b/update_tm.c
index 8e3085aae..707f94ded 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -56,7 +56,7 @@
 #include "xchange.h"
 #include "measure_rectangles.h"
 #include "init_gauge_tmp.h"
-#include "monomial.h"
+#include "monomial/monomial.h"
 #include "integrator.h"
 #include "hamiltonian_field.h"
 #include "update_tm.h"

From 6cda4343e9bb7d1c0f90b472171c0d36bd804e62 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 09:36:44 +0200
Subject: [PATCH 067/110] moved monomial related files to subdirectory monomial

---
 monomial/Makefile.in | 99 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 monomial/Makefile.in

diff --git a/monomial/Makefile.in b/monomial/Makefile.in
new file mode 100644
index 000000000..e2007ce6d
--- /dev/null
+++ b/monomial/Makefile.in
@@ -0,0 +1,99 @@
+# Paths for the monomial/ subdirectory; subdir must name this directory (it was a copy-paste of linalg) -- presumably read by Makefile.global, verify.
+srcdir = @srcdir@
+top_builddir =  @top_builddir@
+abs_top_builddir = @abs_top_builddir@
+top_srcdir = @top_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+subdir = monomial
+builddir = @builddir@
+
+CFLAGS = @CFLAGS@
+DEPFLAGS = @DEPFLAGS@
+LDFLAGS = @LDFLAGS@
+DEFS = @DEFS@
+OPTARGS = @OPTARGS@
+SOPTARGS = @SOPTARGS@
+
+AR = @AR@
+RANLIB = @RANLIB@
+CC = @CC@
+CCDEP = @CCDEP@
+CCLD = ${CC}
+LINK = ${CCLD} ${CFLAGS} ${LDFLAGS} ${OPTARGS} -o $@
+LEX = @LEX@
+AUTOCONF = @AUTOCONF@
+DEFS = @DEFS@
+
+INCLUDES = @INCLUDES@
+LDADD =
+#COMPILE = ${CC} ${DEFS} ${INCLUDES} ${CFLAGS}
+COMPILE = ${CC} $(DEFS) ${INCLUDES} ${CFLAGS}
+
+LIBRARIES = libmonomial
+libmonomial_TARGETS =  nddetratio_monomial monomial det_monomial detratio_monomial \
+	gauge_monomial ndpoly_monomial clover_trlog_monomial cloverdet_monomial cloverdetratio_monomial \
+	clovernd_trlog_monomial poly_monomial cloverndpoly_monomial
+
+
+libmonomial_STARGETS = 
+
+libmonomial_OBJECTS = $(addsuffix .o, ${libmonomial_TARGETS})
+libmonomial_SOBJECTS = $(addsuffix .o, ${libmonomial_STARGETS})
+
+# default rule
+
+all: Makefile dep libmonomial.a
+
+# rules for debugging
+debug all-debug: CFLAGS := $(CFLAGS) @DEBUG_FLAG@
+debug all-debug: all
+
+# rules for profiling information
+profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@
+profile all-profile: all
+
+
+#include dep rules
+
+-include $(addsuffix .d,${libmonomial_TARGETS})
+
+include ${top_srcdir}/Makefile.global
+
+# rule to compile objects
+
+${libmonomial_OBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${OPTARGS} -c $<
+
+${libmonomial_SOBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${SOPTARGS} -c $<
+
+# rule to make libmonomial
+
+libmonomial.a: ${libmonomial_OBJECTS} ${libmonomial_SOBJECTS} Makefile
+	@rm -f libmonomial.a
+	@${AR} cru libmonomial.a ${libmonomial_OBJECTS} ${libmonomial_SOBJECTS}
+	@$(RANLIB) libmonomial.a
+	@cp libmonomial.a ../lib/libmonomial.a
+
+# rule to generate .d files
+
+$(addsuffix .d, $(libmonomial_TARGETS) ${libmonomial_STARGETS}): %.d: ${srcdir}/%.c Makefile
+	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+
+# rule to make dependencies
+
+dep: ${addsuffix .d, ${libmonomial_TARGETS} ${libmonomial_STARGETS}}
+
+# rules to clean
+
+compile-clean: Makefile
+	rm -f ${$(addsuffix _OBJECTS, ${LIBRARIES})} ${$(addsuffix _SOBJECTS, ${LIBRARIES})} *.d
+
+clean: compile-clean 
+	rm -f $(addsuffix .a, ${LIBRARIES})
+	rm -f ../lib/libmonomial.a
+
+distclean: clean
+	rm -f Makefile
+
+.PHONY: all dep clean compile-clean distclean profile all-profile debug all-debug

From 2a040e9237996660bbc2d81c8e6b4b4682975705 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 09:59:51 +0200
Subject: [PATCH 068/110] moved exchange related files to subdirectory xchange

---
 Hopping_Matrix.c                              |  8 +-
 Hopping_Matrix_nocom.c                        |  2 +-
 LapH_ev.c                                     |  2 +-
 Makefile.in                                   | 16 ++-
 benchmark.c                                   |  3 +-
 block.c                                       |  2 +-
 buffers/utils.ih                              |  3 +-
 check_locallity.c                             |  3 +-
 configure.in                                  |  4 +-
 deriv_Sb.c                                    |  2 +-
 deriv_Sb_D_psi.c                              |  3 +-
 hmc_tm.c                                      |  3 +-
 hopping_test.c                                |  2 +-
 hybrid_update.c                               |  2 +-
 init_jacobi_field.c                           |  2 +-
 invert.c                                      |  3 +-
 invert_doublet_eo.c                           |  2 +-
 invert_eo.c                                   |  2 +-
 jacobi.c                                      |  2 +-
 solver/fgmres.c                               |  1 -
 solver/gmres_precon.c                         |  1 -
 test/check_xchange.c                          |  2 +-
 test_lemon.c                                  |  2 +-
 tm_sub_Hopping_Matrix.c                       |  5 +-
 tm_times_Hopping_Matrix.c                     |  5 +-
 update_gauge.c                                |  2 +-
 update_tm.c                                   |  2 +-
 xchange/Makefile.in                           | 98 +++++++++++++++++++
 xchange.h => xchange/xchange.h                | 12 +--
 .../xchange_2fields.c                         |  0
 .../xchange_2fields.h                         |  0
 xchange_deri.c => xchange/xchange_deri.c      |  0
 xchange_deri.h => xchange/xchange_deri.h      |  0
 xchange_field.c => xchange/xchange_field.c    |  0
 xchange_field.h => xchange/xchange_field.h    |  0
 .../xchange_field_tslice.c                    |  0
 .../xchange_field_tslice.h                    |  0
 xchange_gauge.c => xchange/xchange_gauge.c    |  0
 xchange_gauge.h => xchange/xchange_gauge.h    |  0
 .../xchange_halffield.c                       |  0
 .../xchange_halffield.h                       |  0
 xchange_jacobi.c => xchange/xchange_jacobi.c  |  0
 xchange_jacobi.h => xchange/xchange_jacobi.h  |  0
 .../xchange_lexicfield.c                      |  0
 .../xchange_lexicfield.h                      |  0
 45 files changed, 135 insertions(+), 61 deletions(-)
 create mode 100644 xchange/Makefile.in
 rename xchange.h => xchange/xchange.h (81%)
 rename xchange_2fields.c => xchange/xchange_2fields.c (100%)
 rename xchange_2fields.h => xchange/xchange_2fields.h (100%)
 rename xchange_deri.c => xchange/xchange_deri.c (100%)
 rename xchange_deri.h => xchange/xchange_deri.h (100%)
 rename xchange_field.c => xchange/xchange_field.c (100%)
 rename xchange_field.h => xchange/xchange_field.h (100%)
 rename xchange_field_tslice.c => xchange/xchange_field_tslice.c (100%)
 rename xchange_field_tslice.h => xchange/xchange_field_tslice.h (100%)
 rename xchange_gauge.c => xchange/xchange_gauge.c (100%)
 rename xchange_gauge.h => xchange/xchange_gauge.h (100%)
 rename xchange_halffield.c => xchange/xchange_halffield.c (100%)
 rename xchange_halffield.h => xchange/xchange_halffield.h (100%)
 rename xchange_jacobi.c => xchange/xchange_jacobi.c (100%)
 rename xchange_jacobi.h => xchange/xchange_jacobi.h (100%)
 rename xchange_lexicfield.c => xchange/xchange_lexicfield.c (100%)
 rename xchange_lexicfield.h => xchange/xchange_lexicfield.h (100%)

diff --git a/Hopping_Matrix.c b/Hopping_Matrix.c
index d8bf3a4c3..4ad0ef4c3 100644
--- a/Hopping_Matrix.c
+++ b/Hopping_Matrix.c
@@ -57,13 +57,7 @@
 #include "global.h"
 #include "su3.h"
 #ifdef MPI
-#  include "xchange_field.h"
-#  ifdef _USE_TSPLITPAR
-#    include "xchange_field_tslice.h"
-#  endif
-#  if defined _USE_HALFSPINOR
-#    include "xchange_halffield.h"
-#  endif
+#  include "xchange/xchange.h"
 #endif
 #include "boundary.h"
 #include "init_dirac_halfspinor.h"
diff --git a/Hopping_Matrix_nocom.c b/Hopping_Matrix_nocom.c
index 085db43e6..8db219718 100644
--- a/Hopping_Matrix_nocom.c
+++ b/Hopping_Matrix_nocom.c
@@ -41,7 +41,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "global.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "su3.h"
 #include "sse.h"
 #include "boundary.h"
diff --git a/LapH_ev.c b/LapH_ev.c
index d576d447f..93e1d6aa8 100644
--- a/LapH_ev.c
+++ b/LapH_ev.c
@@ -49,7 +49,7 @@
 #include "geometry_eo.h"
 #include "read_input.h"
 #include "start.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "init_gauge_field.h"
 #include "init_geometry_indices.h"
 #include "mpi_init.h"
diff --git a/Makefile.in b/Makefile.in
index fdcb48cdf..59586e25c 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -51,21 +51,17 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	polyakov_loop getopt sighandler reweighting_factor \
 	source_generation boundary update_tm ranlxd  \
 	mpi_init deriv_Sb deriv_Sb_D_psi ranlxs \
-	xchange_deri geometry_eo invert_overlap \
-	init_moment_field init_gauge_tmp \
-	xchange_field xchange_gauge prepare_source \
+	geometry_eo invert_overlap \
+	init_moment_field init_gauge_tmp prepare_source \
 	init_gauge_field init_geometry_indices init_spinor_field \
-	init_dirac_halfspinor xchange_halffield \
-	tm_operators_nd \
+	init_dirac_halfspinor tm_operators_nd \
 	chebyshev_polynomial_nd Ptilde_nd  \
 	init_chi_spinor_field reweighting_factor_nd \
 	init_bispinor_field D_psi \
-	xchange_lexicfield xchange_2fields online_measurement \
-	 update_momenta \
-	integrator  phmc \
+	online_measurement update_momenta integrator  phmc \
 	little_D block Dov_psi operator measurements pion_norm Dov_proj \
-	xchange_field_tslice temporalgauge spinor_fft X_psi P_M_eta \
-	xchange_jacobi jacobi init_jacobi_field \
+	temporalgauge spinor_fft X_psi P_M_eta \
+	jacobi init_jacobi_field \
 	fatal_error invert_clover_eo gettime @SPI_FILES@ init_omp_accumulators
 
 ## the GPU modules (all .cu files in $GPUDIR)
diff --git a/benchmark.c b/benchmark.c
index 2ac38f1b4..63046e273 100644
--- a/benchmark.c
+++ b/benchmark.c
@@ -58,14 +58,13 @@
 #include "Hopping_Matrix_nocom.h"
 #include "tm_operators.h"
 #include "global.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "init_gauge_field.h"
 #include "init_geometry_indices.h"
 #include "init_spinor_field.h"
 #include "init_moment_field.h"
 #include "init_dirac_halfspinor.h"
 #include "test/check_geometry.h"
-#include "xchange_halffield.h"
 #include "D_psi.h"
 #include "phmc.h"
 #include "mpi_init.h"
diff --git a/block.c b/block.c
index 48e90019b..dc068fc8a 100644
--- a/block.c
+++ b/block.c
@@ -31,7 +31,7 @@
 #include "D_psi.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "xchange_lexicfield.h"
+#include "xchange/xchange.h"
 #include "block.h"
 #include "solver/lu_solve.h"
 #include "su3.h"
diff --git a/buffers/utils.ih b/buffers/utils.ih
index fd04000b0..939ff5e0c 100644
--- a/buffers/utils.ih
+++ b/buffers/utils.ih
@@ -5,7 +5,6 @@
 #include <string.h>
 
 #include <global.h>
-#include <xchange_gauge.h>
-#include <xchange.h>
+#include <xchange/xchange.h>
 
 #include <buffers/utils.h>
diff --git a/check_locallity.c b/check_locallity.c
index c27e1b648..e769902b5 100644
--- a/check_locallity.c
+++ b/check_locallity.c
@@ -47,7 +47,7 @@
 #include "start.h"
 #include "measure_gauge_action.h"
 #ifdef MPI
-#include "xchange.h"
+#include "xchange/xchange.h"
 #endif
 #include "read_input.h"
 #include "mpi_init.h"
@@ -59,7 +59,6 @@
 #include "init_spinor_field.h"
 #include "init_moment_field.h"
 #include "init_dirac_halfspinor.h"
-#include "xchange_halffield.h"
 #include "smearing/stout.h"
 #include "su3spinor.h"
 #include "invert_eo.h"
diff --git a/configure.in b/configure.in
index a1ae72278..96e435cba 100644
--- a/configure.in
+++ b/configure.in
@@ -40,7 +40,7 @@ AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
 LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib"
 CCLD=${CC}
 
-USESUBDIRS="buffers cu io solver linalg monomial"
+USESUBDIRS="buffers cu io solver linalg monomial xchange"
 
 AC_CHECK_HEADERS([stdint.h],
 [ dnl for inttypes.h and stdint.h for uint_xxx types
@@ -897,7 +897,7 @@ if test ! -e tests/regressions; then
 fi
 
 
-LIBS="-lhmc -lmonomial -lsolver -llinalg -lhmc -lio $LIBS"
+LIBS="-lhmc -lmonomial -lsolver -lxchange -llinalg -lhmc -lio $LIBS"
 AUTOCONF=autoconf
 
 for i in $USESUBDIRS
diff --git a/deriv_Sb.c b/deriv_Sb.c
index bfe71cc20..ddb6b453b 100644
--- a/deriv_Sb.c
+++ b/deriv_Sb.c
@@ -47,7 +47,7 @@
 #include "global.h"
 #include "su3.h"
 #include "boundary.h"
-#include "xchange_2fields.h"
+#include "xchange/xchange.h"
 #include "sse.h"
 #include "update_backward_gauge.h"
 #include "hamiltonian_field.h"
diff --git a/deriv_Sb_D_psi.c b/deriv_Sb_D_psi.c
index 7aea863e7..7604b1162 100644
--- a/deriv_Sb_D_psi.c
+++ b/deriv_Sb_D_psi.c
@@ -27,8 +27,7 @@
 #include "global.h"
 #include "su3.h"
 #include "boundary.h"
-#include "xchange_field.h"
-#include "xchange_lexicfield.h"
+#include "xchange/xchange.h"
 #include "sse.h"
 #include "hamiltonian_field.h"
 #include "deriv_Sb_D_psi.h"
diff --git a/hmc_tm.c b/hmc_tm.c
index 46a35eb48..c8e3e47c0 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -56,7 +56,7 @@
 #include "measure_gauge_action.h"
 #include "measure_rectangles.h"
 #ifdef MPI
-# include "xchange.h"
+# include "xchange/xchange.h"
 #endif
 #include "read_input.h"
 #include "mpi_init.h"
@@ -70,7 +70,6 @@
 #include "init_dirac_halfspinor.h"
 #include "init_bispinor_field.h"
 #include "init_chi_spinor_field.h"
-#include "xchange_halffield.h"
 #include "test/check_geometry.h"
 #include "boundary.h"
 #include "phmc.h"
diff --git a/hopping_test.c b/hopping_test.c
index a16015d70..44cf4dabd 100644
--- a/hopping_test.c
+++ b/hopping_test.c
@@ -56,7 +56,7 @@
 #include "Hopping_Matrix_nocom.h"
 #include "tm_operators.h"
 #include "global.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "init_gauge_field.h"
 #include "init_geometry_indices.h"
 #include "init_spinor_field.h"
diff --git a/hybrid_update.c b/hybrid_update.c
index 66b1d2e03..1dce0f4c4 100644
--- a/hybrid_update.c
+++ b/hybrid_update.c
@@ -35,7 +35,7 @@
 #include "su3spinor.h"
 #include "expo.h"
 #include "sse.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "get_rectangle_staples.h"
 #include "gamma.h"
 #include "get_staples.h"
diff --git a/init_jacobi_field.c b/init_jacobi_field.c
index 4bc4b16f2..52b4f1010 100644
--- a/init_jacobi_field.c
+++ b/init_jacobi_field.c
@@ -27,7 +27,7 @@
 #include "global.h"
 #include "su3.h"
 #include "start.h"
-#include "xchange_jacobi.h"
+#include "xchange/xchange.h"
 #include "init_jacobi_field.h"
 
 #ifdef WITHLAPH
diff --git a/invert.c b/invert.c
index 28dde66d2..94c8e9803 100644
--- a/invert.c
+++ b/invert.c
@@ -52,7 +52,7 @@
 /*#include "eigenvalues.h"*/
 #include "measure_gauge_action.h"
 #ifdef MPI
-#include "xchange.h"
+#include "xchange/xchange.h"
 #endif
 #include <io/utils.h>
 #include "read_input.h"
@@ -67,7 +67,6 @@
 #include "init_dirac_halfspinor.h"
 #include "init_bispinor_field.h"
 #include "init_chi_spinor_field.h"
-#include "xchange_halffield.h"
 #include "smearing/stout.h"
 #include "invert_eo.h"
 #include "monomial/monomial.h"
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index 969c173ee..c64f25c8f 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -43,7 +43,7 @@
 #include"gamma.h"
 #include"solver/solver.h"
 #include"read_input.h"
-#include"xchange.h"
+#include"xchange/xchange.h"
 #include"tm_operators_nd.h"
 #include"invert_doublet_eo.h"
 
diff --git a/invert_eo.c b/invert_eo.c
index 8bfbc245e..944398e07 100644
--- a/invert_eo.c
+++ b/invert_eo.c
@@ -43,7 +43,7 @@
 #include"gamma.h"
 #include"solver/solver.h"
 #include"read_input.h"
-#include"xchange.h"
+#include"xchange/xchange.h"
 #include"solver/poly_precon.h"
 #include"solver/dfl_projector.h"
 #include"invert_eo.h"
diff --git a/jacobi.c b/jacobi.c
index 506b1f762..b43b5eb23 100644
--- a/jacobi.c
+++ b/jacobi.c
@@ -36,7 +36,7 @@
 #endif
 #include "global.h"
 #include "su3.h"
-#include "xchange_jacobi.h"
+#include "xchange/xchange.h"
 
 #ifdef WITHLAPH
 
diff --git a/solver/fgmres.c b/solver/fgmres.c
index a0cb585af..58314849d 100644
--- a/solver/fgmres.c
+++ b/solver/fgmres.c
@@ -43,7 +43,6 @@
 #include"global.h"
 #include"su3.h"
 #include"linalg_eo.h"
-#include"xchange_field.h"
 #include"gmres_precon.h"
 #include"tm_operators.h"
 #include"sub_low_ev.h"
diff --git a/solver/gmres_precon.c b/solver/gmres_precon.c
index 70fc63ed5..bc3d6184a 100644
--- a/solver/gmres_precon.c
+++ b/solver/gmres_precon.c
@@ -51,7 +51,6 @@
 #include"su3.h"
 #include"linalg_eo.h"
 #include"start.h"
-#include"xchange_field.h"
 #include "solver_field.h"
 #include"gmres_precon.h"
 
diff --git a/test/check_xchange.c b/test/check_xchange.c
index 0b2a15e79..4367ccda8 100644
--- a/test/check_xchange.c
+++ b/test/check_xchange.c
@@ -37,7 +37,7 @@
 #include "global.h"
 #include "geometry_eo.h"
 #include "start.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 
 void set_deri_point();
 int check_geometry();
diff --git a/test_lemon.c b/test_lemon.c
index 0cf76210c..8353f08f9 100644
--- a/test_lemon.c
+++ b/test_lemon.c
@@ -51,7 +51,7 @@
 #include "start.h"
 #include "boundary.h"
 #include "global.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "init_gauge_field.h"
 #include "init_geometry_indices.h"
 #include "measure_gauge_action.h"
diff --git a/tm_sub_Hopping_Matrix.c b/tm_sub_Hopping_Matrix.c
index 772f52295..bb63dc2a8 100644
--- a/tm_sub_Hopping_Matrix.c
+++ b/tm_sub_Hopping_Matrix.c
@@ -42,10 +42,7 @@
 #  include"DirectPut.h"
 #endif
 #ifdef MPI
-#  include "xchange_field.h"
-#  if defined _USE_HALFSPINOR
-#    include "xchange_halffield.h"
-#  endif
+#  include "xchange/xchange.h"
 #endif
 #include "boundary.h"
 #include "init_dirac_halfspinor.h"
diff --git a/tm_times_Hopping_Matrix.c b/tm_times_Hopping_Matrix.c
index 3c790f605..7b9dc882d 100644
--- a/tm_times_Hopping_Matrix.c
+++ b/tm_times_Hopping_Matrix.c
@@ -42,10 +42,7 @@
 #  include"DirectPut.h"
 #endif
 #ifdef MPI
-#  include "xchange_field.h"
-#  if defined _USE_HALFSPINOR
-#    include "xchange_halffield.h"
-#  endif
+#  include "xchange/xchange.h"
 #endif
 #include "boundary.h"
 #include "init_dirac_halfspinor.h"
diff --git a/update_gauge.c b/update_gauge.c
index 8734a958d..1d38b5cf2 100644
--- a/update_gauge.c
+++ b/update_gauge.c
@@ -34,7 +34,7 @@
 #include "su3spinor.h"
 #include "expo.h"
 #include "sse.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "hamiltonian_field.h"
 #include "update_gauge.h"
 
diff --git a/update_tm.c b/update_tm.c
index 707f94ded..5cdf0a606 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -53,7 +53,7 @@
 #include "ranlxd.h"
 #include "read_input.h"
 #include "expo.h"
-#include "xchange.h"
+#include "xchange/xchange.h"
 #include "measure_rectangles.h"
 #include "init_gauge_tmp.h"
 #include "monomial/monomial.h"
diff --git a/xchange/Makefile.in b/xchange/Makefile.in
new file mode 100644
index 000000000..dc6f7e1bd
--- /dev/null
+++ b/xchange/Makefile.in
@@ -0,0 +1,98 @@
+# Template for xchange/Makefile, which builds libxchange.a; subdir names this directory within the source tree.
+srcdir = @srcdir@
+top_builddir =  @top_builddir@
+abs_top_builddir = @abs_top_builddir@
+top_srcdir = @top_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+subdir = xchange
+builddir = @builddir@
+
+CFLAGS = @CFLAGS@
+DEPFLAGS = @DEPFLAGS@
+LDFLAGS = @LDFLAGS@
+DEFS = @DEFS@
+OPTARGS = @OPTARGS@
+SOPTARGS = @SOPTARGS@
+
+AR = @AR@
+RANLIB = @RANLIB@
+CC = @CC@
+CCDEP = @CCDEP@
+CCLD = ${CC}
+LINK = ${CCLD} ${CFLAGS} ${LDFLAGS} ${OPTARGS} -o $@
+LEX = @LEX@
+AUTOCONF = @AUTOCONF@
+DEFS = @DEFS@
+
+INCLUDES = @INCLUDES@
+LDADD =
+#COMPILE = ${CC} ${DEFS} ${INCLUDES} ${CFLAGS}
+COMPILE = ${CC} $(DEFS) ${INCLUDES} ${CFLAGS}
+
+LIBRARIES = libxchange
+libxchange_TARGETS = xchange_deri xchange_field xchange_gauge xchange_halffield \
+	xchange_lexicfield xchange_2fields xchange_field_tslice \
+	xchange_jacobi 
+
+libxchange_STARGETS = 
+
+libxchange_OBJECTS = $(addsuffix .o, ${libxchange_TARGETS})
+libxchange_SOBJECTS = $(addsuffix .o, ${libxchange_STARGETS})
+
+# default rule
+
+all: Makefile dep libxchange.a
+
+# rules for debugging
+debug all-debug: CFLAGS := $(CFLAGS) @DEBUG_FLAG@
+debug all-debug: all
+
+# rules for profiling information
+profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@
+profile all-profile: all
+
+
+#include dep rules
+
+-include $(addsuffix .d,${libxchange_TARGETS})
+
+include ${top_srcdir}/Makefile.global
+
+# rule to compile objects
+
+${libxchange_OBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${OPTARGS} -c $<
+
+${libxchange_SOBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${SOPTARGS} -c $<
+
+# rule to make libxchange
+
+libxchange.a: ${libxchange_OBJECTS} ${libxchange_SOBJECTS} Makefile
+	@rm -f libxchange.a
+	@${AR} cru libxchange.a ${libxchange_OBJECTS} ${libxchange_SOBJECTS}
+	@$(RANLIB) libxchange.a
+	@cp libxchange.a ../lib/libxchange.a
+
+# rule to generate .d files
+
+$(addsuffix .d, $(libxchange_TARGETS) ${libxchange_STARGETS}): %.d: ${srcdir}/%.c Makefile
+	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+
+# rule to make dependencies
+
+dep: ${addsuffix .d, ${libxchange_TARGETS} ${libxchange_STARGETS}}
+
+# rules to clean
+
+compile-clean: Makefile
+	rm -f ${$(addsuffix _OBJECTS, ${LIBRARIES})} ${$(addsuffix _SOBJECTS, ${LIBRARIES})} *.d
+
+clean: compile-clean 
+	rm -f $(addsuffix .a, ${LIBRARIES})
+	rm -f ../lib/libxchange.a
+
+distclean: clean
+	rm -f Makefile
+
+.PHONY: all dep clean compile-clean distclean profile all-profile debug all-debug
diff --git a/xchange.h b/xchange/xchange.h
similarity index 81%
rename from xchange.h
rename to xchange/xchange.h
index b2af30c60..ffdfa9a48 100644
--- a/xchange.h
+++ b/xchange/xchange.h
@@ -19,13 +19,13 @@
 #ifndef _XCHANGE_H
 #define _XCHANGE_H
 
-#include "xchange_field.h"
-#include "xchange_gauge.h"
-#include "xchange_deri.h"
-#include "xchange_halffield.h"
-#include "xchange_jacobi.h"
+#include "xchange/xchange_field.h"
+#include "xchange/xchange_gauge.h"
+#include "xchange/xchange_deri.h"
+#include "xchange/xchange_halffield.h"
+#include "xchange/xchange_jacobi.h"
 #  ifdef _USE_TSPLITPAR
-#    include "xchange_field_tslice.h"
+#    include "xchange/xchange_field_tslice.h"
 #  endif
 
 #endif
diff --git a/xchange_2fields.c b/xchange/xchange_2fields.c
similarity index 100%
rename from xchange_2fields.c
rename to xchange/xchange_2fields.c
diff --git a/xchange_2fields.h b/xchange/xchange_2fields.h
similarity index 100%
rename from xchange_2fields.h
rename to xchange/xchange_2fields.h
diff --git a/xchange_deri.c b/xchange/xchange_deri.c
similarity index 100%
rename from xchange_deri.c
rename to xchange/xchange_deri.c
diff --git a/xchange_deri.h b/xchange/xchange_deri.h
similarity index 100%
rename from xchange_deri.h
rename to xchange/xchange_deri.h
diff --git a/xchange_field.c b/xchange/xchange_field.c
similarity index 100%
rename from xchange_field.c
rename to xchange/xchange_field.c
diff --git a/xchange_field.h b/xchange/xchange_field.h
similarity index 100%
rename from xchange_field.h
rename to xchange/xchange_field.h
diff --git a/xchange_field_tslice.c b/xchange/xchange_field_tslice.c
similarity index 100%
rename from xchange_field_tslice.c
rename to xchange/xchange_field_tslice.c
diff --git a/xchange_field_tslice.h b/xchange/xchange_field_tslice.h
similarity index 100%
rename from xchange_field_tslice.h
rename to xchange/xchange_field_tslice.h
diff --git a/xchange_gauge.c b/xchange/xchange_gauge.c
similarity index 100%
rename from xchange_gauge.c
rename to xchange/xchange_gauge.c
diff --git a/xchange_gauge.h b/xchange/xchange_gauge.h
similarity index 100%
rename from xchange_gauge.h
rename to xchange/xchange_gauge.h
diff --git a/xchange_halffield.c b/xchange/xchange_halffield.c
similarity index 100%
rename from xchange_halffield.c
rename to xchange/xchange_halffield.c
diff --git a/xchange_halffield.h b/xchange/xchange_halffield.h
similarity index 100%
rename from xchange_halffield.h
rename to xchange/xchange_halffield.h
diff --git a/xchange_jacobi.c b/xchange/xchange_jacobi.c
similarity index 100%
rename from xchange_jacobi.c
rename to xchange/xchange_jacobi.c
diff --git a/xchange_jacobi.h b/xchange/xchange_jacobi.h
similarity index 100%
rename from xchange_jacobi.h
rename to xchange/xchange_jacobi.h
diff --git a/xchange_lexicfield.c b/xchange/xchange_lexicfield.c
similarity index 100%
rename from xchange_lexicfield.c
rename to xchange/xchange_lexicfield.c
diff --git a/xchange_lexicfield.h b/xchange/xchange_lexicfield.h
similarity index 100%
rename from xchange_lexicfield.h
rename to xchange/xchange_lexicfield.h

From c98bf14c7a130c96a26c4adfcbd404cf4cf012c0 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 10:13:44 +0200
Subject: [PATCH 069/110] moved Schroedinger functional related code into
 subdir sf (currently not compiled)

---
 sf_calc_action.c => sf/sf_calc_action.c                     | 0
 sf_calc_action.h => sf/sf_calc_action.h                     | 0
 sf_get_rectangle_staples.c => sf/sf_get_rectangle_staples.c | 0
 sf_get_rectangle_staples.h => sf/sf_get_rectangle_staples.h | 0
 sf_get_staples.c => sf/sf_get_staples.c                     | 0
 sf_get_staples.h => sf/sf_get_staples.h                     | 0
 sf_observables.c => sf/sf_observables.c                     | 0
 sf_observables.h => sf/sf_observables.h                     | 0
 sf_utils.c => sf/sf_utils.c                                 | 0
 sf_utils.h => sf/sf_utils.h                                 | 0
 update_momenta.c                                            | 2 +-
 11 files changed, 1 insertion(+), 1 deletion(-)
 rename sf_calc_action.c => sf/sf_calc_action.c (100%)
 rename sf_calc_action.h => sf/sf_calc_action.h (100%)
 rename sf_get_rectangle_staples.c => sf/sf_get_rectangle_staples.c (100%)
 rename sf_get_rectangle_staples.h => sf/sf_get_rectangle_staples.h (100%)
 rename sf_get_staples.c => sf/sf_get_staples.c (100%)
 rename sf_get_staples.h => sf/sf_get_staples.h (100%)
 rename sf_observables.c => sf/sf_observables.c (100%)
 rename sf_observables.h => sf/sf_observables.h (100%)
 rename sf_utils.c => sf/sf_utils.c (100%)
 rename sf_utils.h => sf/sf_utils.h (100%)

diff --git a/sf_calc_action.c b/sf/sf_calc_action.c
similarity index 100%
rename from sf_calc_action.c
rename to sf/sf_calc_action.c
diff --git a/sf_calc_action.h b/sf/sf_calc_action.h
similarity index 100%
rename from sf_calc_action.h
rename to sf/sf_calc_action.h
diff --git a/sf_get_rectangle_staples.c b/sf/sf_get_rectangle_staples.c
similarity index 100%
rename from sf_get_rectangle_staples.c
rename to sf/sf_get_rectangle_staples.c
diff --git a/sf_get_rectangle_staples.h b/sf/sf_get_rectangle_staples.h
similarity index 100%
rename from sf_get_rectangle_staples.h
rename to sf/sf_get_rectangle_staples.h
diff --git a/sf_get_staples.c b/sf/sf_get_staples.c
similarity index 100%
rename from sf_get_staples.c
rename to sf/sf_get_staples.c
diff --git a/sf_get_staples.h b/sf/sf_get_staples.h
similarity index 100%
rename from sf_get_staples.h
rename to sf/sf_get_staples.h
diff --git a/sf_observables.c b/sf/sf_observables.c
similarity index 100%
rename from sf_observables.c
rename to sf/sf_observables.c
diff --git a/sf_observables.h b/sf/sf_observables.h
similarity index 100%
rename from sf_observables.h
rename to sf/sf_observables.h
diff --git a/sf_utils.c b/sf/sf_utils.c
similarity index 100%
rename from sf_utils.c
rename to sf/sf_utils.c
diff --git a/sf_utils.h b/sf/sf_utils.h
similarity index 100%
rename from sf_utils.h
rename to sf/sf_utils.h
diff --git a/update_momenta.c b/update_momenta.c
index ba9697f4a..1ed29e7d3 100644
--- a/update_momenta.c
+++ b/update_momenta.c
@@ -31,7 +31,7 @@
 #include "su3adj.h"
 #include "su3spinor.h"
 #include "monomial/monomial.h"
-#include "xchange_deri.h"
+#include "xchange/xchange.h"
 #include "clover_leaf.h"
 #include "read_input.h"
 #include "hamiltonian_field.h"

From 28abeff8c593f73d239f727556b5330db45bd989 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 10:51:53 +0200
Subject: [PATCH 070/110] moved operator related stuff into subdir operator

---
 Makefile.in                                   | 10 +-
 Ptilde_nd.c                                   |  4 +-
 X_psi.c                                       |  4 +-
 benchmark.c                                   |  8 +-
 block.c                                       |  2 +-
 chebyshev_polynomial.c                        |  4 +-
 chebyshev_polynomial_nd.c                     |  4 +-
 check_locallity.c                             |  2 +-
 configure.in                                  |  4 +-
 hopping_test.c                                |  9 +-
 invert.c                                      |  6 +-
 invert_clover_eo.c                            |  8 +-
 invert_doublet_eo.c                           |  8 +-
 invert_eo.c                                   |  6 +-
 invert_overlap.c                              |  4 +-
 monomial/clover_trlog_monomial.c              |  6 +-
 monomial/cloverdet_monomial.c                 |  8 +-
 monomial/cloverdetratio_monomial.c            |  8 +-
 monomial/clovernd_trlog_monomial.c            |  6 +-
 monomial/cloverndpoly_monomial.c              | 10 +-
 monomial/det_monomial.c                       |  4 +-
 monomial/detratio_monomial.c                  |  4 +-
 monomial/monomial.c                           |  6 +-
 monomial/nddetratio_monomial.c                |  8 +-
 monomial/ndpoly_monomial.c                    | 12 +--
 monomial/poly_monomial.c                      |  6 +-
 operator.c                                    | 14 +--
 D_psi.c => operator/D_psi.c                   |  4 +-
 D_psi.h => operator/D_psi.h                   |  0
 Dov_proj.c => operator/Dov_proj.c             |  0
 Dov_proj.h => operator/Dov_proj.h             |  0
 Dov_psi.c => operator/Dov_psi.c               |  0
 Dov_psi.h => operator/Dov_psi.h               |  0
 Hopping_Matrix.c => operator/Hopping_Matrix.c |  2 +-
 Hopping_Matrix.h => operator/Hopping_Matrix.h |  0
 .../Hopping_Matrix_nocom.c                    |  2 +-
 .../Hopping_Matrix_nocom.h                    |  0
 operator/Makefile.in                          | 97 +++++++++++++++++++
 clover_leaf.c => operator/clover_leaf.c       |  4 +-
 clover_leaf.h => operator/clover_leaf.h       |  0
 .../clovertm_operators.c                      |  4 +-
 .../clovertm_operators.h                      |  0
 tm_operators.c => operator/tm_operators.c     | 10 +-
 tm_operators.h => operator/tm_operators.h     |  0
 .../tm_operators_nd.c                         |  8 +-
 .../tm_operators_nd.h                         |  0
 .../tm_sub_Hopping_Matrix.c                   |  0
 .../tm_sub_Hopping_Matrix.h                   |  0
 .../tm_times_Hopping_Matrix.c                 |  0
 .../tm_times_Hopping_Matrix.h                 |  0
 phmc.c                                        |  2 +-
 prepare_source.c                              |  2 +-
 reweighting_factor_nd.c                       |  4 +-
 solver/Msap.c                                 |  2 +-
 solver/dfl_projector.c                        |  2 +-
 solver/eigenvalues_bi.c                       |  2 +-
 solver/index_jd.c                             |  2 +-
 update_momenta.c                              |  2 +-
 update_tm.c                                   |  2 +-
 59 files changed, 211 insertions(+), 115 deletions(-)
 rename D_psi.c => operator/D_psi.c (99%)
 rename D_psi.h => operator/D_psi.h (100%)
 rename Dov_proj.c => operator/Dov_proj.c (100%)
 rename Dov_proj.h => operator/Dov_proj.h (100%)
 rename Dov_psi.c => operator/Dov_psi.c (100%)
 rename Dov_psi.h => operator/Dov_psi.h (100%)
 rename Hopping_Matrix.c => operator/Hopping_Matrix.c (99%)
 rename Hopping_Matrix.h => operator/Hopping_Matrix.h (100%)
 rename Hopping_Matrix_nocom.c => operator/Hopping_Matrix_nocom.c (97%)
 rename Hopping_Matrix_nocom.h => operator/Hopping_Matrix_nocom.h (100%)
 create mode 100644 operator/Makefile.in
 rename clover_leaf.c => operator/clover_leaf.c (99%)
 rename clover_leaf.h => operator/clover_leaf.h (100%)
 rename clovertm_operators.c => operator/clovertm_operators.c (99%)
 rename clovertm_operators.h => operator/clovertm_operators.h (100%)
 rename tm_operators.c => operator/tm_operators.c (99%)
 rename tm_operators.h => operator/tm_operators.h (100%)
 rename tm_operators_nd.c => operator/tm_operators_nd.c (99%)
 rename tm_operators_nd.h => operator/tm_operators_nd.h (100%)
 rename tm_sub_Hopping_Matrix.c => operator/tm_sub_Hopping_Matrix.c (100%)
 rename tm_sub_Hopping_Matrix.h => operator/tm_sub_Hopping_Matrix.h (100%)
 rename tm_times_Hopping_Matrix.c => operator/tm_times_Hopping_Matrix.c (100%)
 rename tm_times_Hopping_Matrix.h => operator/tm_times_Hopping_Matrix.h (100%)

diff --git a/Makefile.in b/Makefile.in
index 59586e25c..75ffc92f2 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -40,13 +40,13 @@ LINKLIBS = ${top_builddir}/linalg/liblinalg.a  \
 
 COMPILE = ${CC} ${DEFS} ${INCLUDES} -o $@ ${CFLAGS}
 
-SMODULES = Hopping_Matrix_nocom tm_times_Hopping_Matrix Hopping_Matrix tm_operators tm_sub_Hopping_Matrix
+SMODULES = 
 
 MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	expo get_staples update_backward_gauge \
 	measure_rectangles get_rectangle_staples  \
 	test/check_geometry test/check_xchange \
-	test/overlaptests clovertm_operators clover_leaf \
+	test/overlaptests \
 	invert_eo invert_doublet_eo update_gauge \
 	polyakov_loop getopt sighandler reweighting_factor \
 	source_generation boundary update_tm ranlxd  \
@@ -54,12 +54,12 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	geometry_eo invert_overlap \
 	init_moment_field init_gauge_tmp prepare_source \
 	init_gauge_field init_geometry_indices init_spinor_field \
-	init_dirac_halfspinor tm_operators_nd \
+	init_dirac_halfspinor \
 	chebyshev_polynomial_nd Ptilde_nd  \
 	init_chi_spinor_field reweighting_factor_nd \
-	init_bispinor_field D_psi \
+	init_bispinor_field \
 	online_measurement update_momenta integrator  phmc \
-	little_D block Dov_psi operator measurements pion_norm Dov_proj \
+	little_D block operator measurements pion_norm \
 	temporalgauge spinor_fft X_psi P_M_eta \
 	jacobi init_jacobi_field \
 	fatal_error invert_clover_eo gettime @SPI_FILES@ init_omp_accumulators
diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index e103e17e3..f20c70350 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -30,8 +30,8 @@
 #include "global.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "tm_operators.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators.h"
+#include "operator/tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "phmc.h"
 #include "solver/matrix_mult_typedef_nd.h"
diff --git a/X_psi.c b/X_psi.c
index 68a582ebb..f35bfdf51 100644
--- a/X_psi.c
+++ b/X_psi.c
@@ -29,10 +29,10 @@
 #include "global.h"
 #include "su3.h"
 #include "linalg_eo.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "gamma.h"
 #include "X_psi.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "solver/solver.h"
 #include "read_input.h"
 
diff --git a/benchmark.c b/benchmark.c
index 63046e273..1968fba7e 100644
--- a/benchmark.c
+++ b/benchmark.c
@@ -54,9 +54,9 @@
 #include "read_input.h"
 #include "start.h"
 #include "boundary.h"
-#include "Hopping_Matrix.h"
-#include "Hopping_Matrix_nocom.h"
-#include "tm_operators.h"
+#include "operator/Hopping_Matrix.h"
+#include "operator/Hopping_Matrix_nocom.h"
+#include "operator/tm_operators.h"
 #include "global.h"
 #include "xchange/xchange.h"
 #include "init_gauge_field.h"
@@ -65,7 +65,7 @@
 #include "init_moment_field.h"
 #include "init_dirac_halfspinor.h"
 #include "test/check_geometry.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "phmc.h"
 #include "mpi_init.h"
 
diff --git a/block.c b/block.c
index dc068fc8a..f6644ac3f 100644
--- a/block.c
+++ b/block.c
@@ -28,7 +28,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "global.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "linalg_eo.h"
 #include "start.h"
 #include "xchange/xchange.h"
diff --git a/chebyshev_polynomial.c b/chebyshev_polynomial.c
index 1bae10f16..caa04667d 100644
--- a/chebyshev_polynomial.c
+++ b/chebyshev_polynomial.c
@@ -28,8 +28,8 @@
 #include "global.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "tm_operators.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators.h"
+#include "operator/tm_operators_nd.h"
 #include "chebyshev_polynomial.h"
 
 #define PI 3.141592653589793
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 5756be9f4..5f805b843 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -27,8 +27,8 @@
 #include "global.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "tm_operators.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators.h"
+#include "operator/tm_operators_nd.h"
 #include "phmc.h"
 #include "Ptilde_nd.h"
 #include "chebyshev_polynomial_nd.h"
diff --git a/check_locallity.c b/check_locallity.c
index e769902b5..49cc92dbf 100644
--- a/check_locallity.c
+++ b/check_locallity.c
@@ -62,7 +62,7 @@
 #include "smearing/stout.h"
 #include "su3spinor.h"
 #include "invert_eo.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "linalg/convert_eo_to_lexic.h"
 
 
diff --git a/configure.in b/configure.in
index 96e435cba..35e30f55c 100644
--- a/configure.in
+++ b/configure.in
@@ -40,7 +40,7 @@ AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
 LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib"
 CCLD=${CC}
 
-USESUBDIRS="buffers cu io solver linalg monomial xchange"
+USESUBDIRS="buffers cu io solver linalg monomial xchange operator"
 
 AC_CHECK_HEADERS([stdint.h],
 [ dnl for inttypes.h and stdint.h for uint_xxx types
@@ -897,7 +897,7 @@ if test ! -e tests/regressions; then
 fi
 
 
-LIBS="-lhmc -lmonomial -lsolver -lxchange -llinalg -lhmc -lio $LIBS"
+LIBS="-lhmc -lmonomial -loperator -lsolver -lxchange -llinalg -lhmc -lio $LIBS"
 AUTOCONF=autoconf
 
 for i in $USESUBDIRS
diff --git a/hopping_test.c b/hopping_test.c
index 44cf4dabd..14a1299f3 100644
--- a/hopping_test.c
+++ b/hopping_test.c
@@ -52,9 +52,9 @@
 #include "read_input.h"
 #include "start.h"
 #include "boundary.h"
-#include "Hopping_Matrix.h"
-#include "Hopping_Matrix_nocom.h"
-#include "tm_operators.h"
+#include "operator/Hopping_Matrix.h"
+#include "operator/Hopping_Matrix_nocom.h"
+#include "operator/tm_operators.h"
 #include "global.h"
 #include "xchange/xchange.h"
 #include "init_gauge_field.h"
@@ -63,8 +63,7 @@
 #include "init_moment_field.h"
 #include "init_dirac_halfspinor.h"
 #include "test/check_geometry.h"
-#include "xchange_halffield.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "phmc.h"
 #include "mpi_init.h"
 #include "io/io_cm.h"
diff --git a/invert.c b/invert.c
index 94c8e9803..f3ae7a157 100644
--- a/invert.c
+++ b/invert.c
@@ -72,7 +72,7 @@
 #include "monomial/monomial.h"
 #include "ranlxd.h"
 #include "phmc.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "little_D.h"
 #include "reweighting_factor.h"
 #include "linalg/convert_eo_to_lexic.h"
@@ -88,8 +88,8 @@
 #include <io/utils.h>
 #include "solver/dirac_operator_eigenvectors.h"
 #include "P_M_eta.h"
-#include "tm_operators.h"
-#include "Dov_psi.h"
+#include "operator/tm_operators.h"
+#include "operator/Dov_psi.h"
 #include "solver/spectral_proj.h"
 void usage()
 {
diff --git a/invert_clover_eo.c b/invert_clover_eo.c
index 41c836c71..361b44d34 100644
--- a/invert_clover_eo.c
+++ b/invert_clover_eo.c
@@ -38,10 +38,10 @@
 #include"global.h"
 #include"su3.h"
 #include"linalg_eo.h"
-#include"tm_operators.h"
-#include"Hopping_Matrix.h"
-#include"clovertm_operators.h"
-#include"D_psi.h"
+#include"operator/tm_operators.h"
+#include"operator/Hopping_Matrix.h"
+#include"operator/clovertm_operators.h"
+#include"operator/D_psi.h"
 #include"gamma.h"
 #include"solver/solver.h"
 #include"invert_clover_eo.h"
diff --git a/invert_doublet_eo.c b/invert_doublet_eo.c
index c64f25c8f..640007bb5 100644
--- a/invert_doublet_eo.c
+++ b/invert_doublet_eo.c
@@ -37,14 +37,14 @@
 #include<stdlib.h>
 #include"global.h"
 #include"linalg_eo.h"
-#include"tm_operators.h"
-#include"Hopping_Matrix.h"
-#include"D_psi.h"
+#include"operator/tm_operators.h"
+#include"operator/Hopping_Matrix.h"
+#include"operator/D_psi.h"
 #include"gamma.h"
 #include"solver/solver.h"
 #include"read_input.h"
 #include"xchange/xchange.h"
-#include"tm_operators_nd.h"
+#include"operator/tm_operators_nd.h"
 #include"invert_doublet_eo.h"
 
 
diff --git a/invert_eo.c b/invert_eo.c
index 944398e07..7157e6387 100644
--- a/invert_eo.c
+++ b/invert_eo.c
@@ -37,9 +37,9 @@
 #include<stdlib.h>
 #include"global.h"
 #include"linalg_eo.h"
-#include"tm_operators.h"
-#include"Hopping_Matrix.h"
-#include"D_psi.h"
+#include"operator/tm_operators.h"
+#include"operator/Hopping_Matrix.h"
+#include"operator/D_psi.h"
 #include"gamma.h"
 #include"solver/solver.h"
 #include"read_input.h"
diff --git a/invert_overlap.c b/invert_overlap.c
index 90fed71e7..c5e308f8b 100644
--- a/invert_overlap.c
+++ b/invert_overlap.c
@@ -27,10 +27,10 @@
 #include "solver/cgs_real.h"
 #include "operator.h"
 #include "invert_overlap.h"
-#include "Dov_psi.h"
+#include "operator/Dov_psi.h"
 #include "linalg_eo.h"
 #include "read_input.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "gamma.h"
 #include "solver/cg_her.h"
 
diff --git a/monomial/clover_trlog_monomial.c b/monomial/clover_trlog_monomial.c
index 7215869e4..16b3aa540 100644
--- a/monomial/clover_trlog_monomial.c
+++ b/monomial/clover_trlog_monomial.c
@@ -29,10 +29,10 @@
 #include "su3.h"
 #include "su3adj.h"
 #include "su3spinor.h"
-#include "clovertm_operators.h"
-#include "clover_leaf.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 #include "monomial/monomial.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 #include "clover_trlog_monomial.h"
 
 void clover_trlog_derivative(const int id, hamiltonian_field_t * const hf) {
diff --git a/monomial/cloverdet_monomial.c b/monomial/cloverdet_monomial.c
index 7bc86c851..e303e31d0 100644
--- a/monomial/cloverdet_monomial.c
+++ b/monomial/cloverdet_monomial.c
@@ -34,16 +34,16 @@
 #include "linalg_eo.h"
 #include "deriv_Sb.h"
 #include "gamma.h"
-#include "tm_operators.h"
-#include "Hopping_Matrix.h"
+#include "operator/tm_operators.h"
+#include "operator/Hopping_Matrix.h"
 #include "solver/chrono_guess.h"
 #include "solver/solver.h"
-#include "clover_leaf.h"
+#include "operator/clover_leaf.h"
 #include "read_input.h"
 #include "hamiltonian_field.h"
 #include "boundary.h"
 #include "monomial/monomial.h"
-#include "clovertm_operators.h"
+#include "operator/clovertm_operators.h"
 #include "cloverdet_monomial.h"
 
 /* think about chronological solver ! */
diff --git a/monomial/cloverdetratio_monomial.c b/monomial/cloverdetratio_monomial.c
index 43932630d..77c3e8edb 100644
--- a/monomial/cloverdetratio_monomial.c
+++ b/monomial/cloverdetratio_monomial.c
@@ -32,13 +32,13 @@
 #include "linalg_eo.h"
 #include "deriv_Sb.h"
 #include "gamma.h"
-#include "tm_operators.h"
-#include "Hopping_Matrix.h"
+#include "operator/tm_operators.h"
+#include "operator/Hopping_Matrix.h"
 #include "solver/chrono_guess.h"
 #include "solver/solver.h"
 #include "read_input.h"
-#include "clovertm_operators.h"
-#include "clover_leaf.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 #include "monomial/monomial.h"
 #include "boundary.h"
 #include "cloverdetratio_monomial.h"
diff --git a/monomial/clovernd_trlog_monomial.c b/monomial/clovernd_trlog_monomial.c
index d21ada40a..8d4c4c12d 100644
--- a/monomial/clovernd_trlog_monomial.c
+++ b/monomial/clovernd_trlog_monomial.c
@@ -29,10 +29,10 @@
 #include "su3.h"
 #include "su3adj.h"
 #include "su3spinor.h"
-#include "clovertm_operators.h"
-#include "clover_leaf.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 #include "monomial/monomial.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 #include "clovernd_trlog_monomial.h"
 
 void clovernd_trlog_derivative(const int id, hamiltonian_field_t * const hf) {
diff --git a/monomial/cloverndpoly_monomial.c b/monomial/cloverndpoly_monomial.c
index 5f3d91a59..918a5232d 100644
--- a/monomial/cloverndpoly_monomial.c
+++ b/monomial/cloverndpoly_monomial.c
@@ -31,16 +31,16 @@
 #include "start.h"
 #include "solver/solver.h"
 #include "deriv_Sb.h"
-#include "tm_operators.h"
-#include "tm_operators_nd.h"
-#include "Hopping_Matrix.h"
+#include "operator/tm_operators.h"
+#include "operator/tm_operators_nd.h"
+#include "operator/Hopping_Matrix.h"
 #include "phmc.h"
 #include "Ptilde_nd.h"
 #include "monomial/monomial.h"
 #include "hamiltonian_field.h"
 #include "boundary.h"
-#include "clovertm_operators.h"
-#include "clover_leaf.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 #include "cloverndpoly_monomial.h"
 
 /********************************************
diff --git a/monomial/det_monomial.c b/monomial/det_monomial.c
index 6fd2938b0..0e17a8528 100644
--- a/monomial/det_monomial.c
+++ b/monomial/det_monomial.c
@@ -30,9 +30,9 @@
 #include "linalg_eo.h"
 #include "deriv_Sb.h"
 #include "deriv_Sb_D_psi.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "hybrid_update.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 #include "solver/chrono_guess.h"
 #include "solver/solver.h"
 #include "read_input.h"
diff --git a/monomial/detratio_monomial.c b/monomial/detratio_monomial.c
index 6ff36d92f..bd3536790 100644
--- a/monomial/detratio_monomial.c
+++ b/monomial/detratio_monomial.c
@@ -31,8 +31,8 @@
 #include "linalg_eo.h"
 #include "deriv_Sb.h"
 #include "deriv_Sb_D_psi.h"
-#include "tm_operators.h"
-#include "Hopping_Matrix.h"
+#include "operator/tm_operators.h"
+#include "operator/Hopping_Matrix.h"
 #include "solver/chrono_guess.h"
 #include "solver/solver.h"
 #include "read_input.h"
diff --git a/monomial/monomial.c b/monomial/monomial.c
index 7029344b0..0be88e108 100644
--- a/monomial/monomial.c
+++ b/monomial/monomial.c
@@ -31,9 +31,9 @@
 #include "su3.h"
 #include "su3adj.h"
 #include "su3spinor.h"
-#include "tm_operators.h"
-#include "clovertm_operators.h"
-#include "clover_leaf.h"
+#include "operator/tm_operators.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 #include "ranlxd.h"
 #include "sse.h"
 #include "linalg_eo.h"
diff --git a/monomial/nddetratio_monomial.c b/monomial/nddetratio_monomial.c
index 5264f3521..a5fbc1d9c 100644
--- a/monomial/nddetratio_monomial.c
+++ b/monomial/nddetratio_monomial.c
@@ -32,14 +32,14 @@
 #include "start.h"
 #include "solver/solver.h"
 #include "deriv_Sb.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "chebyshev_polynomial.h"
-#include "tm_operators_nd.h"
-#include "Hopping_Matrix.h"
+#include "operator/tm_operators_nd.h"
+#include "operator/Hopping_Matrix.h"
 #include "phmc.h"
 #include "boundary.h"
 #include "gamma.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "reweighting_factor_nd.h"
diff --git a/monomial/ndpoly_monomial.c b/monomial/ndpoly_monomial.c
index 84d054742..9868e4564 100644
--- a/monomial/ndpoly_monomial.c
+++ b/monomial/ndpoly_monomial.c
@@ -31,12 +31,12 @@
 #include "start.h"
 #include "solver/solver.h"
 #include "deriv_Sb.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "chebyshev_polynomial.h"
-#include "tm_operators_nd.h"
-#include "Hopping_Matrix.h"
+#include "operator/tm_operators_nd.h"
+#include "operator/Hopping_Matrix.h"
 #include "phmc.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators_nd.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "reweighting_factor_nd.h"
@@ -46,8 +46,8 @@
 #include "phmc.h"
 #include "init_chi_spinor_field.h"
 #include "solver/matrix_mult_typedef_nd.h"
-#include "clover_leaf.h"
-#include "clovertm_operators.h"
+#include "operator/clover_leaf.h"
+#include "operator/clovertm_operators.h"
 #include "ndpoly_monomial.h"
 
 extern int phmc_exact_poly;
diff --git a/monomial/poly_monomial.c b/monomial/poly_monomial.c
index 0b3dd5d90..22aaa7b4c 100644
--- a/monomial/poly_monomial.c
+++ b/monomial/poly_monomial.c
@@ -42,12 +42,12 @@
 #include "linalg/diff.h"
 #include "linalg_eo.h"
 #include "deriv_Sb.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "solver/solver.h"
 #include "solver/chrono_guess.h"
 #include "solver/eigenvalues.h"
-#include "tm_operators_nd.h"
-#include "Hopping_Matrix.h"
+#include "operator/tm_operators_nd.h"
+#include "operator/Hopping_Matrix.h"
 #include "hamiltonian_field.h"
 #include "phmc.h"
 
diff --git a/operator.c b/operator.c
index ee5fbb33e..6797f2802 100644
--- a/operator.c
+++ b/operator.c
@@ -34,12 +34,12 @@
 #include "default_input_values.h"
 #include "read_input.h"
 #include "su3.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "linalg_eo.h"
-#include "D_psi.h"
-#include "Dov_psi.h"
-#include "tm_operators_nd.h"
-#include "Hopping_Matrix.h"
+#include "operator/D_psi.h"
+#include "operator/Dov_psi.h"
+#include "operator/tm_operators_nd.h"
+#include "operator/Hopping_Matrix.h"
 #include "invert_eo.h"
 #include "invert_doublet_eo.h"
 #include "invert_overlap.h"
@@ -55,8 +55,8 @@
 #include <io/utils.h>
 #include "test/overlaptests.h"
 #include "solver/index_jd.h"
-#include "clovertm_operators.h"
-#include "clover_leaf.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 #include "operator.h"
 #include "gettime.h"
 
diff --git a/D_psi.c b/operator/D_psi.c
similarity index 99%
rename from D_psi.c
rename to operator/D_psi.c
index 0a36d2e56..532df15d3 100644
--- a/D_psi.c
+++ b/operator/D_psi.c
@@ -41,11 +41,11 @@
 #include "sse.h"
 #include "boundary.h"
 #ifdef MPI
-# include "xchange_lexicfield.h"
+# include "xchange/xchange.h"
 #endif
 #include "update_backward_gauge.h"
 #include "block.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "solver/dirac_operator_eigenvectors.h"
 
 static spinor tmpr;
diff --git a/D_psi.h b/operator/D_psi.h
similarity index 100%
rename from D_psi.h
rename to operator/D_psi.h
diff --git a/Dov_proj.c b/operator/Dov_proj.c
similarity index 100%
rename from Dov_proj.c
rename to operator/Dov_proj.c
diff --git a/Dov_proj.h b/operator/Dov_proj.h
similarity index 100%
rename from Dov_proj.h
rename to operator/Dov_proj.h
diff --git a/Dov_psi.c b/operator/Dov_psi.c
similarity index 100%
rename from Dov_psi.c
rename to operator/Dov_psi.c
diff --git a/Dov_psi.h b/operator/Dov_psi.h
similarity index 100%
rename from Dov_psi.h
rename to operator/Dov_psi.h
diff --git a/Hopping_Matrix.c b/operator/Hopping_Matrix.c
similarity index 99%
rename from Hopping_Matrix.c
rename to operator/Hopping_Matrix.c
index 4ad0ef4c3..0e96635f0 100644
--- a/Hopping_Matrix.c
+++ b/operator/Hopping_Matrix.c
@@ -65,7 +65,7 @@
 #ifdef BGQ
 #  include"DirectPut.h"
 #endif
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 
 #if defined _USE_HALFSPINOR
 #  include "operator/halfspinor_hopping.h"
diff --git a/Hopping_Matrix.h b/operator/Hopping_Matrix.h
similarity index 100%
rename from Hopping_Matrix.h
rename to operator/Hopping_Matrix.h
diff --git a/Hopping_Matrix_nocom.c b/operator/Hopping_Matrix_nocom.c
similarity index 97%
rename from Hopping_Matrix_nocom.c
rename to operator/Hopping_Matrix_nocom.c
index 8db219718..028a26630 100644
--- a/Hopping_Matrix_nocom.c
+++ b/operator/Hopping_Matrix_nocom.c
@@ -45,7 +45,7 @@
 #include "su3.h"
 #include "sse.h"
 #include "boundary.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 
 #define Hopping_Matrix Hopping_Matrix_nocom
 #define _NO_COMM 1
diff --git a/Hopping_Matrix_nocom.h b/operator/Hopping_Matrix_nocom.h
similarity index 100%
rename from Hopping_Matrix_nocom.h
rename to operator/Hopping_Matrix_nocom.h
diff --git a/operator/Makefile.in b/operator/Makefile.in
new file mode 100644
index 000000000..95efb81cc
--- /dev/null
+++ b/operator/Makefile.in
@@ -0,0 +1,97 @@
+
+srcdir = @srcdir@
+top_builddir =  @top_builddir@
+abs_top_builddir = @abs_top_builddir@
+top_srcdir = @top_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+subdir = linalg
+builddir = @builddir@
+
+CFLAGS = @CFLAGS@
+DEPFLAGS = @DEPFLAGS@
+LDFLAGS = @LDFLAGS@
+DEFS = @DEFS@
+OPTARGS = @OPTARGS@
+SOPTARGS = @SOPTARGS@
+
+AR = @AR@
+RANLIB = @RANLIB@
+CC = @CC@
+CCDEP = @CCDEP@
+CCLD = ${CC}
+LINK = ${CCLD} ${CFLAGS} ${LDFLAGS} ${OPTARGS} -o $@
+LEX = @LEX@
+AUTOCONF = @AUTOCONF@
+DEFS = @DEFS@
+
+INCLUDES = @INCLUDES@
+LDADD =
+#COMPILE = ${CC} ${DEFS} ${INCLUDES} ${CFLAGS}
+COMPILE = ${CC} $(DEFS) ${INCLUDES} ${CFLAGS}
+
+LIBRARIES = liboperator
+liboperator_TARGETS = clovertm_operators clover_leaf tm_operators_nd 
+
+liboperator_STARGETS = Hopping_Matrix_nocom tm_times_Hopping_Matrix Hopping_Matrix \
+	tm_operators tm_sub_Hopping_Matrix D_psi Dov_psi Dov_proj
+
+liboperator_OBJECTS = $(addsuffix .o, ${liboperator_TARGETS})
+liboperator_SOBJECTS = $(addsuffix .o, ${liboperator_STARGETS})
+
+# default rule
+
+all: Makefile dep liboperator.a
+
+# rules for debugging
+debug all-debug: CFLAGS := $(CFLAGS) @DEBUG_FLAG@
+debug all-debug: all
+
+# rules for profiling information
+profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@
+profile all-profile: all
+
+
+#include dep rules
+
+-include $(addsuffix .d,${liboperator_TARGETS})
+
+include ${top_srcdir}/Makefile.global
+
+# rule to compile objects
+
+${liboperator_OBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${OPTARGS} -c $<
+
+${liboperator_SOBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${SOPTARGS} -c $<
+
+# rule to make liboperator
+
+liboperator.a: ${liboperator_OBJECTS} ${liboperator_SOBJECTS} Makefile
+	@rm -f liboperator.a
+	@${AR} cru liboperator.a ${liboperator_OBJECTS} ${liboperator_SOBJECTS}
+	@$(RANLIB) liboperator.a
+	@cp liboperator.a ../lib/liboperator.a
+
+# rule to generate .d files
+
+$(addsuffix .d, $(liboperator_TARGETS) ${liboperator_STARGETS}): %.d: ${srcdir}/%.c Makefile
+	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+
+# rule to make dependencies
+
+dep: ${addsuffix .d, ${liboperator_TARGETS} ${liboperator_STARGETS}}
+
+# rules to clean
+
+compile-clean: Makefile
+	rm -f ${$(addsuffix _OBJECTS, ${LIBRARIES})} ${$(addsuffix _SOBJECTS, ${LIBRARIES})} *.d
+
+clean: compile-clean 
+	rm -f $(addsuffix .a, ${LIBRARIES})
+	rm -f ../lib/liboperator.a
+
+distclean: clean
+	rm -f Makefile
+
+.PHONY: all dep clean compile-clean distclean profile all-profile debug all-debug
diff --git a/clover_leaf.c b/operator/clover_leaf.c
similarity index 99%
rename from clover_leaf.c
rename to operator/clover_leaf.c
index 677dc71fb..5176d579d 100644
--- a/clover_leaf.c
+++ b/operator/clover_leaf.c
@@ -49,8 +49,8 @@
 #include "su3.h"
 #include "sse.h"
 #include "su3adj.h"
-#include "clovertm_operators.h"
-#include "clover_leaf.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
 
 const double tiny_t = 1.0e-20;
 
diff --git a/clover_leaf.h b/operator/clover_leaf.h
similarity index 100%
rename from clover_leaf.h
rename to operator/clover_leaf.h
diff --git a/clovertm_operators.c b/operator/clovertm_operators.c
similarity index 99%
rename from clovertm_operators.c
rename to operator/clovertm_operators.c
index 38c750cf9..2a2164d01 100644
--- a/clovertm_operators.c
+++ b/operator/clovertm_operators.c
@@ -35,9 +35,9 @@
 #include "su3.h"
 #include "sse.h"
 #include "linalg_eo.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 #include "tm_operators.h"
-#include "clovertm_operators.h"
+#include "operator/clovertm_operators.h"
 
 
 su3 *** sw;
diff --git a/clovertm_operators.h b/operator/clovertm_operators.h
similarity index 100%
rename from clovertm_operators.h
rename to operator/clovertm_operators.h
diff --git a/tm_operators.c b/operator/tm_operators.c
similarity index 99%
rename from tm_operators.c
rename to operator/tm_operators.c
index 78af0615d..665824f4f 100644
--- a/tm_operators.c
+++ b/operator/tm_operators.c
@@ -31,14 +31,14 @@
 #include <stdio.h>
 #include "global.h"
 #include "su3.h"
-#include "Hopping_Matrix.h"
-#include "Hopping_Matrix_nocom.h"
-#include "tm_times_Hopping_Matrix.h"
-#include "tm_sub_Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
+#include "operator/Hopping_Matrix_nocom.h"
+#include "operator/tm_times_Hopping_Matrix.h"
+#include "operator/tm_sub_Hopping_Matrix.h"
 #include "sse.h"
 #include "linalg_eo.h"
 #include "gamma.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #ifdef BGL
 #  include "bgl.h"
 #endif
diff --git a/tm_operators.h b/operator/tm_operators.h
similarity index 100%
rename from tm_operators.h
rename to operator/tm_operators.h
diff --git a/tm_operators_nd.c b/operator/tm_operators_nd.c
similarity index 99%
rename from tm_operators_nd.c
rename to operator/tm_operators_nd.c
index 90b804d35..e170022f9 100644
--- a/tm_operators_nd.c
+++ b/operator/tm_operators_nd.c
@@ -33,13 +33,13 @@
 #include <math.h>
 #include "global.h"
 #include "su3.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 #include "phmc.h"
 #include "gamma.h"
 #include "linalg_eo.h"
-#include "tm_operators.h"
-#include "clovertm_operators.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators.h"
+#include "operator/clovertm_operators.h"
+#include "operator/tm_operators_nd.h"
 
 
 void mul_one_pm_iconst(spinor * const l, spinor * const k, 
diff --git a/tm_operators_nd.h b/operator/tm_operators_nd.h
similarity index 100%
rename from tm_operators_nd.h
rename to operator/tm_operators_nd.h
diff --git a/tm_sub_Hopping_Matrix.c b/operator/tm_sub_Hopping_Matrix.c
similarity index 100%
rename from tm_sub_Hopping_Matrix.c
rename to operator/tm_sub_Hopping_Matrix.c
diff --git a/tm_sub_Hopping_Matrix.h b/operator/tm_sub_Hopping_Matrix.h
similarity index 100%
rename from tm_sub_Hopping_Matrix.h
rename to operator/tm_sub_Hopping_Matrix.h
diff --git a/tm_times_Hopping_Matrix.c b/operator/tm_times_Hopping_Matrix.c
similarity index 100%
rename from tm_times_Hopping_Matrix.c
rename to operator/tm_times_Hopping_Matrix.c
diff --git a/tm_times_Hopping_Matrix.h b/operator/tm_times_Hopping_Matrix.h
similarity index 100%
rename from tm_times_Hopping_Matrix.h
rename to operator/tm_times_Hopping_Matrix.h
diff --git a/phmc.c b/phmc.c
index 3e5ae9690..c5b19bebd 100644
--- a/phmc.c
+++ b/phmc.c
@@ -34,7 +34,7 @@
 #include "init_chi_spinor_field.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators_nd.h"
 #include "phmc.h"
 #include "monomial/monomial.h"
 #include "solver/matrix_mult_typedef_bi.h"
diff --git a/prepare_source.c b/prepare_source.c
index 9e0548c04..b7ff50207 100644
--- a/prepare_source.c
+++ b/prepare_source.c
@@ -40,7 +40,7 @@
 #include "su3.h"
 #include "operator.h"
 #include "linalg_eo.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators_nd.h"
 #include "source_generation.h"
 #include "prepare_source.h"
 
diff --git a/reweighting_factor_nd.c b/reweighting_factor_nd.c
index f28a08f5c..48ffc699c 100644
--- a/reweighting_factor_nd.c
+++ b/reweighting_factor_nd.c
@@ -26,8 +26,8 @@
 #include "global.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "tm_operators.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators.h"
+#include "operator/tm_operators_nd.h"
 #include "Ptilde_nd.h"
 #include "phmc.h"
 #include "reweighting_factor_nd.h"
diff --git a/solver/Msap.c b/solver/Msap.c
index 5b8e21115..3d9e45ede 100644
--- a/solver/Msap.c
+++ b/solver/Msap.c
@@ -33,7 +33,7 @@
 #include "gmres.h"
 #include "solver.h"
 #include "block.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 #include "solver_field.h"
 #include "D_psi.h"
 
diff --git a/solver/dfl_projector.c b/solver/dfl_projector.c
index 80e1c07ed..40bc83593 100644
--- a/solver/dfl_projector.c
+++ b/solver/dfl_projector.c
@@ -34,7 +34,7 @@
 #include "block.h"
 #include "linalg/blas.h"
 #include "D_psi.h"
-#include "Hopping_Matrix.h"
+#include "operator/Hopping_Matrix.h"
 #include "little_D.h"
 #include "block.h"
 #include "linalg_eo.h"
diff --git a/solver/eigenvalues_bi.c b/solver/eigenvalues_bi.c
index e2d387bde..e7208de1e 100644
--- a/solver/eigenvalues_bi.c
+++ b/solver/eigenvalues_bi.c
@@ -52,7 +52,7 @@
 #include "solver/jdher_bi.h"
 #include "solver/matrix_mult_typedef_bi.h"
 #include "eigenvalues_bi.h"
-#include "tm_operators_nd.h"
+#include "operator/tm_operators_nd.h"
 
 
 double eigenvalues_bi(int * nr_of_eigenvalues,  
diff --git a/solver/index_jd.c b/solver/index_jd.c
index 2549a9995..a0a53691b 100644
--- a/solver/index_jd.c
+++ b/solver/index_jd.c
@@ -24,7 +24,7 @@
 #include "solver/solver.h"
 #include "solver/jdher.h"
 #include "solver/eigenvalues.h"
-#include "Dov_proj.h"
+#include "operator/Dov_proj.h"
 #include "gamma.h"
 #include "index_jd.h"
 
diff --git a/update_momenta.c b/update_momenta.c
index 1ed29e7d3..bcb572797 100644
--- a/update_momenta.c
+++ b/update_momenta.c
@@ -32,7 +32,7 @@
 #include "su3spinor.h"
 #include "monomial/monomial.h"
 #include "xchange/xchange.h"
-#include "clover_leaf.h"
+#include "operator/clover_leaf.h"
 #include "read_input.h"
 #include "hamiltonian_field.h"
 #include "update_momenta.h"
diff --git a/update_tm.c b/update_tm.c
index 5cdf0a606..02f4f1cbe 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -44,7 +44,7 @@
 #include "global.h"
 #include "start.h"
 #include "sighandler.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "linalg_eo.h"
 #include "io/gauge.h"
 #include "io/params.h"

From 50824021b96f9afeed3b46d7dc575ae77581de9e Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 11:06:01 +0200
Subject: [PATCH 071/110] overseen path adjustments added

---
 solver/Msap.c                        | 4 ++--
 solver/dfl_projector.c               | 4 ++--
 solver/dirac_operator_eigenvectors.c | 6 +++---
 solver/eigenvalues_bi.c              | 2 +-
 solver/fgmres.c                      | 2 +-
 solver/gcr.c                         | 4 ++--
 solver/poly_precon.c                 | 4 ++--
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/solver/Msap.c b/solver/Msap.c
index 3d9e45ede..77a58427b 100644
--- a/solver/Msap.c
+++ b/solver/Msap.c
@@ -28,14 +28,14 @@
 #include "su3.h"
 #include "start.h"
 #include "linalg_eo.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "boundary.h"
 #include "gmres.h"
 #include "solver.h"
 #include "block.h"
 #include "operator/Hopping_Matrix.h"
 #include "solver_field.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 
 void dummy_Di(spinor * const P, spinor * const Q, const int i) {
   Block_D_psi(&block_list[i], P, Q);
diff --git a/solver/dfl_projector.c b/solver/dfl_projector.c
index 40bc83593..cbdad2e32 100644
--- a/solver/dfl_projector.c
+++ b/solver/dfl_projector.c
@@ -33,14 +33,14 @@
 #include <complex.h>
 #include "block.h"
 #include "linalg/blas.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "operator/Hopping_Matrix.h"
 #include "little_D.h"
 #include "block.h"
 #include "linalg_eo.h"
 #include "gcr4complex.h"
 #include "generate_dfl_subspace.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "boundary.h"
 #include "Msap.h"
 #include "mr.h"
diff --git a/solver/dirac_operator_eigenvectors.c b/solver/dirac_operator_eigenvectors.c
index bdf8d9943..008e58444 100644
--- a/solver/dirac_operator_eigenvectors.c
+++ b/solver/dirac_operator_eigenvectors.c
@@ -27,10 +27,10 @@
 #include "linalg/lapack.h"
 #include "linalg/blas.h"
 #include "operator.h"
-#include "tm_operators.h"
-#include "D_psi.h"
+#include "operator/tm_operators.h"
+#include "operator/D_psi.h"
 #include "ranlxd.h"
-#include "Dov_psi.h"
+#include "operator/Dov_psi.h"
 #include "init_spinor_field.h"
 
 /*   typedef enum tm_operator_ {PRECWS_DTM,PRECWS_QTM,PRECWS_D_DAGGER_D} tm_operator; */
diff --git a/solver/eigenvalues_bi.c b/solver/eigenvalues_bi.c
index e7208de1e..68f0c15af 100644
--- a/solver/eigenvalues_bi.c
+++ b/solver/eigenvalues_bi.c
@@ -47,7 +47,7 @@
 #include "su3.h"
 #include "linalg_eo.h"
 #include "start.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "solver/solver.h"
 #include "solver/jdher_bi.h"
 #include "solver/matrix_mult_typedef_bi.h"
diff --git a/solver/fgmres.c b/solver/fgmres.c
index 58314849d..283ff0f80 100644
--- a/solver/fgmres.c
+++ b/solver/fgmres.c
@@ -44,7 +44,7 @@
 #include"su3.h"
 #include"linalg_eo.h"
 #include"gmres_precon.h"
-#include"tm_operators.h"
+#include"operator/tm_operators.h"
 #include"sub_low_ev.h"
 #include"poly_precon.h"
 #include "Msap.h"
diff --git a/solver/gcr.c b/solver/gcr.c
index 0ec2db06d..9fe4d990f 100644
--- a/solver/gcr.c
+++ b/solver/gcr.c
@@ -28,10 +28,10 @@
 #include"linalg_eo.h"
 #include"solver/gmres_precon.h"
 #include"start.h"
-#include"tm_operators.h"
+#include"operator/tm_operators.h"
 #include"solver/poly_precon.h"
 #include"solver/cg_her.h"
-#include"D_psi.h"
+#include"operator/D_psi.h"
 #include"Msap.h"
 #include"dfl_projector.h"
 #include "solver_field.h"
diff --git a/solver/poly_precon.c b/solver/poly_precon.c
index b55919c56..249278217 100644
--- a/solver/poly_precon.c
+++ b/solver/poly_precon.c
@@ -27,9 +27,9 @@
 #include "su3.h"
 #include "start.h"
 #include "linalg_eo.h"
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "boundary.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "poly_precon.h"
 
 

From 0bfd116c663d451c921807dd45531bfa16cf7c0b Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 11:25:20 +0200
Subject: [PATCH 072/110] typos in Makefiles.in's corrected and distributed
 clover into several files

---
 monomial/Makefile.in               |    2 +-
 operator/Makefile.in               |    5 +-
 operator/clover_accumulate_deriv.c |  207 ++++++
 operator/clover_deriv.c            |  319 ++++++++
 operator/clover_det.c              |  277 +++++++
 operator/clover_invert.c           |  318 ++++++++
 operator/clover_leaf.c             | 1097 ----------------------------
 operator/clover_leaf.h             |   54 ++
 operator/clover_term.c             |  204 ++++++
 solver/eigenvalues.c               |    4 +-
 xchange/Makefile.in                |    2 +-
 11 files changed, 1386 insertions(+), 1103 deletions(-)
 create mode 100644 operator/clover_accumulate_deriv.c
 create mode 100644 operator/clover_deriv.c
 create mode 100644 operator/clover_det.c
 create mode 100644 operator/clover_invert.c
 create mode 100644 operator/clover_term.c

diff --git a/monomial/Makefile.in b/monomial/Makefile.in
index e2007ce6d..cc39c90f2 100644
--- a/monomial/Makefile.in
+++ b/monomial/Makefile.in
@@ -4,7 +4,7 @@ top_builddir =  @top_builddir@
 abs_top_builddir = @abs_top_builddir@
 top_srcdir = @top_srcdir@
 abs_top_srcdir = @abs_top_srcdir@
-subdir = linalg
+subdir = monomial
 builddir = @builddir@
 
 CFLAGS = @CFLAGS@
diff --git a/operator/Makefile.in b/operator/Makefile.in
index 95efb81cc..a6a1be83f 100644
--- a/operator/Makefile.in
+++ b/operator/Makefile.in
@@ -4,7 +4,7 @@ top_builddir =  @top_builddir@
 abs_top_builddir = @abs_top_builddir@
 top_srcdir = @top_srcdir@
 abs_top_srcdir = @abs_top_srcdir@
-subdir = linalg
+subdir = operator
 builddir = @builddir@
 
 CFLAGS = @CFLAGS@
@@ -30,7 +30,8 @@ LDADD =
 COMPILE = ${CC} $(DEFS) ${INCLUDES} ${CFLAGS}
 
 LIBRARIES = liboperator
-liboperator_TARGETS = clovertm_operators clover_leaf tm_operators_nd 
+liboperator_TARGETS = clovertm_operators clover_leaf tm_operators_nd clover_term clover_invert \
+	clover_deriv clover_accumulate_deriv clover_det
 
 liboperator_STARGETS = Hopping_Matrix_nocom tm_times_Hopping_Matrix Hopping_Matrix \
 	tm_operators tm_sub_Hopping_Matrix D_psi Dov_psi Dov_proj
diff --git a/operator/clover_accumulate_deriv.c b/operator/clover_accumulate_deriv.c
new file mode 100644
index 000000000..05eba8f75
--- /dev/null
+++ b/operator/clover_accumulate_deriv.c
@@ -0,0 +1,207 @@
+/***********************************************************************
+ *
+ * Copyright (C) 1995 Ulli Wolff, Stefan Sint
+ *               2001,2005 Martin Hasenbusch
+ *               2011,2012 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#ifdef SSE
+# undef SSE
+#endif
+#ifdef SSE2
+# undef SSE2
+#endif
+#ifdef SSE3
+# undef SSE3
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <errno.h>
+#include <time.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#ifdef OMP
+# include <omp.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "sse.h"
+#include "su3adj.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
+
+// sw_all: now we sum up all terms from the clover term into hf->derivative,
+// after sw_spinor and sw_deriv have been called (this routine reads swm and swp).
+// Every contribution is scaled by -2*kappa*c_sw/8; hf->gaugefield is only read.
+void sw_all(hamiltonian_field_t * const hf, const double kappa, 
+	    const double c_sw) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  // declared inside the block so that, with OMP, every thread has its own copies
+  int k,l;
+  int x,xpk,xpl,xmk,xml,xpkml,xplmk,xmkml; // site x and neighbours: p = via g_iup, m = via g_idn (presumably forward/backward)
+  const su3 *w1,*w2,*w3,*w4; // the four gauge links of the leaf being processed
+  double ka_csw_8 = kappa*c_sw/8.;
+  su3 ALIGN v1,v2,vv1,vv2,plaq; // scratch matrices
+  su3 ALIGN vis[4][4]; // only entries with first index < second index are set and read
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(x = 0; x < VOLUME; x++) {
+    _minus_itimes_su3_plus_su3(vis[0][1],swm[x][1],swm[x][3]); // vis[0][l] are built from swm
+    _su3_minus_su3(vis[0][2],swm[x][1],swm[x][3]);
+    _itimes_su3_minus_su3(vis[0][3],swm[x][2],swm[x][0]);
+    
+    _minus_itimes_su3_plus_su3(vis[2][3],swp[x][1],swp[x][3]); // the remaining three planes are built from swp
+    _su3_minus_su3(vis[1][3],swp[x][3],swp[x][1]);
+    _itimes_su3_minus_su3(vis[1][2],swp[x][2],swp[x][0]);
+
+    // project to the traceless anti-hermitian part
+    _su3_dagger(v1,vis[0][1]); 
+    _su3_minus_su3(vis[0][1],vis[0][1],v1);
+    _su3_dagger(v1,vis[0][2]); 
+    _su3_minus_su3(vis[0][2],vis[0][2],v1);
+    _su3_dagger(v1,vis[0][3]); 
+    _su3_minus_su3(vis[0][3],vis[0][3],v1);
+    _su3_dagger(v1,vis[2][3]); 
+    _su3_minus_su3(vis[2][3],vis[2][3],v1);
+    _su3_dagger(v1,vis[1][3]); 
+    _su3_minus_su3(vis[1][3],vis[1][3],v1);
+    _su3_dagger(v1,vis[1][2]); 
+    _su3_minus_su3(vis[1][2],vis[1][2],v1);
+    // loop over the six planes k < l; each plane is handled as four link quadruples (leaves)
+    for(k = 0; k < 4; k++) {
+      for(l = k+1; l < 4; l++) {
+	xpk=g_iup[x][k];
+	xpl=g_iup[x][l];
+	xmk=g_idn[x][k];
+	xml=g_idn[x][l];
+	xpkml=g_idn[xpk][l];
+	xplmk=g_idn[xpl][k];
+	xmkml=g_idn[xml][k]; // reached via xml, then one step down in k
+	w1=&hf->gaugefield[x][k]; // leaf 1: links at x, xpk and xpl
+	w2=&hf->gaugefield[xpk][l];
+	w3=&hf->gaugefield[xpl][k];   /*dag*/
+	w4=&hf->gaugefield[x][l];     /*dag*/
+	
+	_su3_times_su3(v1,*w1,*w2);
+	_su3_times_su3(v2,*w4,*w3);
+	_su3_times_su3d(plaq,v1,v2);
+	// one derivative update per link of the leaf: [x][k], [xpk][l], [x][l], [xpl][k]
+	_su3_times_su3(vv1,plaq,vis[k][l]);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][k], -2.*ka_csw_8, vv1);
+	// NOTE(review): different x iterations add to the same derivative entry; presumably the _nonlocal macro makes this safe under OMP - TODO confirm
+	_su3d_times_su3(vv2,*w1,vv1); 
+	_su3_times_su3(vv1,vv2,*w1);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xpk][l], -2.*ka_csw_8, vv1);
+	
+	_su3_times_su3(vv2,vis[k][l],plaq); 
+	_su3_dagger(vv1,vv2);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][l], -2.*ka_csw_8, vv1);
+
+	_su3d_times_su3(vv2,*w4,vv1); 
+	_su3_times_su3(vv1,vv2,*w4);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xpl][k], -2.*ka_csw_8, vv1);
+	// leaf 2: links at x, xplmk and xmk; updates [x][l], [xplmk][k], [xmk][l], [xmk][k]
+	w1=&hf->gaugefield[x][l];
+	w2=&hf->gaugefield[xplmk][k];   /*dag*/
+	w3=&hf->gaugefield[xmk][l];     /*dag*/
+	w4=&hf->gaugefield[xmk][k];
+	_su3_times_su3d(v1,*w1,*w2);
+	_su3d_times_su3(v2,*w3,*w4);
+	_su3_times_su3(plaq,v1,v2);
+	
+	_su3_times_su3(vv1,plaq,vis[k][l]);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][l], -2.*ka_csw_8, vv1);
+	
+	_su3_dagger(vv1,v1); 
+	_su3_times_su3d(vv2,vv1,vis[k][l]);
+	_su3_times_su3d(vv1,vv2,v2);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xplmk][k], -2.*ka_csw_8, vv1);
+
+	_su3_times_su3(vv2,*w3,vv1); 
+	_su3_times_su3d(vv1,vv2,*w3);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmk][l], -2.*ka_csw_8, vv1);
+
+	_su3_dagger(vv2,vv1);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmk][k], -2.*ka_csw_8, vv2);
+	// leaf 3: links at xmk, xmkml and xml; updates [xmk][k], [xmkml][l], [xmkml][k], [xml][l]
+	w1=&hf->gaugefield[xmk][k];   /*dag*/
+	w2=&hf->gaugefield[xmkml][l]; /*dag*/
+	w3=&hf->gaugefield[xmkml][k];
+	w4=&hf->gaugefield[xml][l];
+	_su3_times_su3(v1,*w2,*w1); // NOTE(review): v1 is not read again before leaf 4 overwrites it
+	_su3_times_su3(v2,*w3,*w4);
+	
+	_su3_times_su3d(vv1,*w1,vis[k][l]);
+	_su3_times_su3d(vv2,vv1,v2);
+	_su3_times_su3(vv1,vv2,*w2);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmk][k], -2.*ka_csw_8, vv1);
+
+	_su3_times_su3(vv2,*w2,vv1); 
+	_su3_times_su3d(vv1,vv2,*w2);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmkml][l], -2.*ka_csw_8, vv1);
+
+	_su3_dagger(vv2,vv1);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmkml][k], -2.*ka_csw_8, vv2);
+
+	_su3d_times_su3(vv1,*w3,vv2); 
+	_su3_times_su3(vv2,vv1,*w3);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xml][l], -2.*ka_csw_8, vv2);
+	// leaf 4: links at xml, xpkml and x; updates [xml][l], [xml][k], [xpkml][l], [x][k]
+	w1=&hf->gaugefield[xml][l];   /*dag*/
+	w2=&hf->gaugefield[xml][k];
+	w3=&hf->gaugefield[xpkml][l];
+	w4=&hf->gaugefield[x][k];     /*dag*/
+	_su3d_times_su3(v1,*w1,*w2);
+	_su3_times_su3d(v2,*w3,*w4);
+	
+	_su3_times_su3d(vv1,*w1,vis[k][l]);
+	_su3_times_su3d(vv2,vv1,v2);
+	_su3_times_su3d(vv1,vv2,*w2);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xml][l], -2.*ka_csw_8, vv1);
+	
+	_su3_dagger(vv2,vv1);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xml][k], -2.*ka_csw_8, vv2);
+
+	_su3d_times_su3(vv1,*w2,vv2); 
+	_su3_times_su3(vv2,vv1,*w2);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xpkml][l], -2.*ka_csw_8, vv2);
+
+	_su3_dagger(vv2,v2);  
+	_su3_times_su3d(vv1,vv2,v1);
+	_su3_times_su3d(vv2,vv1,vis[k][l]);
+ 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][k], -2.*ka_csw_8, vv2);
+      }
+    }
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
diff --git a/operator/clover_deriv.c b/operator/clover_deriv.c
new file mode 100644
index 000000000..5db20b46f
--- /dev/null
+++ b/operator/clover_deriv.c
@@ -0,0 +1,319 @@
+/***********************************************************************
+ *
+ * Copyright (C) 1995 Ulli Wolff, Stefan Sint
+ *               2001,2005 Martin Hasenbusch
+ *               2011,2012 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#ifdef SSE
+# undef SSE
+#endif
+#ifdef SSE2
+# undef SSE2
+#endif
+#ifdef SSE3
+# undef SSE3
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <errno.h>
+#include <time.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#ifdef OMP
+# include <omp.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "sse.h"
+#include "su3adj.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
+
+// this is (-tr(1+T_ee(+mu)) -tr(1+T_ee(-mu)))      
+// (or T_oo of course)
+// 
+// see equation (24) of hep-lat/9603008             
+//
+// or in more detail the insertion matrix at even sites
+// is computed
+// and stored in swm and swp, which are 4 su3 matrices 
+// each per site
+// referring to upwards or downwards winding paths  
+//
+// swm and swp are representing 6x6 complex matrices
+// (colour matrices)
+//
+// this function depends on mu
+
+void sw_deriv(const int ieo, const double mu) { // adds sw_inv-based insertion terms to swm[] and swp[]
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  int icy;   // row of sw_inv for the current site; always equals icx - ioff
+  int ioff;  // first value of the loop index icx, chosen by ieo
+  int x;     // g_eo2lexic[icx]; indexes swm[] and swp[]
+  double fac = 1.0000;  // weight applied when accumulating; lowered to 0.5 if mu != 0
+  su3 ALIGN lswp[4], lswm[4];  // scratch: "plus" and "minus" combinations of the 4 blocks
+  // ieo == 0 starts the loop at index 0, any other value at (VOLUME+RAND)/2
+  /* convention: Tr clover-leaf times insertion */
+  if(ieo == 0) {
+    ioff=0;
+  } 
+  else {
+    ioff = (VOLUME+RAND)/2;
+  }
+  if(fabs(mu) > 0.) fac = 0.5;  // two sw_inv sets are accumulated below in this case
+  // serial build: icy is a running counter; OMP build: icy is recomputed from icx
+#ifndef OMP
+  icy = 0;
+#endif
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+#ifdef OMP
+    icy = icx - ioff;
+#endif
+    x = g_eo2lexic[icx];
+    /* compute the insertion matrix: lswp[a] from sw_inv[icy][a][1] and sw_inv[icy][a][0] via _su3_plus_su3 */
+    _su3_plus_su3(lswp[0], sw_inv[icy][0][1], sw_inv[icy][0][0]);
+    _su3_plus_su3(lswp[1], sw_inv[icy][1][1], sw_inv[icy][1][0]);
+    _su3_plus_su3(lswp[2], sw_inv[icy][2][1], sw_inv[icy][2][0]);
+    _su3_plus_su3(lswp[3], sw_inv[icy][3][1], sw_inv[icy][3][0]);
+    /* lswm[a]: the same two operands combined via _su3_minus_su3 */
+    _su3_minus_su3(lswm[0], sw_inv[icy][0][1], sw_inv[icy][0][0]);
+    _su3_minus_su3(lswm[1], sw_inv[icy][1][1], sw_inv[icy][1][0]);
+    _su3_minus_su3(lswm[2], sw_inv[icy][2][1], sw_inv[icy][2][0]);
+    _su3_minus_su3(lswm[3], sw_inv[icy][3][1], sw_inv[icy][3][0]);
+    
+    /* add up to swm[x][] and swp[x][] with weight fac */
+    _su3_refac_acc(swm[x][0], fac, lswm[0]);
+    _su3_refac_acc(swm[x][1], fac, lswm[1]);
+    _su3_refac_acc(swm[x][2], fac, lswm[2]);
+    _su3_refac_acc(swm[x][3], fac, lswm[3]);
+    _su3_refac_acc(swp[x][0], fac, lswp[0]);
+    _su3_refac_acc(swp[x][1], fac, lswp[1]);
+    _su3_refac_acc(swp[x][2], fac, lswp[2]);
+    _su3_refac_acc(swp[x][3], fac, lswp[3]);
+    if(fabs(mu) > 0.) {
+      /* same again for the second set, stored VOLUME/2 entries further up in sw_inv */
+      _su3_plus_su3(lswp[0], sw_inv[icy+VOLUME/2][0][1], sw_inv[icy+VOLUME/2][0][0]);
+      _su3_plus_su3(lswp[1], sw_inv[icy+VOLUME/2][1][1], sw_inv[icy+VOLUME/2][1][0]);
+      _su3_plus_su3(lswp[2], sw_inv[icy+VOLUME/2][2][1], sw_inv[icy+VOLUME/2][2][0]);
+      _su3_plus_su3(lswp[3], sw_inv[icy+VOLUME/2][3][1], sw_inv[icy+VOLUME/2][3][0]); 
+
+      _su3_minus_su3(lswm[0], sw_inv[icy+VOLUME/2][0][1], sw_inv[icy+VOLUME/2][0][0]);
+      _su3_minus_su3(lswm[1], sw_inv[icy+VOLUME/2][1][1], sw_inv[icy+VOLUME/2][1][0]);
+      _su3_minus_su3(lswm[2], sw_inv[icy+VOLUME/2][2][1], sw_inv[icy+VOLUME/2][2][0]);
+      _su3_minus_su3(lswm[3], sw_inv[icy+VOLUME/2][3][1], sw_inv[icy+VOLUME/2][3][0]);
+      
+      /* add up to swm[x][] and swp[x][] with weight fac */
+      _su3_refac_acc(swm[x][0], fac, lswm[0]);
+      _su3_refac_acc(swm[x][1], fac, lswm[1]);
+      _su3_refac_acc(swm[x][2], fac, lswm[2]);
+      _su3_refac_acc(swm[x][3], fac, lswm[3]);
+      _su3_refac_acc(swp[x][0], fac, lswp[0]);
+      _su3_refac_acc(swp[x][1], fac, lswp[1]);
+      _su3_refac_acc(swp[x][2], fac, lswp[2]);
+      _su3_refac_acc(swp[x][3], fac, lswp[3]);
+    }
+#ifndef OMP
+    ++icy;
+#endif
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+void sw_deriv_nd(const int ieo) { // nd variant: combines sw[x] and sw_inv[icy] via mult_6x6, accumulates into swm[]/swp[]
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  int icy;   // row of sw_inv for the current site; always equals icx - ioff
+  int ioff;  // first value of the loop index icx, chosen by ieo
+  int x;     // g_eo2lexic[icx]; indexes sw[], swm[] and swp[]
+  double fac = 1.0000;  // accumulation weight; never changed in this function
+  su3 ALIGN lswp[4], lswm[4], v;  // v: scratch for the dagger of sw[x][1][i]
+  _Complex double ALIGN a0[6][6], a1[6][6], b[6][6], c[6][6];  // 6x6 work matrices
+  // ieo == 0 starts the loop at index 0, any other value at (VOLUME+RAND)/2
+  /* convention: Tr clover-leaf times insertion */
+  if(ieo == 0) {
+    ioff=0;
+  } 
+  else {
+    ioff = (VOLUME+RAND)/2;
+  }
+  // serial build: icy is a running counter; OMP build: icy is recomputed from icx
+#ifndef OMP
+  icy = 0;
+#endif
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+#ifdef OMP
+    icy = icx - ioff;
+#endif
+    x = g_eo2lexic[icx];
+    /* compute the insertion matrix; b: 6x6 built from sw[x][0..2][0], with dagger(sw[x][1][0]) at offset (3,0) */
+    populate_6x6_matrix(b, &sw[x][0][0], 0, 0);
+    populate_6x6_matrix(b, &sw[x][1][0], 0, 3);
+    _su3_dagger(v, sw[x][1][0]); 
+    populate_6x6_matrix(b, &v, 3, 0);
+    populate_6x6_matrix(b, &sw[x][2][0], 3, 3);
+    // c: 6x6 built from the four blocks sw_inv[icy][0..3][0]
+    populate_6x6_matrix(c, &sw_inv[icy][0][0], 0, 0);
+    populate_6x6_matrix(c, &sw_inv[icy][1][0], 0, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][2][0], 3, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][3][0], 3, 0);
+
+    mult_6x6(a0, b, c);  // presumably a0 = b * c -- confirm against mult_6x6
+    // same construction with second index 1, giving a1
+    populate_6x6_matrix(b, &sw[x][0][1], 0, 0);
+    populate_6x6_matrix(b, &sw[x][1][1], 0, 3);
+    _su3_dagger(v, sw[x][1][1]); 
+    populate_6x6_matrix(b, &v, 3, 0);
+    populate_6x6_matrix(b, &sw[x][2][1], 3, 3);
+
+    populate_6x6_matrix(c, &sw_inv[icy][0][1], 0, 0);
+    populate_6x6_matrix(c, &sw_inv[icy][1][1], 0, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][2][1], 3, 3);
+    populate_6x6_matrix(c, &sw_inv[icy][3][1], 3, 0);
+
+    mult_6x6(a1, b, c);
+    add_6x6(b, a1, a0);  // b is reused as result buffer; lswp[] are its four 3x3 blocks
+    get_3x3_block_matrix(&lswp[0], b, 0, 0);
+    get_3x3_block_matrix(&lswp[1], b, 0, 3);
+    get_3x3_block_matrix(&lswp[2], b, 3, 3);
+    get_3x3_block_matrix(&lswp[3], b, 3, 0);
+    // lswm[]: the same block extraction applied to the sub_6x6 result
+    sub_6x6(b, a1, a0);
+    get_3x3_block_matrix(&lswm[0], b, 0, 0);
+    get_3x3_block_matrix(&lswm[1], b, 0, 3);
+    get_3x3_block_matrix(&lswm[2], b, 3, 3);
+    get_3x3_block_matrix(&lswm[3], b, 3, 0);
+    
+    /* add up to swm[x][] and swp[x][] with weight fac */
+    _su3_refac_acc(swm[x][0], fac, lswm[0]);
+    _su3_refac_acc(swm[x][1], fac, lswm[1]);
+    _su3_refac_acc(swm[x][2], fac, lswm[2]);
+    _su3_refac_acc(swm[x][3], fac, lswm[3]);
+    _su3_refac_acc(swp[x][0], fac, lswp[0]);
+    _su3_refac_acc(swp[x][1], fac, lswp[1]);
+    _su3_refac_acc(swp[x][2], fac, lswp[2]);
+    _su3_refac_acc(swp[x][3], fac, lswp[3]);
+#ifndef OMP
+    ++icy;
+#endif
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+
+// direct product of Y_e(o) and X_e(o) in colour space   
+// with insertion matrix at site x
+// see equation (22) of hep-lat/9603008                  
+// result is again stored in swm and swp                 
+// includes a gamma5 multiplication for kk
+
+void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll, 
+	       const double fac) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  // Accumulates, with weight fac, colour tensor products of the spinors in kk and ll into swm[] and swp[].
+  int ioff;  // first value of the loop index icx, chosen by ieo
+  int icx;
+  int x;     // g_eo2lexic[icx]; indexes swm[] and swp[]
+  const spinor *r,*s;      // r, s: spinors of kk and ll at position icx - ioff
+  su3 ALIGN v0,v1,v2,v3;   // products of the s0/s1 components (_vector_tensor_vector)
+  su3 ALIGN u0,u1,u2,u3;   // products of the s2/s3 components (_mvector_tensor_vector)
+  su3 ALIGN lswp[4],lswm[4];
+  // ieo == 0 starts the loop at index 0, any other value at (VOLUME+RAND)/2
+  if(ieo == 0) {
+    ioff=0;
+  } 
+  else {
+    ioff=(VOLUME+RAND)/2;
+  }
+  /************************ loop over half of the lattice sites ***********/
+
+#ifdef OMP
+#pragma omp for
+#endif  
+  for(icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+    x = g_eo2lexic[icx];
+    r = kk + icx - ioff;
+    s = ll + icx - ioff;
+    
+    _vector_tensor_vector(v0,(*r).s0,(*s).s0);
+    _vector_tensor_vector(v1,(*r).s0,(*s).s1);
+    _vector_tensor_vector(v2,(*r).s1,(*s).s1);
+    _vector_tensor_vector(v3,(*r).s1,(*s).s0);
+    // mvector takes g5 into account
+    _mvector_tensor_vector(u0,(*r).s2,(*s).s2);
+    _mvector_tensor_vector(u1,(*r).s2,(*s).s3);
+    _mvector_tensor_vector(u2,(*r).s3,(*s).s3);
+    _mvector_tensor_vector(u3,(*r).s3,(*s).s2);
+    
+    /* compute the insertion matrix: lswp[a] from (u, v) via _su3_plus_su3 */
+    _su3_plus_su3(lswp[0],u0,v0);
+    _su3_plus_su3(lswp[1],u1,v1);
+    _su3_plus_su3(lswp[2],u2,v2);
+    _su3_plus_su3(lswp[3],u3,v3);
+    /* lswm[a]: the same operands combined via _su3_minus_su3 */
+    _su3_minus_su3(lswm[0],u0,v0);
+    _su3_minus_su3(lswm[1],u1,v1);
+    _su3_minus_su3(lswm[2],u2,v2);
+    _su3_minus_su3(lswm[3],u3,v3);
+    
+    /* add up to swm[x][] and swp[x][] with weight fac */
+    _su3_refac_acc(swm[x][0], fac, lswm[0]);
+    _su3_refac_acc(swm[x][1], fac, lswm[1]);
+    _su3_refac_acc(swm[x][2], fac, lswm[2]);
+    _su3_refac_acc(swm[x][3], fac, lswm[3]);
+    _su3_refac_acc(swp[x][0], fac, lswp[0]);
+    _su3_refac_acc(swp[x][1], fac, lswp[1]);
+    _su3_refac_acc(swp[x][2], fac, lswp[2]);
+    _su3_refac_acc(swp[x][3], fac, lswp[3]);
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+
diff --git a/operator/clover_det.c b/operator/clover_det.c
new file mode 100644
index 000000000..2fc276ffa
--- /dev/null
+++ b/operator/clover_det.c
@@ -0,0 +1,277 @@
+/***********************************************************************
+ *
+ * Copyright (C) 1995 Ulli Wolff, Stefan Sint
+ *               2001,2005 Martin Hasenbusch
+ *               2011,2012 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#ifdef SSE
+# undef SSE
+#endif
+#ifdef SSE2
+# undef SSE2
+#endif
+#ifdef SSE3
+# undef SSE3
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <errno.h>
+#include <time.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#ifdef OMP
+# include <omp.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "sse.h"
+#include "su3adj.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
+
+#define nm1 5
+void six_det(_Complex double* const rval, _Complex double a[6][6]) // determinant of a, written to *rval; a is overwritten
+{
+  /* all work variables are automatic (no statics): required for thread safety */
+  _Complex double ALIGN sigma,z;
+  _Complex double ALIGN det;
+  double ALIGN p[nm1+1];  // p[k]: divisor used when reflecting with column k
+  double ALIGN s,q;
+  int i,j,k;
+  int ifail;  // counts steps where |sigma|^2 falls below tiny_t
+  ifail=0;
+  /* compute the determinant:*/
+  det = 1.0;
+  // column-by-column reduction; det accumulates the product of the sigma values
+  for(k = 0; k < nm1; k++) {
+    s=0.0;
+    for(j = k+1; j <= nm1; ++j) {
+      s += conj(a[j][k]) * a[j][k];
+    }
+    s = sqrt(1. + s / (conj(a[k][k]) * a[k][k]));  // note: no guard against a[k][k] == 0
+    sigma = s * a[k][k];  // |sigma| is the norm of column k from row k downwards
+    
+    /* determinant */
+    det *= sigma;
+    q   = sigma * conj(sigma);
+    if (q < tiny_t)
+      ifail++;
+    // column k (rows k..nm1) now serves as the reflection vector
+    a[k][k] += sigma;
+    p[k]     = sigma * conj(a[k][k]);
+    
+    /* reflect all columns to the right */
+    for(j = k+1; j <= nm1; j++) {
+      z = 0.;
+      for(i = k; i <= nm1; i++) {
+	z += conj(a[i][k]) * a[i][j];
+      }
+      z /= p[k];
+      for(i = k; i <= nm1; i++) {
+	a[i][j] -= z * a[i][k];
+      }
+    }
+  }
+  sigma = a[nm1][nm1];  // last diagonal element needs no further reflection
+  
+  /* determinant */
+  det *= sigma;
+  q = conj(sigma) * sigma;
+  // failures are only reported (by process 0, on stderr); det is returned regardless
+  if(q < tiny_t) {
+    ifail++;
+  }
+  if(g_proc_id == 0 && ifail > 0) {
+    fprintf(stderr, "Warning: ifail = %d > 0 in six_det\n", ifail);
+  }
+  *rval = det;
+}
+
+
+double sw_trace(const int ieo, const double mu) { // sums log|det|^2 of both 6x6 clover blocks over VOLUME/2 sites
+  double ALIGN res = 0.0;
+#ifdef MPI
+  double ALIGN mres;
+#endif
+
+#ifdef OMP
+#pragma omp parallel
+  {
+  int thread_num = omp_get_thread_num();
+#endif
+  // everything declared from here to the end of the loop lives inside the parallel region in the OMP build
+  int i,x,ioff;
+  su3 ALIGN v;                    // scratch for the dagger of sw[x][1][i]
+  _Complex double ALIGN a[6][6];  // 6x6 matrix handed to six_det (which overwrites it)
+  double ALIGN tra;
+  double ALIGN ks,kc,tr,ts,tt;    // ks: running sum, kc: carried correction, tr/ts/tt: temporaries
+  _Complex double ALIGN det;
+
+  ks = 0.0;
+  kc = 0.0;
+  // ieo == 0 starts the loop at index 0, any other value at (VOLUME+RAND)/2
+  if(ieo==0) {
+    ioff=0;
+  } 
+  else {
+    ioff=(VOLUME+RAND)/2;
+  }
+  
+#ifdef OMP
+#pragma omp for
+#endif
+  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+    x = g_eo2lexic[icx];
+    for(i=0;i<2;i++) {
+      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
+      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
+      _su3_dagger(v, sw[x][1][i]); 
+      populate_6x6_matrix(a, &v, 3, 0);
+      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
+      // we add the twisted mass term: +mu for i == 0, -mu for i == 1
+      if(i == 0) add_tm(a, mu);
+      else add_tm(a, -mu);
+      // and compute the tr log (or log det)
+      six_det(&det,a);
+      tra = log(conj(det)*det);  // log |det|^2
+      // NOTE(review): the original remark here says only the +mu determinant is needed because
+      // the -mu one must be its complex conjugate; the loop nevertheless evaluates both.
+      // compensated (Kahan-style) accumulation of tra into ks, with correction kc
+      tr=tra+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  }
+  kc=ks+kc;  // fold the remaining correction into the total
+  // OMP build: per-thread totals are stored in g_omp_acc_re and added up after the parallel region
+#ifdef OMP
+  g_omp_acc_re[thread_num] = kc;
+  } /* OpenMP parallel closing brace */
+
+  for(int i = 0; i < omp_num_threads; ++i) {
+    res += g_omp_acc_re[i];
+  }
+#else
+  res=kc;
+#endif
+  // MPI build: the per-process results are summed over MPI_COMM_WORLD
+#ifdef MPI
+  MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  return(mres);
+#else
+  return(res);
+#endif
+
+}
+
+
+// This function computes the trace-log part of the clover term
+// in case of even/odd preconditioning in the nd case
+//
+// it is expected that sw_term is called beforehand such that
+// the array sw is populated properly
+//
+// it is tested to deliver bit-identical results to sw_trace
+// if eps is set to zero
+
+double sw_trace_nd(const int ieo, const double mu, const double eps) { // per-site log of the combined +mu/-mu determinants, summed
+  double ALIGN res = 0.0;
+#ifdef MPI
+  double ALIGN mres;
+#endif
+
+#ifdef OMP
+#pragma omp parallel
+  {
+  int thread_num = omp_get_thread_num();
+#endif
+  // everything declared from here to the end of the loop lives inside the parallel region in the OMP build
+  int x,ioff;
+  su3 ALIGN v;                    // scratch for the dagger of sw[x][1][i]
+  _Complex double ALIGN a[6][6];  // 6x6 matrix handed to six_det (which overwrites it)
+  double ALIGN tra;
+  double ALIGN ks,kc,tr,ts,tt;    // ks: running sum, kc: carried correction, tr/ts/tt: temporaries
+  _Complex double ALIGN det[2];   // det[0]: block with +mu, det[1]: block with -mu
+  double se = (eps*eps)*(eps*eps)*(eps*eps);  // eps^6
+  ks=0.0;
+  kc=0.0;
+  // ieo == 0 starts the loop at index 0, any other value at (VOLUME+RAND)/2
+  if(ieo==0) {
+    ioff=0;
+  } 
+  else {
+    ioff=(VOLUME+RAND)/2;
+  }
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(unsigned int icx = ioff; icx < (VOLUME/2+ioff); icx++) { // NOTE(review): unsigned index while ioff is int; sw_trace uses int
+    x = g_eo2lexic[icx];
+    for(unsigned int i = 0; i < 2; i++) {
+      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
+      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
+      _su3_dagger(v, sw[x][1][i]); 
+      populate_6x6_matrix(a, &v, 3, 0);
+      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
+      // we add the twisted mass term prop to tau^3
+      if(i == 0) add_tm(a, mu);
+      else add_tm(a, -mu);
+      six_det(&det[i], a);
+    }
+    // and compute the tr log (or log det)
+    // for the 2x2 matrix in flavour space
+    // with eps*tau^1 in the off diagonal
+    tra = log(conj(det[0])*det[0]*conj(det[1])*det[1] - se*se);  // log(|det[0]|^2 |det[1]|^2 - eps^12)
+    // compensated (Kahan-style) accumulation of tra into ks, with correction kc
+    tr=tra+kc;
+    ts=tr+ks;
+    tt=ts-ks;
+    ks=ts;
+    kc=tr-tt;
+  }
+  kc=ks+kc;  // fold the remaining correction into the total
+  
+#ifdef OMP
+  g_omp_acc_re[thread_num] = kc;
+  } /* OpenMP parallel closing brace */
+
+  for(int i = 0; i < omp_num_threads; ++i) {
+    res += g_omp_acc_re[i];
+  }
+#else
+  res=kc;
+#endif
+  // MPI build: the per-process results are summed over MPI_COMM_WORLD
+#ifdef MPI
+  MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  return(mres);
+#else
+  return(res);
+#endif
+}
diff --git a/operator/clover_invert.c b/operator/clover_invert.c
new file mode 100644
index 000000000..1e2b3dd3b
--- /dev/null
+++ b/operator/clover_invert.c
@@ -0,0 +1,318 @@
+/***********************************************************************
+ *
+ * Copyright (C) 1995 Ulli Wolff, Stefan Sint
+ *               2001,2005 Martin Hasenbusch
+ *               2011,2012 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#ifdef SSE
+# undef SSE
+#endif
+#ifdef SSE2
+# undef SSE2
+#endif
+#ifdef SSE3
+# undef SSE3
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <errno.h>
+#include <time.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#ifdef OMP
+# include <omp.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "sse.h"
+#include "su3adj.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
+
+/*
+  !--------------------------------------------------------------!
+  !  The subroutine sw_invert is needed for the                  !
+  !  even_odd preconditioned Dirac operator with SW improvement. !
+  !  Details can be found in  the notes sw.ps on tsun.desy.de    !
+  !  by P. Weisz and U. Wolff.                                   !
+  !--------------------------------------------------------------!
+  !  inversion in place of complex matrix a without pivoting     !
+  !  triangularization by householder reflections                 !
+  !  inversion of triangular matrix                              !
+  !  inverse reflections                                          !
+  !--------------------------------------------------------------!
+  !  a square matrix, dimensioned 0:n-1                          !
+  !  itrouble is counted up, when a dangerously small diagonal   !
+  !  element is encountered in the triangular matrix             !
+  !  has to be initialized outside                               !
+  !                                                              !
+  !  Author: U. Wolff, adapted to fortran90 by S. Sint, 29/10/95 !
+  !--------------------------------------------------------------!
+  !  ported to C by M.Hasenbusch Wed Oct 24 15:46:46 MEST 2001   !
+  !______________________________________________________________!
+*/
+
+
+/* six_invert and six_det are called from multiple threads, they are thus
+ * made thread-safe by removing the static keywords but they are NOT
+ * parallelised for OpenMP */
+
+#define nm1 5
+void six_invert(int* ifail ,_Complex double a[6][6]) // inverts a in place; *ifail counts steps with |sigma|^2 < tiny_t
+{
+  /* all work variables are automatic (no statics): required for thread safety */
+  _Complex double ALIGN d[nm1+1],u[nm1+1];  // d: diagonal of the inverted triangular matrix, u: copy of one reflection column
+  _Complex double ALIGN sigma,z;
+  double ALIGN p[nm1+1];  // p[k]: divisor used when reflecting with column k
+  double ALIGN s,q;
+  int i,j,k;
+  *ifail=0;  // reset on entry: a value set by the caller is not accumulated
+  for(k = 0; k < nm1; ++k)
+  {
+    s=0.0;
+    for(j = k+1; j <= nm1; ++j)
+      s += conj(a[j][k]) * a[j][k];
+    s = sqrt(1. + s / (conj(a[k][k]) * a[k][k]));  // note: no guard against a[k][k] == 0
+    sigma = s * a[k][k];
+    // column k (rows k..nm1) now serves as the reflection vector
+    a[k][k] += sigma;
+    p[k] = conj(sigma) * a[k][k];
+    q = conj(sigma) * sigma;
+    if (q < tiny_t)
+      (*ifail)++;
+    d[k] = -conj(sigma) / q;  // computed even when q is tiny; only *ifail records the problem
+
+    /* reflect all columns to the right */
+    for(j = k+1; j <= nm1; ++j)
+    {
+      z = 0.0;
+      for(i = k; i <= nm1; ++i)
+	z += conj(a[i][k]) * a[i][j];
+      z /= p[k];
+      for(i = k; i <= nm1; ++i)
+	a[i][j] -= z * a[i][k];
+    }
+  }
+  sigma = a[nm1][nm1];  // last diagonal element needs no further reflection
+  q = conj(sigma) * sigma;
+  if (q < tiny_t)
+    (*ifail)++;
+  d[nm1] = conj(sigma) / q;
+
+  /*  inversion of upper triangular matrix in place
+      (diagonal elements done already): */
+
+  for(k = nm1; k >= 0; k--) {
+    for(i = k-1; i >= 0;i--) {
+      z = 0.0;
+      for(j = i+1; j < k; j++)
+	z += a[i][j] * a[j][k];
+      z += a[i][k] * d[k];
+      a[i][k] = -z * d[i];
+    }
+  }     
+  /* execute reflections in reverse order from the right: */
+  
+  a[nm1][nm1] = d[nm1];
+  for(k = nm1-1; k >= 0; k--)
+  {
+    for(j=k;j<=nm1;j++)
+      u[j] = a[j][k];  // save the reflection vector before column k is overwritten
+    a[k][k] = d[k];
+    for(j = k+1; j <= nm1; j++)
+      a[j][k] = 0.0;
+    for(i = 0; i <= nm1; i++)
+    {
+      z = 0.0;
+      for(j = k; j <= nm1; j++)
+        z += a[i][j] * u[j];
+      z /= p[k];         /* normalization */
+      
+      for(j = k; j <= nm1; j++)
+        a[i][j] -= conj(u[j]) * z; /* reflection */
+    }
+  }
+}
+
+// This function computes the inverse of
+// (1 + T_ee \pm I\mu\gamma_5)
+//
+// + is stored in sw_inv[0-(VOLUME/2-1)] 
+// - is stored in sw_inv[VOLUME/2-(VOLUME-1)]
+
+void sw_invert(const int ieo, const double mu) { // fills sw_inv with the inverses of the 6x6 clover blocks of sw
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  int icy;          // row of sw_inv for the current site; always equals icx - ioff
+  int ioff, err=0;  // ioff: first loop index, chosen by ieo; err: failure count from six_invert
+  int i, x;
+  su3 ALIGN v;                    // scratch for the dagger of sw[x][1][i]
+  _Complex double ALIGN a[6][6];  // 6x6 matrix that six_invert inverts in place
+  // ieo == 0 starts the loop at index 0, any other value at (VOLUME+RAND)/2
+  if(ieo==0) {
+    ioff=0;
+  } 
+  else {
+    ioff=(VOLUME+RAND)/2;
+  }
+  // serial build: icy is a running counter; OMP build: icy is recomputed from icx
+#ifndef OMP
+  icy=0;
+#endif
+
+#ifdef OMP
+#pragma omp for
+#endif
+  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
+#ifdef OMP
+    icy = icx - ioff;
+#endif
+    x = g_eo2lexic[icx];
+    // first set: +mu for i == 0, -mu for i == 1; stored at sw_inv[icy]
+    for(i = 0; i < 2; i++) {
+      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
+      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
+      _su3_dagger(v, sw[x][1][i]); 
+      populate_6x6_matrix(a, &v, 3, 0);
+      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
+      // we add the twisted mass term
+      if(i == 0) add_tm(a, +mu);
+      else add_tm(a, -mu);
+      // and invert the resulting matrix
+
+      six_invert(&err,a); 
+      // a failure is only reported (by process 0); the result is stored regardless
+      if(err > 0 && g_proc_id == 0) {
+	printf("# inversion failed in six_invert code %d\n", err);
+	err = 0;
+      }
+
+      /*  copy "a" back to sw_inv */
+      get_3x3_block_matrix(&sw_inv[icy][0][i], a, 0, 0);
+      get_3x3_block_matrix(&sw_inv[icy][1][i], a, 0, 3);
+      get_3x3_block_matrix(&sw_inv[icy][2][i], a, 3, 3);
+      get_3x3_block_matrix(&sw_inv[icy][3][i], a, 3, 0);
+    }
+    // second set, only for mu != 0: signs of mu swapped; stored at sw_inv[icy+VOLUME/2]
+    if(fabs(mu) > 0.) {
+      for(i = 0; i < 2; i++) {
+	populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
+	populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
+	_su3_dagger(v, sw[x][1][i]); 
+	populate_6x6_matrix(a, &v, 3, 0);
+	populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
+
+	// we add the twisted mass term
+	if(i == 0) add_tm(a, -mu);
+	else add_tm(a, +mu);
+	// and invert the resulting matrix
+	six_invert(&err,a); 
+	// same reporting as in the first set (the message used to be a bare "# <code>")
+	if(err > 0 && g_proc_id == 0) {
+	  printf("# inversion failed in six_invert code %d\n", err);
+	  err = 0;
+	}
+
+	/*  copy "a" back to sw_inv */
+	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][0][i], a, 0, 0);
+	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][1][i], a, 0, 3);
+	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][2][i], a, 3, 3);
+	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][3][i], a, 3, 0);
+      }
+    }
+#ifndef OMP
+    ++icy;
+#endif
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
+
+// This function computes
+//
+// ((1+T)^2 + barmu^2 - bareps^2)^{-1}
+//
+// for all even x,
+// which is stored in sw_inv[0-(VOLUME/2-1)]
+//
+// it is the complement of sw_invert for the
+// non-degenerate case
+// multiplication with
+// (1+T - i\bar\mu\gamma_5\tau^3 + \bar\epsilon\tau^1)
+// must be done elsewhere because of flavour structure
+
+void sw_invert_nd(const double mshift) { // stores the inverse of (1+T)^2 + mshift in sw_inv for indices 0..VOLUME/2-1
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  int err=0;  // failure count reported by six_invert
+  int i, x;
+  su3 ALIGN v;  // scratch for the dagger of sw[x][1][i]
+  _Complex double ALIGN a[6][6], b[6][6];  // a: 6x6 built from sw[x]; b: matrix that gets inverted
+  // no ieo argument: the loop always covers indices 0 .. VOLUME/2-1
+#ifdef OMP
+#pragma omp for
+#endif
+  for(int icx = 0; icx < (VOLUME/2); icx++) {
+    x = g_eo2lexic[icx];
+
+    for(i = 0; i < 2; i++) {
+      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
+      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
+      _su3_dagger(v, sw[x][1][i]); 
+      populate_6x6_matrix(a, &v, 3, 0);
+      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
+
+      // compute (1+T)^2 and store in b
+      mult_6x6(b, a, a);
+      // we add the mass shift term, which is a real number
+      add_shift_6x6(b, mshift);
+      // so b = (1+T)^2 + shift
+      // now invert this matrix
+      six_invert(&err, b); 
+      // a failure is only reported (by process 0); the result is stored regardless
+      if(err > 0 && g_proc_id == 0) {
+	printf("# inversion failed in six_invert_nd code %d\n", err);
+	err = 0;
+      }
+
+      /*  copy the inverted matrix b to sw_inv */
+      get_3x3_block_matrix(&sw_inv[icx][0][i], b, 0, 0);
+      get_3x3_block_matrix(&sw_inv[icx][1][i], b, 0, 3);
+      get_3x3_block_matrix(&sw_inv[icx][2][i], b, 3, 3);
+      get_3x3_block_matrix(&sw_inv[icx][3][i], b, 3, 0);
+    }
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
diff --git a/operator/clover_leaf.c b/operator/clover_leaf.c
index 5176d579d..41df662cb 100644
--- a/operator/clover_leaf.c
+++ b/operator/clover_leaf.c
@@ -56,525 +56,6 @@ const double tiny_t = 1.0e-20;
 
 su3 ** swm, ** swp;
 
-// the clover term is written as
-//
-//   1 + T_{xa\alpha,yb\beta} 
-// = 1 + i csw kappa/2 sigma_munu^alphabeta F_munu^ab(x)delta_xy
-//
-// see hep-lat/9603008 for all glory details
-//
-// per site we have to store two six-by-six complex matrices.
-// As the off-diagonal 3x3 matrices are just inverse to
-// each other, we get away with two times three 3x3 complex matrices
-//
-// these are stored in the array sw[VOLUME][3][2] of type su3
-// where x is the space time index
-// a runs from 0 to 2
-// b runs from 0 to 1
-// sw[x][0][0] is the upper diagonal 3x3 matrix 
-// sw[x][1][0] the upper off-diagnoal 3x3 matrix
-// sw[x][2][0] the lower diagonal 3x3 matrix
-// the lower off-diagonal 3x3 matrix would be the inverser of sw[x][1][0]
-// 
-// identical convention for the second six-by-six matrix
-// just with second index set to 1
-//
-// so the application of the clover term 
-// plus twisted mass term to a spinor would just be
-// 
-// r_0 = sw[0][0] s_0 + sw[1][0] s_1 + i mu s_0
-// r_1 = sw[1][0]^-1 s_0 + sw[2][0] s_1 + i mu s_1
-// r_2 = sw[0][1] s_2 + sw[1][1] s_3 - i mu s_2
-// r_3 = sw[1][1]^-1 s_2 + sw[2][1] s_3 - i mu s_3
-//
-// suppressing space-time indices
-
-void sw_term(const su3 ** const gf, const double kappa, const double c_sw) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  int k,l;
-  int x,xpk,xpl,xmk,xml,xpkml,xplmk,xmkml;
-  const su3 *w1,*w2,*w3,*w4;
-  double ka_csw_8 = kappa*c_sw/8.;
-  su3 ALIGN v1,v2,plaq;
-  su3 ALIGN fkl[4][4];
-  su3 ALIGN magnetic[4],electric[4];
-  su3 ALIGN aux;
-  
-
-  /*  compute the clover-leave */
-  /*  l  __   __
-        |  | |  |
-        |__| |__|
-         __   __
-        |  | |  |
-        |__| |__| k  */
-  
-#ifdef OMP
-#pragma omp for
-#endif
-  for(x = 0; x < VOLUME; x++) {
-    for(k = 0; k < 4; k++) {
-      for(l = k+1; l < 4; l++) {
-	xpk=g_iup[x][k];
-	xpl=g_iup[x][l];
-	xmk=g_idn[x][k];
-	xml=g_idn[x][l];
-	xpkml=g_idn[xpk][l];
-	xplmk=g_idn[xpl][k];
-	xmkml=g_idn[xml][k];
-	w1=&gf[x][k];
-	w2=&gf[xpk][l];
-	w3=&gf[xpl][k];
-	w4=&gf[x][l];
-	_su3_times_su3(v1,*w1,*w2);
-	_su3_times_su3(v2,*w4,*w3);
-	_su3_times_su3d(plaq,v1,v2);
-	w1=&gf[x][l];
-	w2=&gf[xplmk][k];
-	w3=&gf[xmk][l];
-	w4=&gf[xmk][k];
-	_su3_times_su3d(v1,*w1,*w2);
-	_su3d_times_su3(v2,*w3,*w4);
-	_su3_times_su3_acc(plaq,v1,v2);
-	w1=&gf[xmk][k];
-	w2=&gf[xmkml][l];
-	w3=&gf[xmkml][k];
-	w4=&gf[xml][l];
-	_su3_times_su3(v1,*w2,*w1);
-	_su3_times_su3(v2,*w3,*w4);
-	_su3d_times_su3_acc(plaq,v1,v2);
-	w1=&gf[xml][l];
-	w2=&gf[xml][k];
-	w3=&gf[xpkml][l];
-	w4=&gf[x][k];
-	_su3d_times_su3(v1,*w1,*w2);
-	_su3_times_su3d(v2,*w3,*w4);
-	_su3_times_su3_acc(plaq,v1,v2);
-	_su3_dagger(v2,plaq); 
-	_su3_minus_su3(fkl[k][l],plaq,v2);
-      }
-    }
-
-    // this is the one in flavour and colour space
-    // twisted mass term is treated in clover, sw_inv and
-    // clover_gamma5 and the corresponding nd versions
-    _su3_one(sw[x][0][0]);
-    _su3_one(sw[x][2][0]);
-    _su3_one(sw[x][0][1]);
-    _su3_one(sw[x][2][1]);
-    
-    for(k = 1; k < 4; k++)
-    {
-      _su3_assign(electric[k], fkl[0][k]);
-    }
-    _su3_assign(magnetic[1], fkl[2][3]);
-    _su3_minus_assign(magnetic[2], fkl[1][3]);
-    _su3_assign(magnetic[3], fkl[1][2]);
-    
-    /*  upper left block 6x6 matrix  */
-    
-    _itimes_su3_minus_su3(aux,electric[3],magnetic[3]);
-    _su3_refac_acc(sw[x][0][0],ka_csw_8,aux);
-    
-    _itimes_su3_minus_su3(aux,electric[1],magnetic[1]);
-    _su3_minus_su3(v2,electric[2],magnetic[2]); 
-    _su3_acc(aux,v2);
-    _real_times_su3(sw[x][1][0],ka_csw_8,aux);
-    
-    _itimes_su3_minus_su3(aux,magnetic[3],electric[3]);
-    _su3_refac_acc(sw[x][2][0],ka_csw_8,aux);
-
-    /*  lower right block 6x6 matrix */
-    
-    _itimes_su3_plus_su3(aux,electric[3],magnetic[3]);
-    _su3_refac_acc(sw[x][0][1],(-ka_csw_8),aux);
-
-    _itimes_su3_plus_su3(aux,electric[1],magnetic[1]);
-    _su3_plus_su3(v2,electric[2],magnetic[2]); 
-    _su3_acc(aux,v2);
-    _real_times_su3(sw[x][1][1],(-ka_csw_8),aux);
-
-    _itimes_su3_plus_su3(aux,magnetic[3],electric[3]);
-    _su3_refac_acc(sw[x][2][1],ka_csw_8,aux);
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  return;
-}
-
-/*
-  !--------------------------------------------------------------!
-  !  The subroutine sw_invert is needed for the                  !
-  !  even_odd preconditioned Dirac operator with SW improvement. !
-  !  Details can be found in  the notes sw.ps on tsun.desy.de    !
-  !  by P. Weisz and U. Wolff.                                   !
-  !--------------------------------------------------------------!
-  !  inversion in place of complex matrix a without pivoting     !
-  !  triangularization by householder reflections                 !
-  !  inversion of triangular matrix                              !
-  !  inverse reflections                                          !
-  !--------------------------------------------------------------!
-  !  a square matrix, dimensioned 0:n-1                          !
-  !  itrouble is counted up, when a dangerously small diagonal   !
-  !  element is encountered in the tringular matrix              !
-  !  has to be initialized outside                               !
-  !                                                              !
-  !  Author: U. Wolff, adapted to fortran90 by S. Sint, 29/10/95 !
-  !--------------------------------------------------------------!
-  !  ported to C by M.Hasenbusch Wed Oct 24 15:46:46 MEST 2001   !
-  !______________________________________________________________!
-*/
-
-
-/* six_invert and six_det are called from multiple threads, they are thus
- * made thread-safe by removing the static keywords but they are NOT
- * parallelised for OpenMP */
-
-#define nm1 5
-void six_invert(int* ifail ,_Complex double a[6][6])
-{
-  /* required for thread safety */
-  _Complex double ALIGN d[nm1+1],u[nm1+1];
-  _Complex double ALIGN sigma,z;
-  double ALIGN p[nm1+1];
-  double ALIGN s,q;
-  int i,j,k;
-  *ifail=0;
-  for(k = 0; k < nm1; ++k)
-  {
-    s=0.0;
-    for(j = k+1; j <= nm1; ++j)
-      s += conj(a[j][k]) * a[j][k];
-    s = sqrt(1. + s / (conj(a[k][k]) * a[k][k]));
-    sigma = s * a[k][k];
-
-    a[k][k] += sigma;
-    p[k] = conj(sigma) * a[k][k];
-    q = conj(sigma) * sigma;
-    if (q < tiny_t)
-      (*ifail)++;
-    d[k] = -conj(sigma) / q;
-
-    /* reflect all columns to the right */
-    for(j = k+1; j <= nm1; ++j)
-    {
-      z = 0.0;
-      for(i = k; i <= nm1; ++i)
-	z += conj(a[i][k]) * a[i][j];
-      z /= p[k];
-      for(i = k; i <= nm1; ++i)
-	a[i][j] -= z * a[i][k];
-    }
-  }
-  sigma = a[nm1][nm1];
-  q = conj(sigma) * sigma;
-  if (q < tiny_t)
-    (*ifail)++;
-  d[nm1] = conj(sigma) / q;
-
-  /*  inversion of upper triangular matrix in place
-      (diagonal elements done already): */
-
-  for(k = nm1; k >= 0; k--) {
-    for(i = k-1; i >= 0;i--) {
-      z = 0.0;
-      for(j = i+1; j < k; j++)
-	z += a[i][j] * a[j][k];
-      z += a[i][k] * d[k];
-      a[i][k] = -z * d[i];
-    }
-  }     
-  /* execute reflections in reverse order from the right: */
-  
-  a[nm1][nm1] = d[nm1];
-  for(k = nm1-1; k >= 0; k--)
-  {
-    for(j=k;j<=nm1;j++)
-      u[j] = a[j][k];
-    a[k][k] = d[k];
-    for(j = k+1; j <= nm1; j++)
-      a[j][k] = 0.0;
-    for(i = 0; i <= nm1; i++)
-    {
-      z = 0.0;
-      for(j = k; j <= nm1; j++)
-        z += a[i][j] * u[j];
-      z /= p[k];         /* normalization */
-      
-      for(j = k; j <= nm1; j++)
-        a[i][j] -= conj(u[j]) * z; /* reflection */
-    }
-  }
-}
-    
-void six_det(_Complex double* const rval, _Complex double a[6][6])
-{
-  /* required for thread safety */
-  _Complex double ALIGN sigma,z;
-  _Complex double ALIGN det;
-  double ALIGN p[nm1+1];
-  double ALIGN s,q;
-  int i,j,k;
-  int ifail;
-  ifail=0;
-  /* compute the determinant:*/
-  det = 1.0;
-  
-  for(k = 0; k < nm1; k++) {
-    s=0.0;
-    for(j = k+1; j <= nm1; ++j) {
-      s += conj(a[j][k]) * a[j][k];
-    }
-    s = sqrt(1. + s / (conj(a[k][k]) * a[k][k]));
-    sigma = s * a[k][k];
-    
-    /* determinant */
-    det *= sigma;
-    q   = sigma * conj(sigma);
-    if (q < tiny_t)
-      ifail++;
-    
-    a[k][k] += sigma;
-    p[k]     = sigma * conj(a[k][k]);
-    
-    /* reflect all columns to the right */
-    for(j = k+1; j <= nm1; j++) {
-      z = 0.;
-      for(i = k; i <= nm1; i++) {
-	z += conj(a[i][k]) * a[i][j];
-      }
-      z /= p[k];
-      for(i = k; i <= nm1; i++) {
-	a[i][j] -= z * a[i][k];
-      }
-    }
-  }
-  sigma = a[nm1][nm1];
-  
-  /* determinant */
-  det *= sigma;
-  q = conj(sigma) * sigma;
-  
-  if(q < tiny_t) {
-    ifail++;
-  }
-  if(g_proc_id == 0 && ifail > 0) {
-    fprintf(stderr, "Warning: ifail = %d > 0 in six_det\n", ifail);
-  }
-  *rval = det;
-}
-
-/*definitions needed for the functions sw_trace(int ieo) and sw_trace(int ieo)*/
-inline void populate_6x6_matrix(_Complex double a[6][6], const su3 * const C, const int row, const int col) {
-  a[0+row][0+col] = C->c00;
-  a[0+row][1+col] = C->c01;
-  a[0+row][2+col] = C->c02;
-  a[1+row][0+col] = C->c10;
-  a[1+row][1+col] = C->c11;
-  a[1+row][2+col] = C->c12;
-  a[2+row][0+col] = C->c20;
-  a[2+row][1+col] = C->c21;
-  a[2+row][2+col] = C->c22;
-  return;
-}
-
-inline void get_3x3_block_matrix(su3 * const C, _Complex double a[6][6], const int row, const int col) {
-  C->c00 = a[0+row][0+col];
-  C->c01 = a[0+row][1+col];
-  C->c02 = a[0+row][2+col];
-  C->c10 = a[1+row][0+col];
-  C->c11 = a[1+row][1+col];
-  C->c12 = a[1+row][2+col];
-  C->c20 = a[2+row][0+col];
-  C->c21 = a[2+row][1+col];
-  C->c22 = a[2+row][2+col];
-  return;
-}
-
-// This function computes the trace-log part of the clover term
-// in case of even/odd preconditioning
-//
-// it is expected that sw_term is called beforehand such that
-// the array sw is populated properly
-
-inline void add_tm(_Complex double a[6][6], const double mu) {
-  for(int i = 0; i < 6; i++) {
-    a[i][i] += I*mu;
-  }
-  return;
-}
-
-double sw_trace(const int ieo, const double mu) {
-  double ALIGN res = 0.0;
-#ifdef MPI
-  double ALIGN mres;
-#endif
-
-#ifdef OMP
-#pragma omp parallel
-  {
-  int thread_num = omp_get_thread_num();
-#endif
-
-  int i,x,ioff;
-  su3 ALIGN v;
-  _Complex double ALIGN a[6][6];
-  double ALIGN tra;
-  double ALIGN ks,kc,tr,ts,tt;
-  _Complex double ALIGN det;
-
-  ks = 0.0;
-  kc = 0.0;
-
-  if(ieo==0) {
-    ioff=0;
-  } 
-  else {
-    ioff=(VOLUME+RAND)/2;
-  }
-  
-#ifdef OMP
-#pragma omp for
-#endif
-  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
-    x = g_eo2lexic[icx];
-    for(i=0;i<2;i++) {
-      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
-      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
-      _su3_dagger(v, sw[x][1][i]); 
-      populate_6x6_matrix(a, &v, 3, 0);
-      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
-      // we add the twisted mass term
-      if(i == 0) add_tm(a, mu);
-      else add_tm(a, -mu);
-      // and compute the tr log (or log det)
-      six_det(&det,a);
-      tra = log(conj(det)*det);
-      // we need to compute only the one with +mu
-      // the one with -mu must be the complex conjugate!
-      
-      tr=tra+kc;
-      ts=tr+ks;
-      tt=ts-ks;
-      ks=ts;
-      kc=tr-tt;
-    }
-  }
-  kc=ks+kc;
-
-#ifdef OMP
-  g_omp_acc_re[thread_num] = kc;
-  } /* OpenMP parallel closing brace */
-
-  for(int i = 0; i < omp_num_threads; ++i) {
-    res += g_omp_acc_re[i];
-  }
-#else
-  res=kc;
-#endif
-
-#ifdef MPI
-  MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return(mres);
-#else
-  return(res);
-#endif
-
-}
-
-
-// This function computes the trace-log part of the clover term
-// in case of even/odd preconditioning in the nd case
-//
-// it is expected that sw_term is called beforehand such that
-// the array sw is populated properly
-//
-// it is tested to deliver bit-identical results to sw_trace
-// if eps is set to zero
-
-double sw_trace_nd(const int ieo, const double mu, const double eps) {
-  double ALIGN res = 0.0;
-#ifdef MPI
-  double ALIGN mres;
-#endif
-
-#ifdef OMP
-#pragma omp parallel
-  {
-  int thread_num = omp_get_thread_num();
-#endif
-
-  int x,ioff;
-  su3 ALIGN v;
-  _Complex double ALIGN a[6][6];
-  double ALIGN tra;
-  double ALIGN ks,kc,tr,ts,tt;
-  _Complex double ALIGN det[2];
-  double se = (eps*eps)*(eps*eps)*(eps*eps);
-  ks=0.0;
-  kc=0.0;
-
-  if(ieo==0) {
-    ioff=0;
-  } 
-  else {
-    ioff=(VOLUME+RAND)/2;
-  }
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for(unsigned int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
-    x = g_eo2lexic[icx];
-    for(unsigned int i = 0; i < 2; i++) {
-      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
-      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
-      _su3_dagger(v, sw[x][1][i]); 
-      populate_6x6_matrix(a, &v, 3, 0);
-      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
-      // we add the twisted mass term prop to tau^3
-      if(i == 0) add_tm(a, mu);
-      else add_tm(a, -mu);
-      six_det(&det[i], a);
-    }
-    // and compute the tr log (or log det)
-    // for the 2x2 matrix in flavour space
-    // with eps*tau^1 in the off diagonal
-    tra = log(conj(det[0])*det[0]*conj(det[1])*det[1] - se*se);
-
-    tr=tra+kc;
-    ts=tr+ks;
-    tt=ts-ks;
-    ks=ts;
-    kc=tr-tt;
-  }
-  kc=ks+kc;
-  
-#ifdef OMP
-  g_omp_acc_re[thread_num] = kc;
-  } /* OpenMP parallel closing brace */
-
-  for(int i = 0; i < omp_num_threads; ++i) {
-    res += g_omp_acc_re[i];
-  }
-#else
-  res=kc;
-#endif
-
-#ifdef MPI
-  MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return(mres);
-#else
-  return(res);
-#endif
-}
-
-
 void mult_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]) {
 
   for(int i = 0; i < 6; i++) {
@@ -617,590 +98,12 @@ void copy_6x6(_Complex double a[6][6], const _Complex double b[6][6]) {
   return;
 }
 
-// This function computes the inverse of
-// (1 + T_ee \pm I\mu\gamma_5)
-//
-// + is stored in sw_inv[0-(VOLUME/2-1)] 
-// - is stored in sw_inv[VOLUME/2-(VOLUME-1)]
 
-void sw_invert(const int ieo, const double mu) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-  int icy;
-  int ioff, err=0;
-  int i, x;
-  su3 ALIGN v;
-  _Complex double ALIGN a[6][6];
 
-  if(ieo==0) {
-    ioff=0;
-  } 
-  else {
-    ioff=(VOLUME+RAND)/2;
-  }
 
-#ifndef OMP
-  icy=0;
-#endif
 
-#ifdef OMP
-#pragma omp for
-#endif
-  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
-#ifdef OMP
-    icy = icx - ioff;
-#endif
-    x = g_eo2lexic[icx];
-
-    for(i = 0; i < 2; i++) {
-      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
-      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
-      _su3_dagger(v, sw[x][1][i]); 
-      populate_6x6_matrix(a, &v, 3, 0);
-      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
-      // we add the twisted mass term
-      if(i == 0) add_tm(a, +mu);
-      else add_tm(a, -mu);
-      // and invert the resulting matrix
 
-      six_invert(&err,a); 
-      // here we need to catch the error! 
-      if(err > 0 && g_proc_id == 0) {
-	printf("# inversion failed in six_invert code %d\n", err);
-	err = 0;
-      }
-
-      /*  copy "a" back to sw_inv */
-      get_3x3_block_matrix(&sw_inv[icy][0][i], a, 0, 0);
-      get_3x3_block_matrix(&sw_inv[icy][1][i], a, 0, 3);
-      get_3x3_block_matrix(&sw_inv[icy][2][i], a, 3, 3);
-      get_3x3_block_matrix(&sw_inv[icy][3][i], a, 3, 0);
-    }
 
-    if(fabs(mu) > 0.) {
-      for(i = 0; i < 2; i++) {
-	populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
-	populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
-	_su3_dagger(v, sw[x][1][i]); 
-	populate_6x6_matrix(a, &v, 3, 0);
-	populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
-
-	// we add the twisted mass term
-	if(i == 0) add_tm(a, -mu);
-	else add_tm(a, +mu);
-	// and invert the resulting matrix
-	six_invert(&err,a); 
-	// here we need to catch the error! 
-	if(err > 0 && g_proc_id == 0) {
-	  printf("# %d\n", err);
-	  err = 0;
-	}
-
-	/*  copy "a" back to sw_inv */
-	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][0][i], a, 0, 0);
-	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][1][i], a, 0, 3);
-	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][2][i], a, 3, 3);
-	get_3x3_block_matrix(&sw_inv[icy+VOLUME/2][3][i], a, 3, 0);
-      }
-    }
-#ifndef OMP
-    ++icy;
-#endif
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  return;
-}
-
-inline void add_shift_6x6(_Complex double a[6][6], const double mshift) {
-  for(int i = 0; i < 6; i++) {
-    a[i][i] += mshift;
-  }
-  return;
-}
-
-// This function computes
-//
-// 1/((1+T)^2 + barmu^2 - bareps^2)^{-1}
-//
-// for all even x,
-// which is stored in sw_inv[0-(VOLUME/2-1)]
-//
-// it is the complement of sw_invert for the
-// non-degenerate case
-// multiplication with
-// (1+T - i\bar\mu\gamma_5\tau^3 + \bar\epsion\tau^1)
-// must be done elsewhere because of flavour structure
-
-void sw_invert_nd(const double mshift) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-  int err=0;
-  int i, x;
-  su3 ALIGN v;
-  _Complex double ALIGN a[6][6], b[6][6];
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for(int icx = 0; icx < (VOLUME/2); icx++) {
-    x = g_eo2lexic[icx];
-
-    for(i = 0; i < 2; i++) {
-      populate_6x6_matrix(a, &sw[x][0][i], 0, 0);
-      populate_6x6_matrix(a, &sw[x][1][i], 0, 3);
-      _su3_dagger(v, sw[x][1][i]); 
-      populate_6x6_matrix(a, &v, 3, 0);
-      populate_6x6_matrix(a, &sw[x][2][i], 3, 3);
-
-      // compute (1+T)^2 and store in b
-      mult_6x6(b, a, a);
-      // we add the mass shift term, which is a real number
-      add_shift_6x6(b, mshift);
-      // so b = (1+T)^2 + shift
-      // now invert this matrix
-      six_invert(&err, b); 
-      // here we need to catch the error! 
-      if(err > 0 && g_proc_id == 0) {
-	printf("# inversion failed in six_invert_nd code %d\n", err);
-	err = 0;
-      }
-
-      /*  copy "a" back to sw_inv */
-      get_3x3_block_matrix(&sw_inv[icx][0][i], b, 0, 0);
-      get_3x3_block_matrix(&sw_inv[icx][1][i], b, 0, 3);
-      get_3x3_block_matrix(&sw_inv[icx][2][i], b, 3, 3);
-      get_3x3_block_matrix(&sw_inv[icx][3][i], b, 3, 0);
-    }
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  return;
-}
-
-// this is (-tr(1+T_ee(+mu)) -tr(1+T_ee(-mu)))      
-// (or T_oo of course)
-// 
-// see equation (24) of hep-lat/9603008             
-//
-// or in more detail the insertion matrix at even sites
-// is computed
-// and stored in swm and swp, which are 4 su3 matrices 
-// each per site
-// refereing to upwards or downwards winding paths  
-//
-// swm and swp are representing 6x6 complex matrices
-// (colour matrices)
-//
-// this function depends on mu
-
-void sw_deriv(const int ieo, const double mu) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-  int icy;
-  int ioff;
-  int x;
-  double fac = 1.0000;
-  su3 ALIGN lswp[4], lswm[4];
-
-  /* convention: Tr clover-leaf times insertion */
-  if(ieo == 0) {
-    ioff=0;
-  } 
-  else {
-    ioff = (VOLUME+RAND)/2;
-  }
-  if(fabs(mu) > 0.) fac = 0.5;
-
-#ifndef OMP
-  icy = 0;
-#endif
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
-#ifdef OMP
-    icy = icx - ioff;
-#endif
-    x = g_eo2lexic[icx];
-    /* compute the insertion matrix */
-    _su3_plus_su3(lswp[0], sw_inv[icy][0][1], sw_inv[icy][0][0]);
-    _su3_plus_su3(lswp[1], sw_inv[icy][1][1], sw_inv[icy][1][0]);
-    _su3_plus_su3(lswp[2], sw_inv[icy][2][1], sw_inv[icy][2][0]);
-    _su3_plus_su3(lswp[3], sw_inv[icy][3][1], sw_inv[icy][3][0]);
-
-    _su3_minus_su3(lswm[0], sw_inv[icy][0][1], sw_inv[icy][0][0]);
-    _su3_minus_su3(lswm[1], sw_inv[icy][1][1], sw_inv[icy][1][0]);
-    _su3_minus_su3(lswm[2], sw_inv[icy][2][1], sw_inv[icy][2][0]);
-    _su3_minus_su3(lswm[3], sw_inv[icy][3][1], sw_inv[icy][3][0]);
-    
-    /* add up to swm[] and swp[] */
-    _su3_refac_acc(swm[x][0], fac, lswm[0]);
-    _su3_refac_acc(swm[x][1], fac, lswm[1]);
-    _su3_refac_acc(swm[x][2], fac, lswm[2]);
-    _su3_refac_acc(swm[x][3], fac, lswm[3]);
-    _su3_refac_acc(swp[x][0], fac, lswp[0]);
-    _su3_refac_acc(swp[x][1], fac, lswp[1]);
-    _su3_refac_acc(swp[x][2], fac, lswp[2]);
-    _su3_refac_acc(swp[x][3], fac, lswp[3]);
-    if(fabs(mu) > 0.) {
-      /* compute the insertion matrix */
-      _su3_plus_su3(lswp[0], sw_inv[icy+VOLUME/2][0][1], sw_inv[icy+VOLUME/2][0][0]);
-      _su3_plus_su3(lswp[1], sw_inv[icy+VOLUME/2][1][1], sw_inv[icy+VOLUME/2][1][0]);
-      _su3_plus_su3(lswp[2], sw_inv[icy+VOLUME/2][2][1], sw_inv[icy+VOLUME/2][2][0]);
-      _su3_plus_su3(lswp[3], sw_inv[icy+VOLUME/2][3][1], sw_inv[icy+VOLUME/2][3][0]); 
-
-      _su3_minus_su3(lswm[0], sw_inv[icy+VOLUME/2][0][1], sw_inv[icy+VOLUME/2][0][0]);
-      _su3_minus_su3(lswm[1], sw_inv[icy+VOLUME/2][1][1], sw_inv[icy+VOLUME/2][1][0]);
-      _su3_minus_su3(lswm[2], sw_inv[icy+VOLUME/2][2][1], sw_inv[icy+VOLUME/2][2][0]);
-      _su3_minus_su3(lswm[3], sw_inv[icy+VOLUME/2][3][1], sw_inv[icy+VOLUME/2][3][0]);
-      
-      /* add up to swm[] and swp[] */
-      _su3_refac_acc(swm[x][0], fac, lswm[0]);
-      _su3_refac_acc(swm[x][1], fac, lswm[1]);
-      _su3_refac_acc(swm[x][2], fac, lswm[2]);
-      _su3_refac_acc(swm[x][3], fac, lswm[3]);
-      _su3_refac_acc(swp[x][0], fac, lswp[0]);
-      _su3_refac_acc(swp[x][1], fac, lswp[1]);
-      _su3_refac_acc(swp[x][2], fac, lswp[2]);
-      _su3_refac_acc(swp[x][3], fac, lswp[3]);
-    }
-#ifndef OMP
-    ++icy;
-#endif
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  return;
-}
-
-void sw_deriv_nd(const int ieo) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-  int icy;
-  int ioff;
-  int x;
-  double fac = 1.0000;
-  su3 ALIGN lswp[4], lswm[4], v;
-  _Complex double ALIGN a0[6][6], a1[6][6], b[6][6], c[6][6];
-
-  /* convention: Tr clover-leaf times insertion */
-  if(ieo == 0) {
-    ioff=0;
-  } 
-  else {
-    ioff = (VOLUME+RAND)/2;
-  }
-
-#ifndef OMP
-  icy = 0;
-#endif
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for(int icx = ioff; icx < (VOLUME/2+ioff); icx++) {
-#ifdef OMP
-    icy = icx - ioff;
-#endif
-    x = g_eo2lexic[icx];
-    /* compute the insertion matrix */
-    populate_6x6_matrix(b, &sw[x][0][0], 0, 0);
-    populate_6x6_matrix(b, &sw[x][1][0], 0, 3);
-    _su3_dagger(v, sw[x][1][0]); 
-    populate_6x6_matrix(b, &v, 3, 0);
-    populate_6x6_matrix(b, &sw[x][2][0], 3, 3);
-
-    populate_6x6_matrix(c, &sw_inv[icy][0][0], 0, 0);
-    populate_6x6_matrix(c, &sw_inv[icy][1][0], 0, 3);
-    populate_6x6_matrix(c, &sw_inv[icy][2][0], 3, 3);
-    populate_6x6_matrix(c, &sw_inv[icy][3][0], 3, 0);
-
-    mult_6x6(a0, b, c);
-
-    populate_6x6_matrix(b, &sw[x][0][1], 0, 0);
-    populate_6x6_matrix(b, &sw[x][1][1], 0, 3);
-    _su3_dagger(v, sw[x][1][1]); 
-    populate_6x6_matrix(b, &v, 3, 0);
-    populate_6x6_matrix(b, &sw[x][2][1], 3, 3);
-
-    populate_6x6_matrix(c, &sw_inv[icy][0][1], 0, 0);
-    populate_6x6_matrix(c, &sw_inv[icy][1][1], 0, 3);
-    populate_6x6_matrix(c, &sw_inv[icy][2][1], 3, 3);
-    populate_6x6_matrix(c, &sw_inv[icy][3][1], 3, 0);
-
-    mult_6x6(a1, b, c);
-    add_6x6(b, a1, a0);
-    get_3x3_block_matrix(&lswp[0], b, 0, 0);
-    get_3x3_block_matrix(&lswp[1], b, 0, 3);
-    get_3x3_block_matrix(&lswp[2], b, 3, 3);
-    get_3x3_block_matrix(&lswp[3], b, 3, 0);
-
-    sub_6x6(b, a1, a0);
-    get_3x3_block_matrix(&lswm[0], b, 0, 0);
-    get_3x3_block_matrix(&lswm[1], b, 0, 3);
-    get_3x3_block_matrix(&lswm[2], b, 3, 3);
-    get_3x3_block_matrix(&lswm[3], b, 3, 0);
-    
-    /* add up to swm[] and swp[] */
-    _su3_refac_acc(swm[x][0], fac, lswm[0]);
-    _su3_refac_acc(swm[x][1], fac, lswm[1]);
-    _su3_refac_acc(swm[x][2], fac, lswm[2]);
-    _su3_refac_acc(swm[x][3], fac, lswm[3]);
-    _su3_refac_acc(swp[x][0], fac, lswp[0]);
-    _su3_refac_acc(swp[x][1], fac, lswp[1]);
-    _su3_refac_acc(swp[x][2], fac, lswp[2]);
-    _su3_refac_acc(swp[x][3], fac, lswp[3]);
-#ifndef OMP
-    ++icy;
-#endif
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  return;
-}
-
-
-// direct product of Y_e(o) and X_e(o) in colour space   
-// with insertion matrix at site x
-// see equation (22) of hep-lat/9603008                  
-// result is again stored in swm and swp                 
-// includes a gamma5 multiplication for kk
-
-void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll, 
-	       const double fac) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  int ioff;
-  int icx;
-  int x;
-  const spinor *r,*s;
-  su3 ALIGN v0,v1,v2,v3;
-  su3 ALIGN u0,u1,u2,u3;
-  su3 ALIGN lswp[4],lswm[4];
-
-  if(ieo == 0) {
-    ioff=0;
-  } 
-  else {
-    ioff=(VOLUME+RAND)/2;
-  }
-  /************************ loop over half of the lattice sites ***********/
-
-#ifdef OMP
-#pragma omp for
-#endif  
-  for(icx = ioff; icx < (VOLUME/2+ioff); icx++) {
-    x = g_eo2lexic[icx];
-    r = kk + icx - ioff;
-    s = ll + icx - ioff;
-    
-    _vector_tensor_vector(v0,(*r).s0,(*s).s0);
-    _vector_tensor_vector(v1,(*r).s0,(*s).s1);
-    _vector_tensor_vector(v2,(*r).s1,(*s).s1);
-    _vector_tensor_vector(v3,(*r).s1,(*s).s0);
-    // mvector takes g5 into account
-    _mvector_tensor_vector(u0,(*r).s2,(*s).s2);
-    _mvector_tensor_vector(u1,(*r).s2,(*s).s3);
-    _mvector_tensor_vector(u2,(*r).s3,(*s).s3);
-    _mvector_tensor_vector(u3,(*r).s3,(*s).s2);
-    
-    /* compute the insertion matrix */
-    _su3_plus_su3(lswp[0],u0,v0);
-    _su3_plus_su3(lswp[1],u1,v1);
-    _su3_plus_su3(lswp[2],u2,v2);
-    _su3_plus_su3(lswp[3],u3,v3);
-
-    _su3_minus_su3(lswm[0],u0,v0);
-    _su3_minus_su3(lswm[1],u1,v1);
-    _su3_minus_su3(lswm[2],u2,v2);
-    _su3_minus_su3(lswm[3],u3,v3);
-    
-    /* add up to swm[0] and swp[0] */
-    _su3_refac_acc(swm[x][0], fac, lswm[0]);
-    _su3_refac_acc(swm[x][1], fac, lswm[1]);
-    _su3_refac_acc(swm[x][2], fac, lswm[2]);
-    _su3_refac_acc(swm[x][3], fac, lswm[3]);
-    _su3_refac_acc(swp[x][0], fac, lswp[0]);
-    _su3_refac_acc(swp[x][1], fac, lswp[1]);
-    _su3_refac_acc(swp[x][2], fac, lswp[2]);
-    _su3_refac_acc(swp[x][3], fac, lswp[3]);
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  return;
-}
-
-// now we sum up all term from the clover term
-// after sw_spinor and sw_deriv have been called
-
-void sw_all(hamiltonian_field_t * const hf, const double kappa, 
-	    const double c_sw) {
-#ifdef OMP
-#pragma omp parallel
-  {
-#endif
-
-  int k,l;
-  int x,xpk,xpl,xmk,xml,xpkml,xplmk,xmkml;
-  const su3 *w1,*w2,*w3,*w4;
-  double ka_csw_8 = kappa*c_sw/8.;
-  su3 ALIGN v1,v2,vv1,vv2,plaq;
-  su3 ALIGN vis[4][4];
-
-#ifdef OMP
-#pragma omp for
-#endif
-  for(x = 0; x < VOLUME; x++) {
-    _minus_itimes_su3_plus_su3(vis[0][1],swm[x][1],swm[x][3]);
-    _su3_minus_su3(vis[0][2],swm[x][1],swm[x][3]);
-    _itimes_su3_minus_su3(vis[0][3],swm[x][2],swm[x][0]);
-    
-    _minus_itimes_su3_plus_su3(vis[2][3],swp[x][1],swp[x][3]);
-    _su3_minus_su3(vis[1][3],swp[x][3],swp[x][1]);
-    _itimes_su3_minus_su3(vis[1][2],swp[x][2],swp[x][0]);
-
-    // project to the traceless anti-hermitian part
-    _su3_dagger(v1,vis[0][1]); 
-    _su3_minus_su3(vis[0][1],vis[0][1],v1);
-    _su3_dagger(v1,vis[0][2]); 
-    _su3_minus_su3(vis[0][2],vis[0][2],v1);
-    _su3_dagger(v1,vis[0][3]); 
-    _su3_minus_su3(vis[0][3],vis[0][3],v1);
-    _su3_dagger(v1,vis[2][3]); 
-    _su3_minus_su3(vis[2][3],vis[2][3],v1);
-    _su3_dagger(v1,vis[1][3]); 
-    _su3_minus_su3(vis[1][3],vis[1][3],v1);
-    _su3_dagger(v1,vis[1][2]); 
-    _su3_minus_su3(vis[1][2],vis[1][2],v1);
-    
-    for(k = 0; k < 4; k++) {
-      for(l = k+1; l < 4; l++) {
-	xpk=g_iup[x][k];
-	xpl=g_iup[x][l];
-	xmk=g_idn[x][k];
-	xml=g_idn[x][l];
-	xpkml=g_idn[xpk][l];
-	xplmk=g_idn[xpl][k];
-	xmkml=g_idn[xml][k];
-	w1=&hf->gaugefield[x][k];
-	w2=&hf->gaugefield[xpk][l];
-	w3=&hf->gaugefield[xpl][k];   /*dag*/
-	w4=&hf->gaugefield[x][l];     /*dag*/
-	
-	_su3_times_su3(v1,*w1,*w2);
-	_su3_times_su3(v2,*w4,*w3);
-	_su3_times_su3d(plaq,v1,v2);
-	
-	_su3_times_su3(vv1,plaq,vis[k][l]);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][k], -2.*ka_csw_8, vv1);
-
-	_su3d_times_su3(vv2,*w1,vv1); 
-	_su3_times_su3(vv1,vv2,*w1);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xpk][l], -2.*ka_csw_8, vv1);
-	
-	_su3_times_su3(vv2,vis[k][l],plaq); 
-	_su3_dagger(vv1,vv2);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][l], -2.*ka_csw_8, vv1);
-
-	_su3d_times_su3(vv2,*w4,vv1); 
-	_su3_times_su3(vv1,vv2,*w4);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xpl][k], -2.*ka_csw_8, vv1);
-	
-	w1=&hf->gaugefield[x][l];
-	w2=&hf->gaugefield[xplmk][k];   /*dag*/
-	w3=&hf->gaugefield[xmk][l];     /*dag*/
-	w4=&hf->gaugefield[xmk][k];
-	_su3_times_su3d(v1,*w1,*w2);
-	_su3d_times_su3(v2,*w3,*w4);
-	_su3_times_su3(plaq,v1,v2);
-	
-	_su3_times_su3(vv1,plaq,vis[k][l]);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][l], -2.*ka_csw_8, vv1);
-	
-	_su3_dagger(vv1,v1); 
-	_su3_times_su3d(vv2,vv1,vis[k][l]);
-	_su3_times_su3d(vv1,vv2,v2);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xplmk][k], -2.*ka_csw_8, vv1);
-
-	_su3_times_su3(vv2,*w3,vv1); 
-	_su3_times_su3d(vv1,vv2,*w3);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmk][l], -2.*ka_csw_8, vv1);
-
-	_su3_dagger(vv2,vv1);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmk][k], -2.*ka_csw_8, vv2);
-	
-	w1=&hf->gaugefield[xmk][k];   /*dag*/
-	w2=&hf->gaugefield[xmkml][l]; /*dag*/
-	w3=&hf->gaugefield[xmkml][k];
-	w4=&hf->gaugefield[xml][l];
-	_su3_times_su3(v1,*w2,*w1);
-	_su3_times_su3(v2,*w3,*w4);
-	
-	_su3_times_su3d(vv1,*w1,vis[k][l]);
-	_su3_times_su3d(vv2,vv1,v2);
-	_su3_times_su3(vv1,vv2,*w2);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmk][k], -2.*ka_csw_8, vv1);
-
-	_su3_times_su3(vv2,*w2,vv1); 
-	_su3_times_su3d(vv1,vv2,*w2);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmkml][l], -2.*ka_csw_8, vv1);
-
-	_su3_dagger(vv2,vv1);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xmkml][k], -2.*ka_csw_8, vv2);
-
-	_su3d_times_su3(vv1,*w3,vv2); 
-	_su3_times_su3(vv2,vv1,*w3);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xml][l], -2.*ka_csw_8, vv2);
-	
-	w1=&hf->gaugefield[xml][l];   /*dag*/
-	w2=&hf->gaugefield[xml][k];
-	w3=&hf->gaugefield[xpkml][l];
-	w4=&hf->gaugefield[x][k];     /*dag*/
-	_su3d_times_su3(v1,*w1,*w2);
-	_su3_times_su3d(v2,*w3,*w4);
-	
-	_su3_times_su3d(vv1,*w1,vis[k][l]);
-	_su3_times_su3d(vv2,vv1,v2);
-	_su3_times_su3d(vv1,vv2,*w2);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xml][l], -2.*ka_csw_8, vv1);
-	
-	_su3_dagger(vv2,vv1);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xml][k], -2.*ka_csw_8, vv2);
-
-	_su3d_times_su3(vv1,*w2,vv2); 
-	_su3_times_su3(vv2,vv1,*w2);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[xpkml][l], -2.*ka_csw_8, vv2);
-
-	_su3_dagger(vv2,v2);  
-	_su3_times_su3d(vv1,vv2,v1);
-	_su3_times_su3d(vv2,vv1,vis[k][l]);
- 	_trace_lambda_mul_add_assign_nonlocal(hf->derivative[x][k], -2.*ka_csw_8, vv2);
-      }
-    }
-  }
-#ifdef OMP
-  } /* OpenMP closing brace */
-#endif
-  return;
-}
 
 su3 * _swp;
 
diff --git a/operator/clover_leaf.h b/operator/clover_leaf.h
index 3171d9cc6..ff942fc87 100644
--- a/operator/clover_leaf.h
+++ b/operator/clover_leaf.h
@@ -25,6 +25,7 @@
 #include "hamiltonian_field.h"
 
 extern su3 ** swm, ** swp;
+extern const double tiny_t;
 
 void sw_term(const su3 ** const gf, const double kappa, const double c_sw);
 double sw_trace(const int ieo, const double mu);
@@ -37,4 +38,57 @@ void sw_spinor(const int ieo, const spinor * const kk, const spinor * const ll,
 void sw_all(hamiltonian_field_t * const hf, const double kappa, const double c_sw);
 int init_swpm(const int V);
 
+void mult_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]);
+void add_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]);
+void sub_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]);
+void copy_6x6(_Complex double a[6][6], const _Complex double b[6][6]);
+
+/* Copies the 3x3 matrix *C into the 3x3 sub-block of the 6x6 matrix a whose
+ * upper left element is a[row][col]; static inline, as it is defined in a header. */
+static inline void populate_6x6_matrix(_Complex double a[6][6], const su3 * const C, const int row, const int col) {
+  a[0+row][0+col] = C->c00;
+  a[0+row][1+col] = C->c01;
+  a[0+row][2+col] = C->c02;
+  a[1+row][0+col] = C->c10;
+  a[1+row][1+col] = C->c11;
+  a[1+row][2+col] = C->c12;
+  a[2+row][0+col] = C->c20;
+  a[2+row][1+col] = C->c21;
+  a[2+row][2+col] = C->c22;
+}
+
+/* Inverse of populate_6x6_matrix: copies the 3x3 sub-block of a starting at a[row][col] into *C. */
+static inline void get_3x3_block_matrix(su3 * const C, _Complex double a[6][6], const int row, const int col) {
+  C->c00 = a[0+row][0+col];
+  C->c01 = a[0+row][1+col];
+  C->c02 = a[0+row][2+col];
+  C->c10 = a[1+row][0+col];
+  C->c11 = a[1+row][1+col];
+  C->c12 = a[1+row][2+col];
+  C->c20 = a[2+row][0+col];
+  C->c21 = a[2+row][1+col];
+  C->c22 = a[2+row][2+col];
+}
+
+// Adds the twisted mass term to the 6x6 matrix a:
+// i*mu is added to each of the six diagonal elements,
+// all off-diagonal elements are left untouched.
+//
+// The function is static inline because it is defined in a header:
+// each file including the header gets its own private copy, so the
+// linker never needs an external definition.
+static inline void add_tm(_Complex double a[6][6], const double mu) {
+  for(int i = 0; i < 6; i++) {
+    a[i][i] += I*mu;
+  }
+}
+
+// Adds the real shift mshift to each diagonal element of the 6x6 matrix a (static inline: header definition).
+static inline void add_shift_6x6(_Complex double a[6][6], const double mshift) {
+  for(int i = 0; i < 6; i++) {
+    a[i][i] += mshift;
+  }
+}
+
+
 #endif
diff --git a/operator/clover_term.c b/operator/clover_term.c
new file mode 100644
index 000000000..02273048d
--- /dev/null
+++ b/operator/clover_term.c
@@ -0,0 +1,204 @@
+/***********************************************************************
+ *
+ * Copyright (C) 1995 Ulli Wolff, Stefan Sint
+ *               2001,2005 Martin Hasenbusch
+ *               2011,2012 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#ifdef SSE
+# undef SSE
+#endif
+#ifdef SSE2
+# undef SSE2
+#endif
+#ifdef SSE3
+# undef SSE3
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <errno.h>
+#include <time.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#ifdef OMP
+# include <omp.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "sse.h"
+#include "su3adj.h"
+#include "operator/clovertm_operators.h"
+#include "operator/clover_leaf.h"
+
+// the clover term is written as
+//
+//   1 + T_{xa\alpha,yb\beta} 
+// = 1 + i csw kappa/2 sigma_munu^alphabeta F_munu^ab(x)delta_xy
+//
+// see hep-lat/9603008 for all glory details
+//
+// per site we have to store two six-by-six complex matrices.
+// As the off-diagonal 3x3 matrices are just inverse to
+// each other, we get away with two times three 3x3 complex matrices
+//
+// these are stored in the array sw[VOLUME][3][2] of type su3
+// where x is the space time index
+// a runs from 0 to 2
+// b runs from 0 to 1
+// sw[x][0][0] is the upper diagonal 3x3 matrix 
+// sw[x][1][0] the upper off-diagonal 3x3 matrix
+// sw[x][2][0] the lower diagonal 3x3 matrix
+// the lower off-diagonal 3x3 matrix would be the inverse of sw[x][1][0]
+// 
+// identical convention for the second six-by-six matrix
+// just with second index set to 1
+//
+// so the application of the clover term 
+// plus twisted mass term to a spinor would just be
+// 
+// r_0 = sw[0][0] s_0 + sw[1][0] s_1 + i mu s_0
+// r_1 = sw[1][0]^-1 s_0 + sw[2][0] s_1 + i mu s_1
+// r_2 = sw[0][1] s_2 + sw[1][1] s_3 - i mu s_2
+// r_3 = sw[1][1]^-1 s_2 + sw[2][1] s_3 - i mu s_3
+//
+// suppressing space-time indices
+
+void sw_term(const su3 ** const gf, const double kappa, const double c_sw) {
+#ifdef OMP
+#pragma omp parallel
+  {
+#endif
+  /* fills sw[x][a][b], a = 0..2, b = 0..1, for all x < VOLUME from the gauge field gf */
+  int k,l;
+  int x,xpk,xpl,xmk,xml,xpkml,xplmk,xmkml;
+  const su3 *w1,*w2,*w3,*w4;
+  double ka_csw_8 = kappa*c_sw/8.;
+  su3 ALIGN v1,v2,plaq;
+  su3 ALIGN fkl[4][4];               /* only the entries with k < l are set and read */
+  su3 ALIGN magnetic[4],electric[4]; /* only the indices 1..3 are used */
+  su3 ALIGN aux;
+  
+
+  /*  compute the clover-leaf (accumulated in plaq)  */
+  /*  l  __   __
+        |  | |  |
+        |__| |__|
+         __   __
+        |  | |  |
+        |__| |__| k  */
+  
+#ifdef OMP
+#pragma omp for
+#endif
+  for(x = 0; x < VOLUME; x++) {
+    for(k = 0; k < 4; k++) {
+      for(l = k+1; l < 4; l++) {
+	xpk=g_iup[x][k];
+	xpl=g_iup[x][l];
+	xmk=g_idn[x][k];
+	xml=g_idn[x][l];
+	xpkml=g_idn[xpk][l];
+	xplmk=g_idn[xpl][k];
+	xmkml=g_idn[xml][k];
+	w1=&gf[x][k];
+	w2=&gf[xpk][l];
+	w3=&gf[xpl][k];
+	w4=&gf[x][l];
+	_su3_times_su3(v1,*w1,*w2);
+	_su3_times_su3(v2,*w4,*w3);
+	_su3_times_su3d(plaq,v1,v2); /* leaf 1 of 4 */
+	w1=&gf[x][l];
+	w2=&gf[xplmk][k];
+	w3=&gf[xmk][l];
+	w4=&gf[xmk][k];
+	_su3_times_su3d(v1,*w1,*w2);
+	_su3d_times_su3(v2,*w3,*w4);
+	_su3_times_su3_acc(plaq,v1,v2); /* leaf 2 of 4 */
+	w1=&gf[xmk][k];
+	w2=&gf[xmkml][l];
+	w3=&gf[xmkml][k];
+	w4=&gf[xml][l];
+	_su3_times_su3(v1,*w2,*w1);
+	_su3_times_su3(v2,*w3,*w4);
+	_su3d_times_su3_acc(plaq,v1,v2); /* leaf 3 of 4 */
+	w1=&gf[xml][l];
+	w2=&gf[xml][k];
+	w3=&gf[xpkml][l];
+	w4=&gf[x][k];
+	_su3d_times_su3(v1,*w1,*w2);
+	_su3_times_su3d(v2,*w3,*w4);
+	_su3_times_su3_acc(plaq,v1,v2); /* leaf 4 of 4 */
+	_su3_dagger(v2,plaq); /* presumably fkl[k][l] = plaq - plaq^dagger, going by the macro names */
+	_su3_minus_su3(fkl[k][l],plaq,v2);
+      }
+    }
+
+    // this is the "1" of 1 + T in flavour and colour space (diagonal blocks)
+    // the twisted mass term is not added here; it is treated in clover,
+    // sw_inv and clover_gamma5 and the corresponding nd versions
+    _su3_one(sw[x][0][0]);
+    _su3_one(sw[x][2][0]);
+    _su3_one(sw[x][0][1]);
+    _su3_one(sw[x][2][1]);
+    
+    for(k = 1; k < 4; k++)
+    {
+      _su3_assign(electric[k], fkl[0][k]); /* components involving direction 0 */
+    }
+    _su3_assign(magnetic[1], fkl[2][3]);
+    _su3_minus_assign(magnetic[2], fkl[1][3]); /* note the sign */
+    _su3_assign(magnetic[3], fkl[1][2]);
+    
+    /*  upper left block 6x6 matrix: sw[x][0..2][0]  */
+    
+    _itimes_su3_minus_su3(aux,electric[3],magnetic[3]);
+    _su3_refac_acc(sw[x][0][0],ka_csw_8,aux);
+    
+    _itimes_su3_minus_su3(aux,electric[1],magnetic[1]);
+    _su3_minus_su3(v2,electric[2],magnetic[2]); 
+    _su3_acc(aux,v2);
+    _real_times_su3(sw[x][1][0],ka_csw_8,aux);
+    
+    _itimes_su3_minus_su3(aux,magnetic[3],electric[3]);
+    _su3_refac_acc(sw[x][2][0],ka_csw_8,aux);
+
+    /*  lower right block 6x6 matrix: sw[x][0..2][1]  */
+    
+    _itimes_su3_plus_su3(aux,electric[3],magnetic[3]);
+    _su3_refac_acc(sw[x][0][1],(-ka_csw_8),aux);
+
+    _itimes_su3_plus_su3(aux,electric[1],magnetic[1]);
+    _su3_plus_su3(v2,electric[2],magnetic[2]); 
+    _su3_acc(aux,v2);
+    _real_times_su3(sw[x][1][1],(-ka_csw_8),aux);
+
+    _itimes_su3_plus_su3(aux,magnetic[3],electric[3]);
+    _su3_refac_acc(sw[x][2][1],ka_csw_8,aux);
+  }
+#ifdef OMP
+  } /* OpenMP closing brace */
+#endif
+  return;
+}
diff --git a/solver/eigenvalues.c b/solver/eigenvalues.c
index b6a057b77..1f81444b2 100644
--- a/solver/eigenvalues.c
+++ b/solver/eigenvalues.c
@@ -44,12 +44,12 @@
 #include <io/gauge.h>
 #include <io/spinor.h>
 #include <io/utils.h>
-#include "tm_operators.h"
+#include "operator/tm_operators.h"
 #include "solver/solver.h"
 #include "solver/jdher.h"
 #include "solver/matrix_mult_typedef.h"
 #include "linalg_eo.h"
-#include "Dov_psi.h"
+#include "operator/Dov_psi.h"
 #include "eigenvalues.h"
 #include "gettime.h"
 
diff --git a/xchange/Makefile.in b/xchange/Makefile.in
index dc6f7e1bd..d5a0c9ca4 100644
--- a/xchange/Makefile.in
+++ b/xchange/Makefile.in
@@ -4,7 +4,7 @@ top_builddir =  @top_builddir@
 abs_top_builddir = @abs_top_builddir@
 top_srcdir = @top_srcdir@
 abs_top_srcdir = @abs_top_srcdir@
-subdir = linalg
+subdir = xchange
 builddir = @builddir@
 
 CFLAGS = @CFLAGS@

From a2279bfb6ddf4df90721c99f872fdfc964f1fc62 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 11:54:07 +0200
Subject: [PATCH 073/110] moved init related stuff into subdir init

---
 LapH_ev.c                                             |  4 +---
 Makefile.in                                           | 11 +++--------
 benchmark.c                                           |  7 +------
 check_locallity.c                                     |  6 +-----
 configure.in                                          |  4 ++--
 gen_sources.c                                         |  3 +--
 hmc_tm.c                                              | 10 +---------
 hopping_test.c                                        |  6 +-----
 init_bispinor_field.c => init/init_bispinor_field.c   |  0
 init_bispinor_field.h => init/init_bispinor_field.h   |  0
 .../init_chi_spinor_field.c                           |  0
 .../init_chi_spinor_field.h                           |  0
 .../init_dirac_halfspinor.c                           |  0
 .../init_dirac_halfspinor.h                           |  0
 init_gauge_field.c => init/init_gauge_field.c         |  0
 init_gauge_field.h => init/init_gauge_field.h         |  0
 init_gauge_tmp.c => init/init_gauge_tmp.c             |  0
 init_gauge_tmp.h => init/init_gauge_tmp.h             |  0
 .../init_geometry_indices.c                           |  0
 .../init_geometry_indices.h                           |  0
 init_jacobi_field.c => init/init_jacobi_field.c       |  0
 init_jacobi_field.h => init/init_jacobi_field.h       |  0
 init_moment_field.c => init/init_moment_field.c       |  0
 init_moment_field.h => init/init_moment_field.h       |  0
 .../init_omp_accumulators.c                           |  0
 .../init_omp_accumulators.h                           |  0
 init_spinor_field.c => init/init_spinor_field.c       |  0
 init_spinor_field.h => init/init_spinor_field.h       |  0
 .../init_stout_smear_vars.c                           |  0
 .../init_stout_smear_vars.h                           |  0
 invert.c                                              |  9 +--------
 monomial/ndpoly_monomial.c                            |  2 +-
 operator.c                                            |  1 -
 operator/Dov_psi.c                                    |  1 -
 operator/Hopping_Matrix.c                             |  2 +-
 operator/tm_sub_Hopping_Matrix.c                      |  2 +-
 operator/tm_times_Hopping_Matrix.c                    |  2 +-
 phmc.c                                                |  3 +--
 solver/dirac_operator_eigenvectors.c                  |  1 -
 spinor_fft.c                                          |  2 +-
 test/check_overlap.c                                  |  8 +-------
 test/test_eigenvalues.c                               |  7 +------
 test_lemon.c                                          |  3 +--
 update_tm.c                                           |  2 +-
 xchange/xchange_halffield.c                           |  2 +-
 45 files changed, 23 insertions(+), 75 deletions(-)
 rename init_bispinor_field.c => init/init_bispinor_field.c (100%)
 rename init_bispinor_field.h => init/init_bispinor_field.h (100%)
 rename init_chi_spinor_field.c => init/init_chi_spinor_field.c (100%)
 rename init_chi_spinor_field.h => init/init_chi_spinor_field.h (100%)
 rename init_dirac_halfspinor.c => init/init_dirac_halfspinor.c (100%)
 rename init_dirac_halfspinor.h => init/init_dirac_halfspinor.h (100%)
 rename init_gauge_field.c => init/init_gauge_field.c (100%)
 rename init_gauge_field.h => init/init_gauge_field.h (100%)
 rename init_gauge_tmp.c => init/init_gauge_tmp.c (100%)
 rename init_gauge_tmp.h => init/init_gauge_tmp.h (100%)
 rename init_geometry_indices.c => init/init_geometry_indices.c (100%)
 rename init_geometry_indices.h => init/init_geometry_indices.h (100%)
 rename init_jacobi_field.c => init/init_jacobi_field.c (100%)
 rename init_jacobi_field.h => init/init_jacobi_field.h (100%)
 rename init_moment_field.c => init/init_moment_field.c (100%)
 rename init_moment_field.h => init/init_moment_field.h (100%)
 rename init_omp_accumulators.c => init/init_omp_accumulators.c (100%)
 rename init_omp_accumulators.h => init/init_omp_accumulators.h (100%)
 rename init_spinor_field.c => init/init_spinor_field.c (100%)
 rename init_spinor_field.h => init/init_spinor_field.h (100%)
 rename init_stout_smear_vars.c => init/init_stout_smear_vars.c (100%)
 rename init_stout_smear_vars.h => init/init_stout_smear_vars.h (100%)

diff --git a/LapH_ev.c b/LapH_ev.c
index 93e1d6aa8..8442b133c 100644
--- a/LapH_ev.c
+++ b/LapH_ev.c
@@ -50,11 +50,9 @@
 #include "read_input.h"
 #include "start.h"
 #include "xchange/xchange.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
+#include "init/init.h"
 #include "mpi_init.h"
 #include "solver/eigenvalues_Jacobi.h"
-#include "init_jacobi_field.h"
 
 int main(int argc,char *argv[])
 {
diff --git a/Makefile.in b/Makefile.in
index 75ffc92f2..9da193912 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -52,17 +52,12 @@ MODULES = read_input gamma hybrid_update measure_gauge_action start \
 	source_generation boundary update_tm ranlxd  \
 	mpi_init deriv_Sb deriv_Sb_D_psi ranlxs \
 	geometry_eo invert_overlap \
-	init_moment_field init_gauge_tmp prepare_source \
-	init_gauge_field init_geometry_indices init_spinor_field \
-	init_dirac_halfspinor \
-	chebyshev_polynomial_nd Ptilde_nd  \
-	init_chi_spinor_field reweighting_factor_nd \
-	init_bispinor_field \
+	prepare_source chebyshev_polynomial_nd Ptilde_nd  \
+	reweighting_factor_nd \
 	online_measurement update_momenta integrator  phmc \
 	little_D block operator measurements pion_norm \
 	temporalgauge spinor_fft X_psi P_M_eta \
-	jacobi init_jacobi_field \
-	fatal_error invert_clover_eo gettime @SPI_FILES@ init_omp_accumulators
+	jacobi fatal_error invert_clover_eo gettime @SPI_FILES@
 
 ## the GPU modules (all .cu files in $GPUDIR)
 GPUSOURCES := $(wildcard $(srcdir)/$(GPUDIR)/*.cu)
diff --git a/benchmark.c b/benchmark.c
index 1968fba7e..8bef32b6d 100644
--- a/benchmark.c
+++ b/benchmark.c
@@ -44,7 +44,6 @@
 #endif
 #ifdef OMP
 # include <omp.h>
-# include "init_omp_accumulators.h"
 #endif
 #include "gettime.h"
 #include "su3.h"
@@ -59,11 +58,7 @@
 #include "operator/tm_operators.h"
 #include "global.h"
 #include "xchange/xchange.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
-#include "init_spinor_field.h"
-#include "init_moment_field.h"
-#include "init_dirac_halfspinor.h"
+#include "init/init.h"
 #include "test/check_geometry.h"
 #include "operator/D_psi.h"
 #include "phmc.h"
diff --git a/check_locallity.c b/check_locallity.c
index 49cc92dbf..3711edccd 100644
--- a/check_locallity.c
+++ b/check_locallity.c
@@ -54,11 +54,7 @@
 #include "sighandler.h"
 #include "boundary.h"
 #include "solver/solver.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
-#include "init_spinor_field.h"
-#include "init_moment_field.h"
-#include "init_dirac_halfspinor.h"
+#include "init/init.h"
 #include "smearing/stout.h"
 #include "su3spinor.h"
 #include "invert_eo.h"
diff --git a/configure.in b/configure.in
index 35e30f55c..3929f03e5 100644
--- a/configure.in
+++ b/configure.in
@@ -40,7 +40,7 @@ AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
 LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib"
 CCLD=${CC}
 
-USESUBDIRS="buffers cu io solver linalg monomial xchange operator"
+USESUBDIRS="buffers cu io solver linalg monomial xchange operator init"
 
 AC_CHECK_HEADERS([stdint.h],
 [ dnl for inttypes.h and stdint.h for uint_xxx types
@@ -897,7 +897,7 @@ if test ! -e tests/regressions; then
 fi
 
 
-LIBS="-lhmc -lmonomial -loperator -lsolver -lxchange -llinalg -lhmc -lio $LIBS"
+LIBS="-lhmc -lmonomial -loperator -lsolver -lxchange -linit -llinalg -lhmc -lio $LIBS"
 AUTOCONF=autoconf
 
 for i in $USESUBDIRS
diff --git a/gen_sources.c b/gen_sources.c
index 3049b4f16..478b778c6 100644
--- a/gen_sources.c
+++ b/gen_sources.c
@@ -48,10 +48,9 @@
 #include "read_input.h"
 #include "mpi_init.h"
 #include "source_generation.h"
-#include "init_geometry_indices.h"
+#include "init/init.h"
 #include "linalg_eo.h"
 #include "phmc.h"
-#include "init_spinor_field.h"
 
 
 void usage() {
diff --git a/hmc_tm.c b/hmc_tm.c
index c8e3e47c0..9fb71c351 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -43,7 +43,6 @@
 #endif
 #ifdef OMP
 # include <omp.h>
-# include "init_omp_accumulators.h"
 #endif
 #include "global.h"
 #include "git_hash.h"
@@ -62,14 +61,7 @@
 #include "mpi_init.h"
 #include "sighandler.h"
 #include "update_tm.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
-#include "init_spinor_field.h"
-#include "init_moment_field.h"
-#include "init_gauge_tmp.h"
-#include "init_dirac_halfspinor.h"
-#include "init_bispinor_field.h"
-#include "init_chi_spinor_field.h"
+#include "init/init.h"
 #include "test/check_geometry.h"
 #include "boundary.h"
 #include "phmc.h"
diff --git a/hopping_test.c b/hopping_test.c
index 14a1299f3..bedb82b52 100644
--- a/hopping_test.c
+++ b/hopping_test.c
@@ -57,11 +57,7 @@
 #include "operator/tm_operators.h"
 #include "global.h"
 #include "xchange/xchange.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
-#include "init_spinor_field.h"
-#include "init_moment_field.h"
-#include "init_dirac_halfspinor.h"
+#include "init/init.h"
 #include "test/check_geometry.h"
 #include "operator/D_psi.h"
 #include "phmc.h"
diff --git a/init_bispinor_field.c b/init/init_bispinor_field.c
similarity index 100%
rename from init_bispinor_field.c
rename to init/init_bispinor_field.c
diff --git a/init_bispinor_field.h b/init/init_bispinor_field.h
similarity index 100%
rename from init_bispinor_field.h
rename to init/init_bispinor_field.h
diff --git a/init_chi_spinor_field.c b/init/init_chi_spinor_field.c
similarity index 100%
rename from init_chi_spinor_field.c
rename to init/init_chi_spinor_field.c
diff --git a/init_chi_spinor_field.h b/init/init_chi_spinor_field.h
similarity index 100%
rename from init_chi_spinor_field.h
rename to init/init_chi_spinor_field.h
diff --git a/init_dirac_halfspinor.c b/init/init_dirac_halfspinor.c
similarity index 100%
rename from init_dirac_halfspinor.c
rename to init/init_dirac_halfspinor.c
diff --git a/init_dirac_halfspinor.h b/init/init_dirac_halfspinor.h
similarity index 100%
rename from init_dirac_halfspinor.h
rename to init/init_dirac_halfspinor.h
diff --git a/init_gauge_field.c b/init/init_gauge_field.c
similarity index 100%
rename from init_gauge_field.c
rename to init/init_gauge_field.c
diff --git a/init_gauge_field.h b/init/init_gauge_field.h
similarity index 100%
rename from init_gauge_field.h
rename to init/init_gauge_field.h
diff --git a/init_gauge_tmp.c b/init/init_gauge_tmp.c
similarity index 100%
rename from init_gauge_tmp.c
rename to init/init_gauge_tmp.c
diff --git a/init_gauge_tmp.h b/init/init_gauge_tmp.h
similarity index 100%
rename from init_gauge_tmp.h
rename to init/init_gauge_tmp.h
diff --git a/init_geometry_indices.c b/init/init_geometry_indices.c
similarity index 100%
rename from init_geometry_indices.c
rename to init/init_geometry_indices.c
diff --git a/init_geometry_indices.h b/init/init_geometry_indices.h
similarity index 100%
rename from init_geometry_indices.h
rename to init/init_geometry_indices.h
diff --git a/init_jacobi_field.c b/init/init_jacobi_field.c
similarity index 100%
rename from init_jacobi_field.c
rename to init/init_jacobi_field.c
diff --git a/init_jacobi_field.h b/init/init_jacobi_field.h
similarity index 100%
rename from init_jacobi_field.h
rename to init/init_jacobi_field.h
diff --git a/init_moment_field.c b/init/init_moment_field.c
similarity index 100%
rename from init_moment_field.c
rename to init/init_moment_field.c
diff --git a/init_moment_field.h b/init/init_moment_field.h
similarity index 100%
rename from init_moment_field.h
rename to init/init_moment_field.h
diff --git a/init_omp_accumulators.c b/init/init_omp_accumulators.c
similarity index 100%
rename from init_omp_accumulators.c
rename to init/init_omp_accumulators.c
diff --git a/init_omp_accumulators.h b/init/init_omp_accumulators.h
similarity index 100%
rename from init_omp_accumulators.h
rename to init/init_omp_accumulators.h
diff --git a/init_spinor_field.c b/init/init_spinor_field.c
similarity index 100%
rename from init_spinor_field.c
rename to init/init_spinor_field.c
diff --git a/init_spinor_field.h b/init/init_spinor_field.h
similarity index 100%
rename from init_spinor_field.h
rename to init/init_spinor_field.h
diff --git a/init_stout_smear_vars.c b/init/init_stout_smear_vars.c
similarity index 100%
rename from init_stout_smear_vars.c
rename to init/init_stout_smear_vars.c
diff --git a/init_stout_smear_vars.h b/init/init_stout_smear_vars.h
similarity index 100%
rename from init_stout_smear_vars.h
rename to init/init_stout_smear_vars.h
diff --git a/invert.c b/invert.c
index f3ae7a157..2890026e3 100644
--- a/invert.c
+++ b/invert.c
@@ -41,7 +41,6 @@
 #endif
 #ifdef OMP
 # include <omp.h>
-# include "init_omp_accumulators.h"
 #endif
 #include "global.h"
 #include "git_hash.h"
@@ -60,13 +59,7 @@
 #include "sighandler.h"
 #include "boundary.h"
 #include "solver/solver.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
-#include "init_spinor_field.h"
-#include "init_moment_field.h"
-#include "init_dirac_halfspinor.h"
-#include "init_bispinor_field.h"
-#include "init_chi_spinor_field.h"
+#include "init/init.h"
 #include "smearing/stout.h"
 #include "invert_eo.h"
 #include "monomial/monomial.h"
diff --git a/monomial/ndpoly_monomial.c b/monomial/ndpoly_monomial.c
index 9868e4564..6aa45af33 100644
--- a/monomial/ndpoly_monomial.c
+++ b/monomial/ndpoly_monomial.c
@@ -44,7 +44,7 @@
 #include "hamiltonian_field.h"
 #include "boundary.h"
 #include "phmc.h"
-#include "init_chi_spinor_field.h"
+#include "init/init_chi_spinor_field.h"
 #include "solver/matrix_mult_typedef_nd.h"
 #include "operator/clover_leaf.h"
 #include "operator/clovertm_operators.h"
diff --git a/operator.c b/operator.c
index 6797f2802..7279696ec 100644
--- a/operator.c
+++ b/operator.c
@@ -45,7 +45,6 @@
 #include "invert_overlap.h"
 #include "invert_clover_eo.h"
 #include "boundary.h"
-#include "init_chi_spinor_field.h"
 #include "start.h"
 #include "solver/eigenvalues.h"
 #include "solver/solver.h"
diff --git a/operator/Dov_psi.c b/operator/Dov_psi.c
index da133e0d0..d95e2cf4c 100644
--- a/operator/Dov_psi.c
+++ b/operator/Dov_psi.c
@@ -55,7 +55,6 @@
 #include "solver/sub_low_ev.h"
 #include "Dov_psi.h"
 #include "solver/dirac_operator_eigenvectors.h"
-#include "init_spinor_field.h"
 
 void addproj_q_invsqrt(spinor * const Q, spinor * const P, const int n, const int N);
 /* |R>=rnorm^2 Q^2 |S> */
diff --git a/operator/Hopping_Matrix.c b/operator/Hopping_Matrix.c
index 0e96635f0..d75d2e781 100644
--- a/operator/Hopping_Matrix.c
+++ b/operator/Hopping_Matrix.c
@@ -60,7 +60,7 @@
 #  include "xchange/xchange.h"
 #endif
 #include "boundary.h"
-#include "init_dirac_halfspinor.h"
+#include "init/init_dirac_halfspinor.h"
 #include "update_backward_gauge.h"
 #ifdef BGQ
 #  include"DirectPut.h"
diff --git a/operator/tm_sub_Hopping_Matrix.c b/operator/tm_sub_Hopping_Matrix.c
index bb63dc2a8..dd96ca221 100644
--- a/operator/tm_sub_Hopping_Matrix.c
+++ b/operator/tm_sub_Hopping_Matrix.c
@@ -45,7 +45,7 @@
 #  include "xchange/xchange.h"
 #endif
 #include "boundary.h"
-#include "init_dirac_halfspinor.h"
+#include "init/init_dirac_halfspinor.h"
 #include "update_backward_gauge.h"
 #include "tm_sub_Hopping_Matrix.h"
 
diff --git a/operator/tm_times_Hopping_Matrix.c b/operator/tm_times_Hopping_Matrix.c
index 7b9dc882d..183bf737b 100644
--- a/operator/tm_times_Hopping_Matrix.c
+++ b/operator/tm_times_Hopping_Matrix.c
@@ -45,7 +45,7 @@
 #  include "xchange/xchange.h"
 #endif
 #include "boundary.h"
-#include "init_dirac_halfspinor.h"
+#include "init/init_dirac_halfspinor.h"
 #include "update_backward_gauge.h"
 #include "tm_times_Hopping_Matrix.h"
 
diff --git a/phmc.c b/phmc.c
index c5b19bebd..9233606ca 100644
--- a/phmc.c
+++ b/phmc.c
@@ -28,10 +28,9 @@
 #include "global.h"
 
 #include "read_input.h"
-#include "init_bispinor_field.h"
 #include "solver/eigenvalues_bi.h"
 #include "solver/solver.h"
-#include "init_chi_spinor_field.h"
+#include "init/init.h"
 #include "chebyshev_polynomial_nd.h"
 #include "Ptilde_nd.h"
 #include "operator/tm_operators_nd.h"
diff --git a/solver/dirac_operator_eigenvectors.c b/solver/dirac_operator_eigenvectors.c
index 008e58444..26c288831 100644
--- a/solver/dirac_operator_eigenvectors.c
+++ b/solver/dirac_operator_eigenvectors.c
@@ -31,7 +31,6 @@
 #include "operator/D_psi.h"
 #include "ranlxd.h"
 #include "operator/Dov_psi.h"
-#include "init_spinor_field.h"
 
 /*   typedef enum tm_operator_ {PRECWS_DTM,PRECWS_QTM,PRECWS_D_DAGGER_D} tm_operator; */
 
diff --git a/spinor_fft.c b/spinor_fft.c
index 6b9f50152..6fe193a68 100644
--- a/spinor_fft.c
+++ b/spinor_fft.c
@@ -20,7 +20,7 @@
 
 #include "spinor_fft.h"
 #include "mpi_init.h"
-#include "init_spinor_field.h"
+#include "init/init.h"
 
 #ifdef HAVE_FFTW
   #include <fftw3.h>
diff --git a/test/check_overlap.c b/test/check_overlap.c
index a9d89e79c..3fff46272 100644
--- a/test/check_overlap.c
+++ b/test/check_overlap.c
@@ -58,13 +58,7 @@
 #include "sighandler.h"
 #include "boundary.h"
 #include "solver/solver.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
-#include "init_spinor_field.h"
-#include "init_moment_field.h"
-#include "init_dirac_halfspinor.h"
-#include "init_bispinor_field.h"
-#include "init_chi_spinor_field.h"
+#include "init/init.h"
 #include "xchange_halffield.h"
 #include "stout_smear.h"
 #include "invert_eo.h"
diff --git a/test/test_eigenvalues.c b/test/test_eigenvalues.c
index dbb213d29..02f00b96a 100644
--- a/test/test_eigenvalues.c
+++ b/test/test_eigenvalues.c
@@ -58,12 +58,7 @@
 #include "sighandler.h"
 #include "hybrid_update.h"
 #include "update_tm.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
-#include "init_spinor_field.h"
-#include "init_bispinor_field.h"
-#include "init_moment_field.h"
-#include "init_gauge_tmp.h"
+#include "init/init.h"
 #include "test/check_geometry.h"
 #include "boundary.h"
 #include "polyakov_loop.h"
diff --git a/test_lemon.c b/test_lemon.c
index 8353f08f9..bbbf097ba 100644
--- a/test_lemon.c
+++ b/test_lemon.c
@@ -52,8 +52,7 @@
 #include "boundary.h"
 #include "global.h"
 #include "xchange/xchange.h"
-#include "init_gauge_field.h"
-#include "init_geometry_indices.h"
+#include "init/init.h"
 #include "measure_gauge_action.h"
 #include "mpi_init.h"
 
diff --git a/update_tm.c b/update_tm.c
index 02f4f1cbe..87680a6e7 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -55,7 +55,7 @@
 #include "expo.h"
 #include "xchange/xchange.h"
 #include "measure_rectangles.h"
-#include "init_gauge_tmp.h"
+#include "init/init_gauge_tmp.h"
 #include "monomial/monomial.h"
 #include "integrator.h"
 #include "hamiltonian_field.h"
diff --git a/xchange/xchange_halffield.c b/xchange/xchange_halffield.c
index d82e53f9d..73106a26e 100644
--- a/xchange/xchange_halffield.c
+++ b/xchange/xchange_halffield.c
@@ -41,7 +41,7 @@
 #endif
 #include "mpi_init.h"
 #include "su3.h"
-#include "init_dirac_halfspinor.h"
+#include "init/init_dirac_halfspinor.h"
 #include "xchange_halffield.h"
 
 #if (defined _USE_HALFSPINOR)

From 1f5beabb569d34de096d3d621950342e81c6b87f Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 11:54:17 +0200
Subject: [PATCH 074/110] moved init related stuff into subdir init

---
 init/Makefile.in | 99 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 init/Makefile.in

diff --git a/init/Makefile.in b/init/Makefile.in
new file mode 100644
index 000000000..42e4ba1e2
--- /dev/null
+++ b/init/Makefile.in
@@ -0,0 +1,99 @@
+# Makefile template for subdir init: builds libinit.a from the init_* sources and copies it to ../lib
+srcdir = @srcdir@
+top_builddir =  @top_builddir@
+abs_top_builddir = @abs_top_builddir@
+top_srcdir = @top_srcdir@
+abs_top_srcdir = @abs_top_srcdir@
+subdir = init
+builddir = @builddir@
+
+CFLAGS = @CFLAGS@
+DEPFLAGS = @DEPFLAGS@
+LDFLAGS = @LDFLAGS@
+DEFS = @DEFS@
+OPTARGS = @OPTARGS@
+SOPTARGS = @SOPTARGS@
+
+AR = @AR@
+RANLIB = @RANLIB@
+CC = @CC@
+CCDEP = @CCDEP@
+CCLD = ${CC}
+LINK = ${CCLD} ${CFLAGS} ${LDFLAGS} ${OPTARGS} -o $@
+LEX = @LEX@
+AUTOCONF = @AUTOCONF@
+DEFS = @DEFS@
+
+INCLUDES = @INCLUDES@
+LDADD =
+#COMPILE = ${CC} ${DEFS} ${INCLUDES} ${CFLAGS}
+COMPILE = ${CC} $(DEFS) ${INCLUDES} ${CFLAGS}
+
+LIBRARIES = libinit
+libinit_TARGETS = init_moment_field init_gauge_tmp init_gauge_field \
+	init_geometry_indices init_spinor_field init_dirac_halfspinor \
+	init_chi_spinor_field init_bispinor_field init_jacobi_field \
+	init_omp_accumulators
+
+libinit_STARGETS = 
+
+libinit_OBJECTS = $(addsuffix .o, ${libinit_TARGETS})
+libinit_SOBJECTS = $(addsuffix .o, ${libinit_STARGETS})
+
+# default rule
+
+all: Makefile dep libinit.a
+
+# rules for debugging
+debug all-debug: CFLAGS := $(CFLAGS) @DEBUG_FLAG@
+debug all-debug: all
+
+# rules for profiling information
+profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@
+profile all-profile: all
+
+
+#include dep rules
+
+-include $(addsuffix .d,${libinit_TARGETS})
+
+include ${top_srcdir}/Makefile.global
+
+# rule to compile objects
+
+${libinit_OBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${OPTARGS} -c $<
+
+${libinit_SOBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
+	$(COMPILE) ${SOPTARGS} -c $<
+
+# rule to make libinit
+
+libinit.a: ${libinit_OBJECTS} ${libinit_SOBJECTS} Makefile
+	@rm -f libinit.a
+	@${AR} cru libinit.a ${libinit_OBJECTS} ${libinit_SOBJECTS}
+	@$(RANLIB) libinit.a
+	@cp libinit.a ../lib/libinit.a
+
+# rule to generate .d files
+
+$(addsuffix .d, $(libinit_TARGETS) ${libinit_STARGETS}): %.d: ${srcdir}/%.c Makefile
+	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+
+# rule to make dependencies
+
+dep: ${addsuffix .d, ${libinit_TARGETS} ${libinit_STARGETS}}
+
+# rules to clean
+
+compile-clean: Makefile
+	rm -f ${$(addsuffix _OBJECTS, ${LIBRARIES})} ${$(addsuffix _SOBJECTS, ${LIBRARIES})} *.d
+
+clean: compile-clean 
+	rm -f $(addsuffix .a, ${LIBRARIES})
+	rm -f ../lib/libinit.a
+
+distclean: clean
+	rm -f Makefile
+
+.PHONY: all dep clean compile-clean distclean profile all-profile debug all-debug

From cf07fffe80ec84f2379cb1cc01bd125f68f12c97 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 12:05:06 +0200
Subject: [PATCH 075/110] some inconsistencies due to new directories fixed

---
 configure.in        |  2 +-
 init/init.h         | 41 +++++++++++++++++++++++++++++++++++++++++
 test/overlaptests.c |  2 +-
 xchange/Makefile.in |  2 +-
 xchange/xchange.h   |  3 +++
 5 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 init/init.h

diff --git a/configure.in b/configure.in
index 3929f03e5..ed85be644 100644
--- a/configure.in
+++ b/configure.in
@@ -897,7 +897,7 @@ if test ! -e tests/regressions; then
 fi
 
 
-LIBS="-lhmc -lmonomial -loperator -lsolver -lxchange -linit -llinalg -lhmc -lio $LIBS"
+LIBS="-lhmc -lmonomial -loperator -lsolver -linit -llinalg -lhmc -lxchange -lio $LIBS"
 AUTOCONF=autoconf
 
 for i in $USESUBDIRS
diff --git a/init/init.h b/init/init.h
new file mode 100644
index 000000000..eeab4fd4b
--- /dev/null
+++ b/init/init.h
@@ -0,0 +1,41 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* Aggregate header for the init/ headers; the jacobi header needs WITHLAP, the OpenMP ones need OMP. */
+#ifndef _INIT_H
+#define _INIT_H
+
+#include "init/init_bispinor_field.h"
+#include "init/init_chi_spinor_field.h"
+#include "init/init_dirac_halfspinor.h"
+#include "init/init_gauge_field.h"
+#include "init/init_gauge_tmp.h"
+#include "init/init_geometry_indices.h"
+#ifdef WITHLAP
+#  include "init/init_jacobi_field.h"
+#endif
+#include "init/init_moment_field.h"
+#include "init/init_spinor_field.h"
+#include "init/init_stout_smear_vars.h"
+#ifdef OMP
+# include <omp.h>
+# include "init/init_omp_accumulators.h"
+#endif
+
+
+#endif
diff --git a/test/overlaptests.c b/test/overlaptests.c
index 3efa87854..86e097867 100644
--- a/test/overlaptests.c
+++ b/test/overlaptests.c
@@ -16,7 +16,7 @@
 #include "linalg_eo.h"
 #include "start.h"
 #ifdef MPI
-# include "xchange.h"
+# include "xchange/xchange.h"
 #endif
 #include "read_input.h"
 #include "boundary.h"
diff --git a/xchange/Makefile.in b/xchange/Makefile.in
index d5a0c9ca4..30b619f7f 100644
--- a/xchange/Makefile.in
+++ b/xchange/Makefile.in
@@ -32,7 +32,7 @@ COMPILE = ${CC} $(DEFS) ${INCLUDES} ${CFLAGS}
 LIBRARIES = libxchange
 libxchange_TARGETS = xchange_deri xchange_field xchange_gauge xchange_halffield \
 	xchange_lexicfield xchange_2fields xchange_field_tslice \
-	xchange_jacobi 
+	xchange_jacobi
 
 libxchange_STARGETS = 
 
diff --git a/xchange/xchange.h b/xchange/xchange.h
index ffdfa9a48..66f68024a 100644
--- a/xchange/xchange.h
+++ b/xchange/xchange.h
@@ -24,6 +24,9 @@
 #include "xchange/xchange_deri.h"
 #include "xchange/xchange_halffield.h"
 #include "xchange/xchange_jacobi.h"
+#include "xchange/xchange_2fields.h"
+#include "xchange/xchange_lexicfield.h"
+
 #  ifdef _USE_TSPLITPAR
 #    include "xchange/xchange_field_tslice.h"
 #  endif

From 25601c94c401b0cdc0bd399153450ab3a65a1115 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 12:08:04 +0200
Subject: [PATCH 076/110] missing include file added

---
 operator/Dov_psi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/operator/Dov_psi.c b/operator/Dov_psi.c
index d95e2cf4c..68231dbce 100644
--- a/operator/Dov_psi.c
+++ b/operator/Dov_psi.c
@@ -54,6 +54,7 @@
 #include "solver/eigenvalues.h"
 #include "solver/sub_low_ev.h"
 #include "Dov_psi.h"
+#include "init/init.h"
 #include "solver/dirac_operator_eigenvectors.h"
 
 void addproj_q_invsqrt(spinor * const Q, spinor * const P, const int n, const int N);

From 8c28462bc2ac4e2b084101f305d99b167ece0a7c Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 19 Oct 2012 13:43:56 +0200
Subject: [PATCH 077/110] wrong include paths fixed

---
 solver/generate_dfl_subspace.c | 2 +-
 solver/sumr.c                  | 2 +-
 test/overlaptests.c            | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/solver/generate_dfl_subspace.c b/solver/generate_dfl_subspace.c
index fb7a9e14e..a65b4ab94 100644
--- a/solver/generate_dfl_subspace.c
+++ b/solver/generate_dfl_subspace.c
@@ -34,7 +34,7 @@
 #include <complex.h>
 #include "start.h"
 #include "ranlxs.h"
-#include "D_psi.h"
+#include "operator/D_psi.h"
 #include "poly_precon.h"
 #include "Msap.h"
 #include "gmres_precon.h"
diff --git a/solver/sumr.c b/solver/sumr.c
index 312dbccab..e96ab4112 100644
--- a/solver/sumr.c
+++ b/solver/sumr.c
@@ -53,7 +53,7 @@
 #include "gamma.h"
 #include "solver/eigenvalues.h"
 #include "solver/sub_low_ev.h"
-#include "Dov_psi.h"
+#include "operator/Dov_psi.h"
 #include "solver_field.h"
 #include "sumr.h"
 
diff --git a/test/overlaptests.c b/test/overlaptests.c
index 86e097867..b3f810752 100644
--- a/test/overlaptests.c
+++ b/test/overlaptests.c
@@ -21,7 +21,7 @@
 #include "read_input.h"
 #include "boundary.h"
 #include "linalg/convert_eo_to_lexic.h"
-#include "Dov_psi.h"
+#include "operator/Dov_psi.h"
 
 #include "overlaptests.h"
 #include "gamma.h"

From ee5f969aa51238f18abbcd4823a4d0694a847bcc Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 30 Oct 2012 11:04:24 +0100
Subject: [PATCH 078/110] DEFS was missing in the dep rules, fixed

---
 buffers/Makefile.in  | 2 +-
 cu/Makefile.in       | 2 +-
 init/Makefile.in     | 2 +-
 io/Makefile.in       | 2 +-
 linalg/Makefile.in   | 2 +-
 monomial/Makefile.in | 2 +-
 operator/Makefile.in | 3 ++-
 smearing/Makefile.in | 2 +-
 solver/Makefile.in   | 2 +-
 xchange/Makefile.in  | 2 +-
 10 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/buffers/Makefile.in b/buffers/Makefile.in
index dce158d1e..4c9a2806c 100644
--- a/buffers/Makefile.in
+++ b/buffers/Makefile.in
@@ -78,7 +78,7 @@ libbuffers.a: ${libbuffers_OBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d,$(libbuffers_TARGETS)): %.d: ${srcdir}/%.c Makefile
-	@$(CCDEP) ${DEPFLAGS} ${INCLUDES} $< > $@
+	@$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/cu/Makefile.in b/cu/Makefile.in
index a0a076e72..7d63019cc 100644
--- a/cu/Makefile.in
+++ b/cu/Makefile.in
@@ -59,7 +59,7 @@ libcu.a: ${libcu_OBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d,$(libcu_TARGETS)): %.d: ${srcdir}/%.c Makefile
-	@$(CCDEP) ${DEPFLAGS} ${INCLUDES} $< > $@
+	@$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/init/Makefile.in b/init/Makefile.in
index 42e4ba1e2..ef7a22eda 100644
--- a/init/Makefile.in
+++ b/init/Makefile.in
@@ -78,7 +78,7 @@ libinit.a: ${libinit_OBJECTS} ${libinit_SOBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d, $(libinit_TARGETS) ${libinit_STARGETS}): %.d: ${srcdir}/%.c Makefile
-	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+	@${CCDEP} ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/io/Makefile.in b/io/Makefile.in
index 7b6f877ef..70ab69c58 100644
--- a/io/Makefile.in
+++ b/io/Makefile.in
@@ -111,7 +111,7 @@ libio.a: ${libio_OBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d,$(libio_TARGETS)): %.d: ${srcdir}/%.c Makefile
-	@$(CCDEP) ${DEPFLAGS} ${INCLUDES} $< > $@
+	@$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/linalg/Makefile.in b/linalg/Makefile.in
index 2385cf7da..d7e749859 100644
--- a/linalg/Makefile.in
+++ b/linalg/Makefile.in
@@ -88,7 +88,7 @@ liblinalg.a: ${liblinalg_OBJECTS} ${liblinalg_SOBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d, $(liblinalg_TARGETS) ${liblinalg_STARGETS}): %.d: ${srcdir}/%.c Makefile
-	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+	@${CCDEP} ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/monomial/Makefile.in b/monomial/Makefile.in
index cc39c90f2..c0064d82c 100644
--- a/monomial/Makefile.in
+++ b/monomial/Makefile.in
@@ -78,7 +78,7 @@ libmonomial.a: ${libmonomial_OBJECTS} ${libmonomial_SOBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d, $(libmonomial_TARGETS) ${libmonomial_STARGETS}): %.d: ${srcdir}/%.c Makefile
-	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+	@${CCDEP} ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/operator/Makefile.in b/operator/Makefile.in
index a6a1be83f..5742a10de 100644
--- a/operator/Makefile.in
+++ b/operator/Makefile.in
@@ -77,7 +77,8 @@ liboperator.a: ${liboperator_OBJECTS} ${liboperator_SOBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d, $(liboperator_TARGETS) ${liboperator_STARGETS}): %.d: ${srcdir}/%.c Makefile
-	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+	@${CCDEP} ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
+
 
 # rule to make dependencies
 
diff --git a/smearing/Makefile.in b/smearing/Makefile.in
index 802a69d8f..408565722 100644
--- a/smearing/Makefile.in
+++ b/smearing/Makefile.in
@@ -75,7 +75,7 @@ libsmear.a: ${libsmear_OBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d,$(libsmear_TARGETS)): %.d: ${srcdir}/%.c Makefile
-	@$(CCDEP) ${DEPFLAGS} ${INCLUDES} $< > $@
+	@$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/solver/Makefile.in b/solver/Makefile.in
index 708eee4bc..85d6d266f 100644
--- a/solver/Makefile.in
+++ b/solver/Makefile.in
@@ -80,7 +80,7 @@ libsolver.a: ${libsolver_OBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d,$(libsolver_TARGETS)): %.d: ${srcdir}/%.c Makefile
-	@$(CCDEP) ${DEPFLAGS} ${INCLUDES} $< > $@
+	@$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 
diff --git a/xchange/Makefile.in b/xchange/Makefile.in
index 30b619f7f..bfea6a61b 100644
--- a/xchange/Makefile.in
+++ b/xchange/Makefile.in
@@ -77,7 +77,7 @@ libxchange.a: ${libxchange_OBJECTS} ${libxchange_SOBJECTS} Makefile
 # rule to generate .d files
 
 $(addsuffix .d, $(libxchange_TARGETS) ${libxchange_STARGETS}): %.d: ${srcdir}/%.c Makefile
-	@${CCDEP} ${DEPFLAGS} ${INCLUDES} $< > $@
+	@${CCDEP} ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
 # rule to make dependencies
 

From a46e8c01c3d34904fd8b9f2ede57438427c1fc83 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Mon, 29 Oct 2012 12:31:23 +0100
Subject: [PATCH 079/110] OpenMP reductions add the reduction variable in an
 undefined order, resulting in differing rounding errors from call to call. To
 alleviate this we use the omp accumulator arrays and collect the result
 manually.

---
 linalg/assign_mul_add_r_and_square.c | 58 +++++++++++++++++++---------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/linalg/assign_mul_add_r_and_square.c b/linalg/assign_mul_add_r_and_square.c
index c61c0e5e2..a8921af05 100644
--- a/linalg/assign_mul_add_r_and_square.c
+++ b/linalg/assign_mul_add_r_and_square.c
@@ -27,6 +27,7 @@
 #include <complex.h>
 #ifdef OMP
 # include <omp.h>
+# include <global.h>
 #endif
 #include "su3.h"
 #include "assign_mul_add_r_and_square.h"
@@ -42,15 +43,16 @@ double assign_mul_add_r_and_square(spinor * const R, const double c, spinor * co
 #endif
 
 #ifdef OMP
-#pragma omp parallel reduction(+: res)
+#pragma omp parallel
   {
+  int thread_num = omp_get_thread_num();
 #endif
   vector4double x0, x1, x2, x3, x4, x5, y0, y1, y2, y3, y4, y5;
   vector4double z0, z1, z2, z3, z4, z5, k;
   vector4double r0, r1, r2, r3, r4, r5;
   double *s, *r;
   double ALIGN _c = c;
-  res = 0.0;
+  double ALIGN ds = 0.0;
 #ifndef OMP
   __prefetch_by_load(S);
   __prefetch_by_load(R);
@@ -72,7 +74,6 @@ double assign_mul_add_r_and_square(spinor * const R, const double c, spinor * co
 #ifdef OMP
 #pragma omp for 
 #endif
-  //#pragma unroll(4)
   for(int i = 0; i < N; i++) {
     s=(double*)((spinor *) S + i);
     r=(double*)((spinor *) R + i);
@@ -114,11 +115,19 @@ double assign_mul_add_r_and_square(spinor * const R, const double c, spinor * co
   x2 = vec_add(r4, r5);
   y0 = vec_add(x0, x1);
   y1 = vec_add(x2, y0);
-  res = y1[0] + y1[1] + y1[2] + y1[3];
+  ds = y1[0] + y1[1] + y1[2] + y1[3];
 
 #ifdef OMP
+  g_omp_acc_re[thread_num] = ds;
   } /* OpenMP closing brace */
-#endif  
+
+  for(int i = 0; i < omp_num_threads; ++i) {
+    res += g_omp_acc_re[i];
+  }
+#else
+  res = ds;
+#endif
+
 #  ifdef MPI
   if(parallel) {
     MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
@@ -141,12 +150,14 @@ double assign_mul_add_r_and_square(spinor * const R, const double c, const spino
 #endif
 
 #ifdef OMP
-#pragma omp parallel reduction(+ : res)
+#pragma omp parallel
   {
+  int thread_num = omp_get_thread_num();
 #endif
   spinor *r;
   const spinor *s;
-  res = 0.0;
+  double ALIGN ds = 0.0;
+
   /* Change due to even-odd preconditioning : VOLUME   to VOLUME/2 */   
 #ifdef OMP
 #pragma omp for 
@@ -156,36 +167,45 @@ double assign_mul_add_r_and_square(spinor * const R, const double c, const spino
     s = S + ix;
     
     r->s0.c0 = c * r->s0.c0 + s->s0.c0;
-    res += creal(r->s0.c0)*creal(r->s0.c0) + cimag(r->s0.c0)*cimag(r->s0.c0);
+    ds += creal(r->s0.c0)*creal(r->s0.c0) + cimag(r->s0.c0)*cimag(r->s0.c0);
     r->s0.c1 = c * r->s0.c1 + s->s0.c1;
-    res += creal(r->s0.c1)*creal(r->s0.c1) + cimag(r->s0.c1)*cimag(r->s0.c1);
+    ds += creal(r->s0.c1)*creal(r->s0.c1) + cimag(r->s0.c1)*cimag(r->s0.c1);
     r->s0.c2 = c * r->s0.c2 + s->s0.c2;    
-    res += creal(r->s0.c2)*creal(r->s0.c2) + cimag(r->s0.c2)*cimag(r->s0.c2);
+    ds += creal(r->s0.c2)*creal(r->s0.c2) + cimag(r->s0.c2)*cimag(r->s0.c2);
 
     r->s1.c0 = c * r->s1.c0 + s->s1.c0;
-    res += creal(r->s1.c0)*creal(r->s1.c0) + cimag(r->s1.c0)*cimag(r->s1.c0);
+    ds += creal(r->s1.c0)*creal(r->s1.c0) + cimag(r->s1.c0)*cimag(r->s1.c0);
     r->s1.c1 = c * r->s1.c1 + s->s1.c1;
-    res += creal(r->s1.c1)*creal(r->s1.c1) + cimag(r->s1.c1)*cimag(r->s1.c1);
+    ds += creal(r->s1.c1)*creal(r->s1.c1) + cimag(r->s1.c1)*cimag(r->s1.c1);
     r->s1.c2 = c * r->s1.c2 + s->s1.c2;    
-    res += creal(r->s1.c2)*creal(r->s1.c2) + cimag(r->s1.c2)*cimag(r->s1.c2);
+    ds += creal(r->s1.c2)*creal(r->s1.c2) + cimag(r->s1.c2)*cimag(r->s1.c2);
 
     r->s2.c0 = c * r->s2.c0 + s->s2.c0;
-    res += creal(r->s2.c0)*creal(r->s2.c0) + cimag(r->s2.c0)*cimag(r->s2.c0);
+    ds += creal(r->s2.c0)*creal(r->s2.c0) + cimag(r->s2.c0)*cimag(r->s2.c0);
     r->s2.c1 = c * r->s2.c1 + s->s2.c1;
-    res += creal(r->s2.c1)*creal(r->s2.c1) + cimag(r->s2.c1)*cimag(r->s2.c1);
+    ds += creal(r->s2.c1)*creal(r->s2.c1) + cimag(r->s2.c1)*cimag(r->s2.c1);
     r->s2.c2 = c * r->s2.c2 + s->s2.c2;    
-    res += creal(r->s2.c2)*creal(r->s2.c2) + cimag(r->s2.c2)*cimag(r->s2.c2);
+    ds += creal(r->s2.c2)*creal(r->s2.c2) + cimag(r->s2.c2)*cimag(r->s2.c2);
 
     r->s3.c0 = c * r->s3.c0 + s->s3.c0;
-    res += creal(r->s3.c0)*creal(r->s3.c0) + cimag(r->s3.c0)*cimag(r->s3.c0);
+    ds += creal(r->s3.c0)*creal(r->s3.c0) + cimag(r->s3.c0)*cimag(r->s3.c0);
     r->s3.c1 = c * r->s3.c1 + s->s3.c1;
-    res += creal(r->s3.c1)*creal(r->s3.c1) + cimag(r->s3.c1)*cimag(r->s3.c1);
+    ds += creal(r->s3.c1)*creal(r->s3.c1) + cimag(r->s3.c1)*cimag(r->s3.c1);
     r->s3.c2 = c * r->s3.c2 + s->s3.c2;   
-    res += creal(r->s3.c2)*creal(r->s3.c2) + cimag(r->s3.c2)*cimag(r->s3.c2);
+    ds += creal(r->s3.c2)*creal(r->s3.c2) + cimag(r->s3.c2)*cimag(r->s3.c2);
   }
+
 #ifdef OMP
+  g_omp_acc_re[thread_num] = ds;
   } /* OpenMP closing brace */
+
+  for(int i = 0; i < omp_num_threads; ++i) {
+    res += g_omp_acc_re[i];
+  }
+#else
+  res = ds;
 #endif
+
 #  ifdef MPI
   if(parallel) {
     MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

From c1a9737d84f2bf1f73ca0646b50f1e11782d0a61 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Mon, 29 Oct 2012 13:02:30 +0100
Subject: [PATCH 080/110] define inline helper functions for clover term as
 'static inline' in accordance with C99 standard

---
 operator/clover_deriv.c  |  1 +
 operator/clover_det.c    |  1 +
 operator/clover_inline.h | 68 ++++++++++++++++++++++++++++++++++++++++
 operator/clover_invert.c |  1 +
 operator/clover_leaf.h   | 48 ----------------------------
 5 files changed, 71 insertions(+), 48 deletions(-)
 create mode 100644 operator/clover_inline.h

diff --git a/operator/clover_deriv.c b/operator/clover_deriv.c
index 5db20b46f..47f9f77de 100644
--- a/operator/clover_deriv.c
+++ b/operator/clover_deriv.c
@@ -51,6 +51,7 @@
 #include "su3adj.h"
 #include "operator/clovertm_operators.h"
 #include "operator/clover_leaf.h"
+#include "operator/clover_inline.h"
 
 // this is (-tr(1+T_ee(+mu)) -tr(1+T_ee(-mu)))      
 // (or T_oo of course)
diff --git a/operator/clover_det.c b/operator/clover_det.c
index 2fc276ffa..d52072b3d 100644
--- a/operator/clover_det.c
+++ b/operator/clover_det.c
@@ -51,6 +51,7 @@
 #include "su3adj.h"
 #include "operator/clovertm_operators.h"
 #include "operator/clover_leaf.h"
+#include "operator/clover_inline.h"
 
 #define nm1 5
 void six_det(_Complex double* const rval, _Complex double a[6][6])
diff --git a/operator/clover_inline.h b/operator/clover_inline.h
new file mode 100644
index 000000000..50e053a2d
--- /dev/null
+++ b/operator/clover_inline.h
@@ -0,0 +1,68 @@
+/***********************************************************************
+ *
+ * Copyright (C) 2005 Martin Hasenbusch
+ *               2011 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+/* helper definitions needed for the function sw_trace(int ieo) */
+static inline void populate_6x6_matrix(_Complex double a[6][6], const su3 * const C, const int row, const int col) {
+  a[0+row][0+col] = C->c00;
+  a[0+row][1+col] = C->c01;
+  a[0+row][2+col] = C->c02;
+  a[1+row][0+col] = C->c10;
+  a[1+row][1+col] = C->c11;
+  a[1+row][2+col] = C->c12;
+  a[2+row][0+col] = C->c20;
+  a[2+row][1+col] = C->c21;
+  a[2+row][2+col] = C->c22;
+  return;
+}
+
+static inline void get_3x3_block_matrix(su3 * const C, _Complex double a[6][6], const int row, const int col) {
+  C->c00 = a[0+row][0+col];
+  C->c01 = a[0+row][1+col];
+  C->c02 = a[0+row][2+col];
+  C->c10 = a[1+row][0+col];
+  C->c11 = a[1+row][1+col];
+  C->c12 = a[1+row][2+col];
+  C->c20 = a[2+row][0+col];
+  C->c21 = a[2+row][1+col];
+  C->c22 = a[2+row][2+col];
+  return;
+}
+
+// This function computes the trace-log part of the clover term
+// in case of even/odd preconditioning
+//
+// it is expected that sw_term is called beforehand such that
+// the array sw is populated properly
+
+static inline void add_tm(_Complex double a[6][6], const double mu) {
+  for(int i = 0; i < 6; i++) {
+    a[i][i] += I*mu;
+  }
+  return;
+}
+
+static inline void add_shift_6x6(_Complex double a[6][6], const double mshift) {
+  for(int i = 0; i < 6; i++) {
+    a[i][i] += mshift;
+  }
+  return;
+}
+
diff --git a/operator/clover_invert.c b/operator/clover_invert.c
index 1e2b3dd3b..b9a241bb6 100644
--- a/operator/clover_invert.c
+++ b/operator/clover_invert.c
@@ -51,6 +51,7 @@
 #include "su3adj.h"
 #include "operator/clovertm_operators.h"
 #include "operator/clover_leaf.h"
+#include "operator/clover_inline.h"
 
 /*
   !--------------------------------------------------------------!
diff --git a/operator/clover_leaf.h b/operator/clover_leaf.h
index ff942fc87..44db299cf 100644
--- a/operator/clover_leaf.h
+++ b/operator/clover_leaf.h
@@ -43,52 +43,4 @@ void add_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d
 void sub_6x6(_Complex double a[6][6], _Complex double b[6][6], _Complex double d[6][6]);
 void copy_6x6(_Complex double a[6][6], const _Complex double b[6][6]);
 
-/*definitions needed for the functions sw_trace(int ieo) and sw_trace(int ieo)*/
-inline void populate_6x6_matrix(_Complex double a[6][6], const su3 * const C, const int row, const int col) {
-  a[0+row][0+col] = C->c00;
-  a[0+row][1+col] = C->c01;
-  a[0+row][2+col] = C->c02;
-  a[1+row][0+col] = C->c10;
-  a[1+row][1+col] = C->c11;
-  a[1+row][2+col] = C->c12;
-  a[2+row][0+col] = C->c20;
-  a[2+row][1+col] = C->c21;
-  a[2+row][2+col] = C->c22;
-  return;
-}
-
-inline void get_3x3_block_matrix(su3 * const C, _Complex double a[6][6], const int row, const int col) {
-  C->c00 = a[0+row][0+col];
-  C->c01 = a[0+row][1+col];
-  C->c02 = a[0+row][2+col];
-  C->c10 = a[1+row][0+col];
-  C->c11 = a[1+row][1+col];
-  C->c12 = a[1+row][2+col];
-  C->c20 = a[2+row][0+col];
-  C->c21 = a[2+row][1+col];
-  C->c22 = a[2+row][2+col];
-  return;
-}
-
-// This function computes the trace-log part of the clover term
-// in case of even/odd preconditioning
-//
-// it is expected that sw_term is called beforehand such that
-// the array sw is populated properly
-
-inline void add_tm(_Complex double a[6][6], const double mu) {
-  for(int i = 0; i < 6; i++) {
-    a[i][i] += I*mu;
-  }
-  return;
-}
-
-inline void add_shift_6x6(_Complex double a[6][6], const double mshift) {
-  for(int i = 0; i < 6; i++) {
-    a[i][i] += mshift;
-  }
-  return;
-}
-
-
 #endif

From efc5cca225d9df91c93d9a1c69b6a15101c567a1 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Mon, 29 Oct 2012 13:01:03 +0100
Subject: [PATCH 081/110] add empty definition of prefetch_halfspinor to
 non-vectorized version of halfspinor_hopping

---
 operator/halfspinor_hopping.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/operator/halfspinor_hopping.h b/operator/halfspinor_hopping.h
index fa1cfc956..bc7f29222 100644
--- a/operator/halfspinor_hopping.h
+++ b/operator/halfspinor_hopping.h
@@ -1149,6 +1149,7 @@
 #else
 
 #define _prefetch_spinor(s)
+#define _prefetch_halfspinor(hs)
 #define _prefetch_su3(U)
 
 #define _hop_t_p_pre32()				\

From 411f79db4518300d3f6abefd0dd947dd2ab9785d Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Wed, 31 Oct 2012 17:08:38 +0100
Subject: [PATCH 082/110] Optimize build for parallel make. Move away from
 "all-recursive" construct in favor of a "subdirectories" construct without a
 loop.

$(SUBDIRS):
  $(MAKE) --directory=$@

This allows make to parallelize building files from different directories
at the same time. Up to now, make only built files from each directory in
parallel, resulting in the "operator" directory taking a long time to build
with highly optimizing compilers.

In addition, shuffle build order to encourage modules that take a long time
to compile to start early in the compilation process.
---
 Makefile.global      | 17 +++--------------
 Makefile.in          | 33 ++++++++++-----------------------
 configure.in         |  3 ++-
 operator/Makefile.in | 11 ++---------
 4 files changed, 17 insertions(+), 47 deletions(-)

diff --git a/Makefile.global b/Makefile.global
index 42400f9a1..5369f4d99 100644
--- a/Makefile.global
+++ b/Makefile.global
@@ -23,23 +23,13 @@ $(abs_top_builddir)/config.h: $(top_srcdir)/config.h.in $(abs_top_builddir)/conf
 $(top_srcdir)/configure: $(top_srcdir)/configure.in 
 	-( cd $(top_srcdir) && $(AUTOCONF) )
 
-#extern modules
-
-$(addsuffix .o,$(EXTERNMODULES)): %.o:
-	( cd $(dir $@) && ${MAKE} $(notdir $@) )
-
-#extern libs
-
-$(LINKLIBS): %.a: Makefile $(abs_top_builddir)/config.status $(top_srcdir)/configure
-	@( cd $(dir $@) && ${MAKE} $(notdir $@) )
-
 #dep rules
 
 # PROGRAMS_WITH_GIT_HASH require git_hash.h which is dynamically built by a phony make target
 # to prevent too frequent building of git_hash (slowing down the build)
 # we filter the list of all objects and treat these separately
 $(addsuffix .d, $(filter-out ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c Makefile
-	@ $(CCDEP) ${DEPFLAGS} ${CPPFLAGS} ${INCLUDES} ${DEFS} $< > $@
+	@ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@
 
 # dirty hack to prevent make from entering an infinite loop because a phony target is given as a real
 # dependency (make will build invert.d and hmc_tm.d indefinitely)
@@ -49,13 +39,12 @@ $(addsuffix .d, $(filter-out ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdi
 # irrelevant because it will be rebuilt during the compilation of either invert or hmc_tm
 ifneq (git_hash.h, $(findstring git_hash.h,$(wildcard $(top_srcdir)/git_has*.h)))
 $(addsuffix .d, $(filter ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c ${top_srcdir}/git_hash.h Makefile
-	@ $(CCDEP) ${DEPFLAGS} ${CPPFLAGS} ${INCLUDES} ${DEFS} $< > $@
+	@ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@
 else
 $(addsuffix .d, $(filter ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c Makefile
-	@ $(CCDEP) ${DEPFLAGS} ${CPPFLAGS} ${INCLUDES} ${DEFS} $< > $@
+	@ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@
 endif
 
-
 ${top_builddir}/fixed_volume.h: ${top_srcdir}/fixed_volume.h.in ${top_builddir}/config.status
 	cd ${abs_top_builddir} && CONFIG_FILES=fixed_volume.h CONFIG_HEADERS= $(SHELL) ${top_builddir}/config.status
 
diff --git a/Makefile.in b/Makefile.in
index 9da193912..20225e744 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -31,12 +31,8 @@ USESUBDIRS = @USESUBDIRS@
 NVCC = @NVCC@
 GPUMPICOMPILER = @GPUMPICOMPILER@
 
-
 INCLUDES = @INCLUDES@
 LINK = $(CCLD) -o $@ ${LDFLAGS}
-LINKLIBS = ${top_builddir}/linalg/liblinalg.a  \
-	${top_builddir}/solver/libsolver.a ${top_builddir}/io/libio.a \
-	${top_builddir}/buffers/libbuffers.a $(top_builddir)/cu/libcu.a
 
 COMPILE = ${CC} ${DEFS} ${INCLUDES} -o $@ ${CFLAGS}
 
@@ -66,7 +62,6 @@ GPUOBJECTS := $(patsubst $(srcdir)/$(GPUDIR)/%.cu, $(GPUDIR)/%.o, $(GPUSOURCES))
 #GPUSOURCES_C := $(wildcard $(srcdir)/$(GPUDIR)/*.c)
 #GPUOBJECTS_C := $(patsubst $(srcdir)/$(GPUDIR)/%.c, $(GPUDIR)/%.o, $(GPUSOURCES_C))
 
-
 NOOPTMOD = test/check_xchange test/check_geometry
 
 PROGRAMS = hmc_tm benchmark invert gen_sources  \
@@ -78,18 +73,15 @@ SUBDIRS = ${USESUBDIRS}
 # delete the default suffix rules
 .SUFFIXES:
 
-all: Makefile all-recursive dep hmc_tm invert
+all: Makefile dep $(SUBDIRS) hmc_tm invert benchmark
 
-#ifneq (,$(findstring lapack,${LIBS}))
-#all: Makefile all-recursive dep hmc_tm invert
-#else
-#all: Makefile all-recursive dep hmc_tm invert
-#endif
+$(SUBDIRS):
+	$(MAKE) --directory=$@
 
 # run the GIT-VERSION-GEN script to generate version information in git_hash.h
 # making sure that we run in the correct directory
 ${top_srcdir}/git_hash.h:
-	@cd @srcdir@ && sh GIT-VERSION-GEN
+	@ ( cd @srcdir@ && sh GIT-VERSION-GEN )
 
 -include $(addsuffix .d,$(ALLOBJ))
 
@@ -106,9 +98,6 @@ libhmc.a: ${addsuffix .o, ${MODULES} ${SMODULES}} Makefile
 	@$(RANLIB) libhmc.a
 	@cp libhmc.a ${top_builddir}/lib/libhmc.a
 
-#${addsuffix .o, ${ALLOBJ}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/config.h
-#	${COMPILE} ${OPTARGS} -c $<
-
 $(addsuffix .o,$(filter-out ${NOOPTMOD},${MODULES})): %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/config.h
 	${COMPILE} ${OPTARGS} -c $<
 
@@ -122,7 +111,7 @@ ${addsuffix .o, ${SMODULES}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir
 ${addsuffix .o, ${PROGRAMS}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/config.h ${top_srcdir}/git_hash.h
 	${COMPILE} ${OPTARGS} -c $<
 
-${PROGRAMS}: %: %.o libhmc.a all-recursive
+${PROGRAMS}: %: %.o libhmc.a $(SUBDIRS)
 	 ${LINK} $@.o $(GPUOBJECTS) $(GPUOBJECTS_C) $(LIBS)
 
 # The rules for unit tests are kept in a separate file for tidyness
@@ -152,15 +141,13 @@ compile-clean: compile-clean-recursive Makefile
 	rm -f *.o *.d test/*.o test/*.d tests/*.o tests/*.d
 
 clean: clean-recursive Makefile
-	rm -f hmc_tm invert *.o *.d test/*.o test/*.d tests/*.o tests/*.d
+	rm -f benchmark hmc_tm invert *.o *.d test/*.o test/*.d tests/*.o tests/*.d
 
 distclean: distclean-recursive Makefile
-	rm -f hmc_tm hybrid *.o *.d *~ Makefile config.log config.status fixed_volume.h
+	rm -f benchmark hmc_tm invert *.o *.d *~ Makefile config.log config.status fixed_volume.h
 	rm -f config.h
 
-.PHONY: all ${top_srcdir}/git_hash.h clean compile-clean distclean dep install \
-	$(PROGRAMS) all-recursive \
-	all-debug-recursive all-profile-recursive \
+.PHONY: all ${SUBDIRS} ${top_srcdir}/git_hash.h clean compile-clean distclean dep install \
+	all-recursive all-debug-recursive all-profile-recursive \
 	clean-recursive distclean-recursive \
-	compile-clean-recursive $(LINKLIBS) \
-	tests libhmc.a
+	compile-clean-recursive
diff --git a/configure.in b/configure.in
index ed85be644..fe7e452e9 100644
--- a/configure.in
+++ b/configure.in
@@ -40,7 +40,8 @@ AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
 LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib"
 CCLD=${CC}
 
-USESUBDIRS="buffers cu io solver linalg monomial xchange operator init"
+# compilation in operator is slowest so we do it first, saves time in parallel compiles
+USESUBDIRS="operator linalg solver monomial buffers cu io xchange init"
 
 AC_CHECK_HEADERS([stdint.h],
 [ dnl for inttypes.h and stdint.h for uint_xxx types
diff --git a/operator/Makefile.in b/operator/Makefile.in
index 5742a10de..14638dbb9 100644
--- a/operator/Makefile.in
+++ b/operator/Makefile.in
@@ -30,8 +30,8 @@ LDADD =
 COMPILE = ${CC} $(DEFS) ${INCLUDES} ${CFLAGS}
 
 LIBRARIES = liboperator
-liboperator_TARGETS = clovertm_operators clover_leaf tm_operators_nd clover_term clover_invert \
-	clover_deriv clover_accumulate_deriv clover_det
+liboperator_TARGETS = clover_accumulate_deriv clover_deriv clovertm_operators clover_leaf \
+  tm_operators_nd clover_term clover_invert clover_det
 
 liboperator_STARGETS = Hopping_Matrix_nocom tm_times_Hopping_Matrix Hopping_Matrix \
 	tm_operators tm_sub_Hopping_Matrix D_psi Dov_psi Dov_proj
@@ -51,15 +51,12 @@ debug all-debug: all
 profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@
 profile all-profile: all
 
-
 #include dep rules
-
 -include $(addsuffix .d,${liboperator_TARGETS})
 
 include ${top_srcdir}/Makefile.global
 
 # rule to compile objects
-
 ${liboperator_OBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/config.h
 	$(COMPILE) ${OPTARGS} -c $<
 
@@ -67,7 +64,6 @@ ${liboperator_SOBJECTS}: %.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/con
 	$(COMPILE) ${SOPTARGS} -c $<
 
 # rule to make liboperator
-
 liboperator.a: ${liboperator_OBJECTS} ${liboperator_SOBJECTS} Makefile
 	@rm -f liboperator.a
 	@${AR} cru liboperator.a ${liboperator_OBJECTS} ${liboperator_SOBJECTS}
@@ -75,13 +71,10 @@ liboperator.a: ${liboperator_OBJECTS} ${liboperator_SOBJECTS} Makefile
 	@cp liboperator.a ../lib/liboperator.a
 
 # rule to generate .d files
-
 $(addsuffix .d, $(liboperator_TARGETS) ${liboperator_STARGETS}): %.d: ${srcdir}/%.c Makefile
 	@${CCDEP} ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
 
-
 # rule to make dependencies
-
 dep: ${addsuffix .d, ${liboperator_TARGETS} ${liboperator_STARGETS}}
 
 # rules to clean

From c8657f80cc752b11e532c3c5ff6532abba6381cc Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Thu, 1 Nov 2012 17:24:21 +0100
Subject: [PATCH 083/110] no iteration number IO for CLOVERNDTRLOG monomial

---
 update_tm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/update_tm.c b/update_tm.c
index 87680a6e7..f742a9412 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -366,6 +366,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
 	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != SFGAUGE 
 	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDPOLY
 	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDCLOVER
+	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERNDTRLOG
 	   && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERTRLOG ) {
           fprintf(datafile,"%d %d ",  monomial_list[ Integrator.mnls_per_ts[i][j] ].iter0, 
                   monomial_list[ Integrator.mnls_per_ts[i][j] ].iter1);

From 200bbb9b155870f5803aee9a4e69f766ad7c0665 Mon Sep 17 00:00:00 2001
From: "Albert Deuzeman (ITP Bern)" <deuzeman@itp.unibe.ch>
Date: Tue, 6 Nov 2012 11:51:34 +0100
Subject: [PATCH 084/110] Fix alignment in terms of bytes and relax overly
 aggressive values for ALIGN_BASE from bits.

---
 configure.in | 123 +++++++++++++++++++++++++--------------------------
 1 file changed, 60 insertions(+), 63 deletions(-)

diff --git a/configure.in b/configure.in
index 673682a59..27c5d9f92 100644
--- a/configure.in
+++ b/configure.in
@@ -361,39 +361,36 @@ DEPFLAGS="$DEPFLAGS"
 
   AC_MSG_CHECKING(what alignment we want for arrays)
   AC_ARG_WITH(alignment,
-    [AS_HELP_STRING([--with-alignment[=n]], [align arrays to 0, 16, 32, 64 or 128 bits [default=auto]])],
+    [AS_HELP_STRING([--with-alignment[=n]], [Automatically or explicitly align arrays to byte number: auto, none, 2, 4, 8, 16 [default=auto]])],
     withalign=$withval, withalign=auto)
-  if test "$withalign" = "no"; then
-    withalign=0
-  fi
-  if test "$withalign" = "0"; then
+  if test "$withalign" = "none"; then
     AC_MSG_RESULT(none)
     AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
     AC_DEFINE(ALIGN, [])
+  elif test $withalign = 2; then
+    AC_MSG_RESULT(2 bytes)
+    AC_DEFINE(ALIGN_BASE, 0x01, [Align base])
+    AC_DEFINE(ALIGN, [__attribute__ ((aligned (2)))])
+  elif test $withalign = 4; then
+    AC_MSG_RESULT(4 bytes)
+    AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
+    AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
+  elif test $withalign = 8; then
+    AC_MSG_RESULT(8 bytes)
+    AC_DEFINE(ALIGN_BASE, 0x07, [Align base])
+    AC_DEFINE(ALIGN, [__attribute__ ((aligned (8)))])
   elif test $withalign = 16; then
-    AC_MSG_RESULT(16 bits)
-    AC_DEFINE(ALIGN_BASE, 0x0f, [Align base])
+    AC_MSG_RESULT(16 bytes)
+    AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
     AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
-  elif test $withalign = 32; then
-    AC_MSG_RESULT(32 bits)
-    AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
-    AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
-  elif test $withalign = 64; then
-    AC_MSG_RESULT(64 bits)
-    AC_DEFINE(ALIGN_BASE, 0x3f, [Align base])
-    AC_DEFINE(ALIGN, [__attribute__ ((aligned (64)))])
-  elif test $withalign = 128; then
-    AC_MSG_RESULT(128 bits)
-    AC_DEFINE(ALIGN_BASE, 0x7f, [Align base])
-    AC_DEFINE(ALIGN, [__attribute__ ((aligned (128)))])
   elif test $withalign = auto; then
-    withautoalign=0
+    withautoalign=1
     AC_MSG_RESULT(auto)
     AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
     AC_DEFINE(ALIGN, [], [])
   else
     AC_MSG_RESULT(Unusable value for array alignment)
-    AC_MSG_ERROR([Only alignment to 0, 16, 32, 64 or 128 bits, or auto alignment available])
+    AC_MSG_ERROR([Allowed values are: auto, none, 2, 4, 8, 16])
   fi
 
 
@@ -408,14 +405,14 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     AC_MSG_RESULT(yes)
     AC_DEFINE(P4,1,Use Pentium4 instructions)
     if test $withalign = auto; then
-      if test $withautoalign = 0 || test $withautoalign = 16 ||test $withautoalign = 32; then
-        AC_MSG_RESULT(changing array alignment to 64 bits for P4 instructions)
-        AC_DEFINE(ALIGN_BASE, 0x3f, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (64)))])
-        withautoalign=64
+      if test $withautoalign = 1 || test $withautoalign = 2 ||test $withautoalign = 4; then
+        AC_MSG_RESULT(changing array alignment to 8 bytes for P4 instructions)
+        AC_DEFINE(ALIGN_BASE, 0x07, [Align base])
+        AC_DEFINE(ALIGN, [__attribute__ ((aligned (8)))])
+        withautoalign=8
       fi
-    elif test $withalign = 0 || test $withalign = 16 ||test $withalign = 32; then
-      AC_MSG_ERROR([alignment incompatible with P4 instructions (64 bits required)!])
+    elif test $withalign = none || test $withalign = 2 ||test $withalign = 4; then
+      AC_MSG_ERROR([alignment incompatible with P4 instructions (8 bytes minimum required)!])
     fi
   else
     AC_MSG_RESULT(no)
@@ -429,14 +426,14 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     AC_MSG_RESULT(yes)
     AC_DEFINE(OPTERON,1,Use Opteron instructions)
     if test $withalign = auto; then
-      if test $withautoalign = 0 || test $withautoalign = 16; then
-        AC_MSG_RESULT(changing array alignment to 32 bits for Opteron instructions)
-        AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
-        withautoalign=0x1f
+      if test $withautoalign = 1 || test $withautoalign = 2; then
+        AC_MSG_RESULT(changing array alignment to 4 bytes for Opteron instructions)
+        AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
+        AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
+        withautoalign=4
       fi
-    elif test $withalign = 0 || test $withalign = 16; then
-      AC_MSG_ERROR([alignment incompatible with Opteron instructions (32 bits required)!])
+    elif test $withalign = none || test $withalign = 2; then
+      AC_MSG_ERROR([alignment incompatible with Opteron instructions (4 bytes required)!])
     fi
   else
     AC_MSG_RESULT(no)
@@ -449,14 +446,14 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
   if test $enable_sse2 = yes; then
     AC_MSG_RESULT(yes)
     if test $withalign = auto; then
-      if test $withautoalign = 0 || test $withautoalign = 16; then
-        AC_MSG_WARN(changing array alignment to 32 bits for SSE2 instructions)
-        AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
-        withautoalign=0x32
+      if test $withautoalign = 1 || test $withautoalign = 2; then
+        AC_MSG_WARN(changing array alignment to 4 bytes for SSE2 instructions)
+        AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
+        AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
+        withautoalign=4
       fi
-    elif test $withalign = 0 || test $withalign = 16; then
-      AC_MSG_ERROR([alignment incompatible with SSE2 instructions (32 bits required)]!)
+    elif test $withalign = none || test $withalign = 2; then
+      AC_MSG_ERROR([alignment incompatible with SSE2 instructions (4 bytes required)!])
     fi
   else
     AC_MSG_RESULT(no)
@@ -469,14 +466,14 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
   if test $enable_sse3 = yes; then
     AC_MSG_RESULT(yes)
     if test $withalign = auto; then
-      if test $withautoalign = 0 || test $withautoalign = 16; then
-        AC_MSG_RESULT(changing array alignment to 32 bits for SSE3 instructions)
-        AC_DEFINE(ALIGN_BASE, 0x1f, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
-        withautoalign=32
+      if test $withautoalign = 1 || test $withautoalign = 2; then
+        AC_MSG_RESULT(changing array alignment to 4 bytes for SSE3 instructions)
+        AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
+        AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
+        withautoalign=4
       fi
-    elif test $withalign = 0 || test $withalign = 16; then
-      AC_MSG_ERROR([alignment incompatible with SSE2 instructions (32 bits required)])
+    elif test $withalign = none || test $withalign = 2; then
+      AC_MSG_ERROR([alignment incompatible with SSE3 instructions (4 bytes required)])
     fi
   else
     AC_MSG_RESULT(no)
@@ -619,14 +616,14 @@ elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_
   SOPTARGS="-O3"
   AC_DEFINE(BGL,1,[Optimize for Blue Gene/L])
   if test $withalign = auto; then
-    if test $withautoalign = 0; then
-      AC_MSG_RESULT(changing array alignment to 16 bits for BGL instructions)
-      AC_DEFINE(ALIGN_BASE, 0x0f, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
-      withautoalign=16
+    if test $withautoalign = 1; then
+      AC_MSG_RESULT(changing array alignment to 2 bytes for BGL instructions)
+      AC_DEFINE(ALIGN_BASE, 0x01, [Align base])
+      AC_DEFINE(ALIGN, [__attribute__ ((aligned (2)))])
+      withautoalign=2
     fi
-  elif test $withalign = 0; then
-    AC_MSG_ERROR([alignment incompatible with BGL instructions (16 bits required)!])
+  elif test $withalign = none; then
+    AC_MSG_ERROR([alignment incompatible with BGL instructions (2 bytes required)!])
   fi
 
   if test "$XLC" = "yes"; then
@@ -660,14 +657,14 @@ elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_
   AC_DEFINE(BGL,1,[Optimize for Blue Gene/L])
   AC_DEFINE(BGP,1,[Optimize for Blue Gene/P])
   if test $withalign = auto; then
-    if test $withautoalign = 0; then
-      AC_MSG_RESULT(changing array alignment to 16 bits for BGP instructions)
-      AC_DEFINE(ALIGN_BASE, 0x0f, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
-      withautoalign=16
+    if test $withautoalign = 1; then
+      AC_MSG_RESULT(changing array alignment to 2 bytes for BGP instructions)
+      AC_DEFINE(ALIGN_BASE, 0x01, [Align base])
+      AC_DEFINE(ALIGN, [__attribute__ ((aligned (2)))])
+      withautoalign=2
     fi
-  elif test $withalign = 0; then
-    AC_MSG_ERROR([alignment incompatible with BGP instructions (16 bits required)!])
+  elif test $withalign = none; then
+    AC_MSG_ERROR([alignment incompatible with BGP instructions (2 bytes required)!])
   fi
 
   if test "$XLC" = "yes"; then

From 3d61665685296e2bab9c44641a48b6029d400869 Mon Sep 17 00:00:00 2001
From: "Albert Deuzeman (ITP Bern)" <deuzeman@itp.unibe.ch>
Date: Wed, 7 Nov 2012 11:49:13 +0100
Subject: [PATCH 085/110] Changed alignment values for P4 and Opteron, added
 BG/Q QPX alignment support (32 bytes).

---
 configure.in | 134 ++++++++++++++++++++++++++-------------------------
 1 file changed, 69 insertions(+), 65 deletions(-)

diff --git a/configure.in b/configure.in
index 27c5d9f92..9071c4f7c 100644
--- a/configure.in
+++ b/configure.in
@@ -359,39 +359,32 @@ AC_SUBST(SPI_FILES)
 INCLUDES="$INCLUDES -I\$(HOME)/include/ -I. -I\${abs_top_builddir}/  -I\${abs_top_srcdir}/ -I${lime_dir}/include/ -I${lemon_dir}/include/"
 DEPFLAGS="$DEPFLAGS"
 
-  AC_MSG_CHECKING(what alignment we want for arrays)
-  AC_ARG_WITH(alignment,
-    [AS_HELP_STRING([--with-alignment[=n]], [Automatically or expliclty align arrays to byte number: auto, none, 2, 4, 8, 16 [default=auto]])],
-    withalign=$withval, withalign=auto)
-  if test "$withalign" = "none"; then
-    AC_MSG_RESULT(none)
-    AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
-    AC_DEFINE(ALIGN, [])
-  elif test $withalign = 2; then
-    AC_MSG_RESULT(2 bytes)
-    AC_DEFINE(ALIGN_BASE, 0x01, [Align base])
-    AC_DEFINE(ALIGN, [__attribute__ ((aligned (2)))])
-  elif test $withalign = 4; then
-    AC_MSG_RESULT(4 bytes)
-    AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
-    AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
-  elif test $withalign = 8; then
-    AC_MSG_RESULT(8 bytes)
-    AC_DEFINE(ALIGN_BASE, 0x07, [Align base])
-    AC_DEFINE(ALIGN, [__attribute__ ((aligned (8)))])
-  elif test $withalign = 16; then
-    AC_MSG_RESULT(16 bytes)
-    AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
-    AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
-  elif test $withalign = auto; then
-    withautoalign=1
-    AC_MSG_RESULT(auto)
-    AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
-    AC_DEFINE(ALIGN, [], [])
-  else
-    AC_MSG_RESULT(Unusable value for array alignment)
-    AC_MSG_ERROR([Allowed values are: auto, none, 2, 4, 8 16])
-  fi
+AC_MSG_CHECKING(what alignment we want for arrays)
+AC_ARG_WITH(alignment,
+  [AS_HELP_STRING([--with-alignment[=n]], [Automatically or expliclty align arrays to byte number: auto, none, 16, 32 [default=auto]])],
+  withalign=$withval, withalign=auto)
+if test "$withalign" = "none"; then
+  AC_MSG_RESULT(none)
+  withalign=1
+  AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
+  AC_DEFINE(ALIGN, [])
+elif test $withalign = 16; then
+  AC_MSG_RESULT(16 bytes)
+  AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
+  AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
+elif test $withalign = 32; then
+  AC_MSG_RESULT(32 bytes)
+  AC_DEFINE(ALIGN_BASE, 0x1F, [Align base])
+  AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
+elif test $withalign = auto; then
+  withautoalign=1
+  AC_MSG_RESULT(auto)
+  AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
+  AC_DEFINE(ALIGN, [], [])
+else
+  AC_MSG_RESULT(Unusable value for array alignment)
+  AC_MSG_ERROR([Allowed values are: auto, none, 16, 32])
+fi
 
 
 dnl in the following we check for extra options
@@ -405,14 +398,14 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     AC_MSG_RESULT(yes)
     AC_DEFINE(P4,1,Use Pentium4 instructions)
     if test $withalign = auto; then
-      if test $withautoalign = 1 || test $withautoalign = 2 ||test $withautoalign = 4; then
-        AC_MSG_RESULT(changing array alignment to 8 bytes for P4 instructions)
-        AC_DEFINE(ALIGN_BASE, 0x07, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (8)))])
-        withautoalign=8
+      if test $withautoalign -lt 16; then
+        AC_MSG_RESULT(changing array alignment to 16 bytes for P4 instructions)
+        AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
+        AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
+        withautoalign=16
       fi
-    elif test $withalign = none || test $withalign = 2 ||test $withalign = 4; then
-      AC_MSG_ERROR([alignment incompatible with P4 instructions (8 bytes minimum required)!])
+    elif test $withalign -lt 16; then
+      AC_MSG_ERROR([alignment incompatible with P4 instructions (16 bytes required)!])
     fi
   else
     AC_MSG_RESULT(no)
@@ -426,14 +419,14 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     AC_MSG_RESULT(yes)
     AC_DEFINE(OPTERON,1,Use Opteron instructions)
     if test $withalign = auto; then
-      if test $withautoalign = 1 || test $withautoalign = 2; then
-        AC_MSG_RESULT(changing array alignment to 4 bytes for Opteron instructions)
-        AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
-        withautoalign=4
+      if test $withautoalign -lt 16; then
+        AC_MSG_RESULT(changing array alignment to 16 bytes for Opteron instructions)
+        AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
+        AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
+        withautoalign=16
       fi
-    elif test $withalign = none || test $withalign = 2; then
-      AC_MSG_ERROR([alignment incompatible with Opteron instructions (4 bytes required)!])
+    elif test $withalign -lt 16; then
+      AC_MSG_ERROR([alignment incompatible with Opteron instructions (16 bytes required)!])
     fi
   else
     AC_MSG_RESULT(no)
@@ -445,15 +438,8 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     enable_sse2=$enableval, enable_sse2=no)
   if test $enable_sse2 = yes; then
     AC_MSG_RESULT(yes)
-    if test $withalign = auto; then
-      if test $withautoalign = 1 || test $withautoalign = 2; then
-        AC_MSG_WARN(changing array alignment to 4 bytes for SSE2 instructions)
-        AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
-        withautoalign=4
-      fi
-    elif test $withalign = none || test $withalign = 2; then
-      AC_MSG_ERROR([alignment incompatible with SSE2 instructions (4 bytes required)]!)
+    if test $withalign != auto && test $withalign -lt 16; then
+      AC_MSG_ERROR([alignment incompatible with SSE2 instructions (16 bytes required)])
     fi
   else
     AC_MSG_RESULT(no)
@@ -465,19 +451,37 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     enable_sse3=$enableval, enable_sse3=no)
   if test $enable_sse3 = yes; then
     AC_MSG_RESULT(yes)
-    if test $withalign = auto; then
-      if test $withautoalign = 1 || test $withautoalign = 2; then
-        AC_MSG_RESULT(changing array alignment to 4 bytes for SSE3 instructions)
-        AC_DEFINE(ALIGN_BASE, 0x03, [Align base])
-        AC_DEFINE(ALIGN, [__attribute__ ((aligned (4)))])
-        withautoalign=4
-      fi
-    elif test $withalign = none || test $withalign = 2; then
-      AC_MSG_ERROR([alignment incompatible with SSE2 instructions (4 bytes required)])
+    if test $withalign != auto && test $withalign -lt 16; then
+      AC_MSG_ERROR([alignment incompatible with SSE3 instructions (16 bytes required)])
     fi
   else
     AC_MSG_RESULT(no)
   fi
+
+  if test "$enable_sse2" = "yes" || test "$enable_sse3" = "yes"; then
+    if test $withalign = auto; then
+      if test $withautoalign -lt 16; then
+        AC_MSG_RESULT(changing array alignment to 16 bytes for SSE instructions)
+        AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
+        AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
+        withautoalign=16
+      fi
+    fi
+  fi
+fi
+
+dnl We here check for alignment issues with QPX instructions -- this flag has been set earlier
+if test $enable_qpx = yes; then
+  if test $withalign = auto; then
+    if test $withautoalign -lt 32; then
+      AC_MSG_RESULT(changing array alignment to 32 bytes for use of QPX instructions on BG/Q)
+      AC_DEFINE(ALIGN_BASE, 0x1F, [Align base])
+      AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
+      withautoalign=32
+    fi
+  elif test $withalign -lt 32; then
+    AC_MSG_ERROR([alignment incompatible with QPX instructions (32 bytes required)])
+  fi
 fi
 
 AC_MSG_CHECKING(whether we want to use gprof as profiler)

From 94678d8e15bd84f049645d90e843e4f076eabf64 Mon Sep 17 00:00:00 2001
From: "Albert Deuzeman (ITP Bern)" <deuzeman@itp.unibe.ch>
Date: Wed, 7 Nov 2012 11:59:50 +0100
Subject: [PATCH 086/110] Change alignment to an 'enable', rather than a 'with'
 option.

---
 configure.in | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/configure.in b/configure.in
index 9071c4f7c..1e73628c8 100644
--- a/configure.in
+++ b/configure.in
@@ -360,9 +360,9 @@ INCLUDES="$INCLUDES -I\$(HOME)/include/ -I. -I\${abs_top_builddir}/  -I\${abs_to
 DEPFLAGS="$DEPFLAGS"
 
 AC_MSG_CHECKING(what alignment we want for arrays)
-AC_ARG_WITH(alignment,
-  [AS_HELP_STRING([--with-alignment[=n]], [Automatically or expliclty align arrays to byte number: auto, none, 16, 32 [default=auto]])],
-  withalign=$withval, withalign=auto)
+AC_ARG_ENABLE(alignment,
+  [AS_HELP_STRING([--enable-alignment[=n]], [Automatically or expliclty align arrays to byte number: auto, none, 16, 32 [default=auto]])],
+  withalign=$enableval, withalign=auto)
 if test "$withalign" = "none"; then
   AC_MSG_RESULT(none)
   withalign=1
@@ -386,7 +386,6 @@ else
   AC_MSG_ERROR([Allowed values are: auto, none, 16, 32])
 fi
 
-
 dnl in the following we check for extra options
 if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
 
@@ -399,7 +398,7 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     AC_DEFINE(P4,1,Use Pentium4 instructions)
     if test $withalign = auto; then
       if test $withautoalign -lt 16; then
-        AC_MSG_RESULT(changing array alignment to 16 bytes for P4 instructions)
+        AC_MSG_RESULT(increasing array alignment to 16 bytes for P4 instructions)
         AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
         AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
         withautoalign=16
@@ -420,7 +419,7 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
     AC_DEFINE(OPTERON,1,Use Opteron instructions)
     if test $withalign = auto; then
       if test $withautoalign -lt 16; then
-        AC_MSG_RESULT(changing array alignment to 16 bytes for Opteron instructions)
+        AC_MSG_RESULT(increasing array alignment to 16 bytes for Opteron instructions)
         AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
         AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
         withautoalign=16
@@ -461,7 +460,7 @@ if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
   if test "$enable_sse2" = "yes" || test "$enable_sse3" = "yes"; then
     if test $withalign = auto; then
       if test $withautoalign -lt 16; then
-        AC_MSG_RESULT(changing array alignment to 16 bytes for SSE instructions)
+        AC_MSG_RESULT(increasing array alignment to 16 bytes for SSE instructions)
         AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
         AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
         withautoalign=16
@@ -474,7 +473,7 @@ dnl We here check for alignment issues with QPX instructions -- this flag has be
 if test $enable_qpx = yes; then
   if test $withalign = auto; then
     if test $withautoalign -lt 32; then
-      AC_MSG_RESULT(changing array alignment to 32 bytes for use of QPX instructions on BG/Q)
+      AC_MSG_RESULT(increasing array alignment to 32 bytes for use of QPX instructions on BG/Q)
       AC_DEFINE(ALIGN_BASE, 0x1F, [Align base])
       AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
       withautoalign=32

From b5635eb502404252d54102c5e92530c44c7d68ef Mon Sep 17 00:00:00 2001
From: "Albert Deuzeman (ITP Bern)" <deuzeman@itp.unibe.ch>
Date: Thu, 8 Nov 2012 14:23:15 +0100
Subject: [PATCH 087/110] Add consistent BG auto-alignment.

---
 configure.in | 51 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/configure.in b/configure.in
index 1e73628c8..8e4645f02 100644
--- a/configure.in
+++ b/configure.in
@@ -483,6 +483,37 @@ if test $enable_qpx = yes; then
   fi
 fi
 
+dnl Check for alignment associated with (non-QPX) BG optimization.
+dnl This will also result in using 32 byte alignment on MareNostrum, but that should be fairly innocuous.
+if test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "blrts"; then
+  if test $withalign = auto; then
+    if test $withautoalign -lt 16; then
+      AC_MSG_RESULT(increasing array alignment to 16 bytes for BG/L optimization)
+      AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
+      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
+      withautoalign=16
+    fi
+  fi
+elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "bprts"; then
+  if test $withalign = auto; then
+    if test $withautoalign -lt 16; then
+      AC_MSG_RESULT(increasing array alignment to 16 bytes for BG/P optimization)
+      AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
+      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
+      withautoalign=16
+    fi
+  fi
+elif test "$host_cpu" = "powerpc64" && test "$host_vendor" = "unknown" && test "$host_os" = "linux-gnu"; then
+  if test $withalign = auto; then
+    if test $withautoalign -lt 32; then
+      AC_MSG_RESULT(increasing array alignment to 32 bytes for BG/Q and generic POWER optimization)
+      AC_DEFINE(ALIGN_BASE, 0x1F, [Align base])
+      AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
+      withautoalign=32
+    fi
+  fi
+fi
+
 AC_MSG_CHECKING(whether we want to use gprof as profiler)
 AC_ARG_WITH(gprof,
   AS_HELP_STRING([--with-gprof], [use of gprof profiler [default=no]]),
@@ -618,16 +649,6 @@ elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_
   OPTARGS="-O3"
   SOPTARGS="-O3"
   AC_DEFINE(BGL,1,[Optimize for Blue Gene/L])
-  if test $withalign = auto; then
-    if test $withautoalign = 1; then
-      AC_MSG_RESULT(changing array alignment to 2 bytes for BGL instructions)
-      AC_DEFINE(ALIGN_BASE, 0x01, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (2)))])
-      withautoalign=2
-    fi
-  elif test $withalign = none; then
-    AC_MSG_ERROR([alignment incompatible with BGL instructions (2 bytes required)!])
-  fi
 
   if test "$XLC" = "yes"; then
     CFLAGS="-qsrcmsg $CFLAGS"
@@ -659,16 +680,6 @@ elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_
   SOPTARGS="-O3"
   AC_DEFINE(BGL,1,[Optimize for Blue Gene/L])
   AC_DEFINE(BGP,1,[Optimize for Blue Gene/P])
-  if test $withalign = auto; then
-    if test $withautoalign = 1; then
-      AC_MSG_RESULT(changing array alignment to 2 bytes for BGP instructions)
-      AC_DEFINE(ALIGN_BASE, 0x01, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (2)))])
-      withautoalign=2
-    fi
-  elif test $withalign = none; then
-    AC_MSG_ERROR([alignment incompatible with BGP instructions (2 bytes required)!])
-  fi
 
   if test "$XLC" = "yes"; then
     CFLAGS="-qsrcmsg $CFLAGS"

From 25af88db79f2a80cf6be5568c95c3c5cb7c0a14d Mon Sep 17 00:00:00 2001
From: Andreas Nube <annube@ifh.de>
Date: Fri, 16 Nov 2012 10:54:14 +0100
Subject: [PATCH 088/110] removed "readin" for initial store counter to make
 config file runnable from a fresh directory

---
 sample-input/sample-hmc-poly.input | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sample-input/sample-hmc-poly.input b/sample-input/sample-hmc-poly.input
index c29312bdc..53434d883 100644
--- a/sample-input/sample-hmc-poly.input
+++ b/sample-input/sample-hmc-poly.input
@@ -20,7 +20,6 @@ CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
-InitialStoreCounter = readin
 DebugLevel = 1
 StartCondition = hot
 ComputeEVs = no

From c02b77668ce01b4f8bb8b2e5da30d42c2a83cbf7 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Thu, 22 Nov 2012 15:49:59 +0100
Subject: [PATCH 089/110] generalize initialization of clover_trlog_monomial
 and clovernd_trlog_monomial to multiple CLOVERDET or NDCLOVER monomials in
 the same action

---
 monomial/monomial.c | 99 ++++++++++++++++++++++-----------------------
 1 file changed, 49 insertions(+), 50 deletions(-)

diff --git a/monomial/monomial.c b/monomial/monomial.c
index 0be88e108..1bc98bdf3 100644
--- a/monomial/monomial.c
+++ b/monomial/monomial.c
@@ -46,8 +46,10 @@
 monomial monomial_list[max_no_monomials];
 int no_monomials = 0;
 int no_gauge_monomials = 0;
-int clover_trlog_monomial = 0;
-int clovernd_trlog_monomial = 0;
+int clover_monomials[max_no_monomials];
+int clovernd_monomials[max_no_monomials];
+int no_clover_monomials = 0;
+int no_clovernd_monomials = 0;
 static spinor * _pf;
 spinor ** w_fields;
 const int no_wfields = 4;
@@ -187,11 +189,8 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].Qp = &Qsw_plus_psi;
 	monomial_list[i].Qm = &Qsw_minus_psi;
 	init_swpm(VOLUME);
-	clover_trlog_monomial = 1;
-	// the following we need to save for the trlog monomial
-	sw_mu = monomial_list[i].mu;
-	sw_k = monomial_list[i].kappa;
-	sw_c = monomial_list[i].c_sw;
+  clover_monomials[no_clover_monomials] = i;
+  no_clover_monomials++;
 	if(g_proc_id == 0 && g_debug_level > 1) {
 	  printf("# Initialised monomial of type CLOVERDET, no_monomials= %d\n", no_monomials);
 	}
@@ -262,11 +261,8 @@ int init_monomials(const int V, const int even_odd_flag) {
 	monomial_list[i].derivativefunction = &cloverndpoly_derivative;
 	monomial_list[i].pf2 = __pf+no*V;
 	monomial_list[i].even_odd_flag = 1;
-	clovernd_trlog_monomial = 1;
-	swn_c = monomial_list[i].c_sw;
-	swn_k = monomial_list[i].kappa;
-	swn_mubar = monomial_list[i].mubar;
-	swn_epsbar = monomial_list[i].epsbar;
+	clovernd_monomials[no_clovernd_monomials] = i;
+  no_clovernd_monomials++;
 	//monomial_list[i].Qsq = &Qsw_pm_ndpsi;
 	//monomial_list[i].Qp = &Qsw_ndpsi;
 	//monomial_list[i].Qm = &Qsw_dagger_ndpsi;
@@ -315,45 +311,48 @@ int init_monomials(const int V, const int even_odd_flag) {
     monomial_list[i].id = i;
     monomial_list[i].even_odd_flag = even_odd_flag;
   }
-  if(clover_trlog_monomial && even_odd_flag) {
-    monomial_list[no_monomials].type = CLOVERTRLOG;
-    strcpy( monomial_list[no_monomials].name, "CLOVERTRLOG");
-    add_monomial(CLOVERTRLOG);
-    monomial_list[no_monomials-1].pf = NULL;
-    monomial_list[no_monomials-1].id = no_monomials-1;
-    // set the parameters according to cloverdet monomial
-    // this need alltogether a more general approach
-    monomial_list[no_monomials-1].c_sw = sw_c;
-    monomial_list[no_monomials-1].mu = sw_mu;
-    monomial_list[no_monomials-1].kappa = sw_k;
-    monomial_list[no_monomials-1].hbfunction = &clover_trlog_heatbath;
-    monomial_list[no_monomials-1].accfunction = &clover_trlog_acc;
-    monomial_list[no_monomials-1].derivativefunction = NULL;
-    monomial_list[no_monomials-1].timescale = 0;
-    monomial_list[no_monomials-1].even_odd_flag = even_odd_flag;
-    if(g_proc_id == 0 && g_debug_level > 1) {
-      printf("# Initialised clover_trlog_monomial, no_monomials= %d\n", no_monomials);
+  /* initialize clovertrlog and cloverndtrlog monomials for all clover and clovernd monomials*/
+  if( even_odd_flag ) {
+    for( int j = 0; j < no_clover_monomials; j++ ) {
+      monomial_list[no_monomials].type = CLOVERTRLOG;
+      strcpy( monomial_list[no_monomials].name, "CLOVERTRLOG");
+      add_monomial(CLOVERTRLOG);
+      monomial_list[no_monomials-1].pf = NULL;
+      monomial_list[no_monomials-1].id = no_monomials-1;
+      // set the parameters according to cloverdet monomial
+      // this need alltogether a more general approach
+      monomial_list[no_monomials-1].c_sw = monomial_list[clover_monomials[j]].c_sw;
+      monomial_list[no_monomials-1].mu = monomial_list[clover_monomials[j]].mu;
+      monomial_list[no_monomials-1].kappa = monomial_list[clover_monomials[j]].kappa;
+      monomial_list[no_monomials-1].hbfunction = &clover_trlog_heatbath;
+      monomial_list[no_monomials-1].accfunction = &clover_trlog_acc;
+      monomial_list[no_monomials-1].derivativefunction = NULL;
+      monomial_list[no_monomials-1].timescale = 0;
+      monomial_list[no_monomials-1].even_odd_flag = even_odd_flag;
+      if(g_proc_id == 0 && g_debug_level > 1) {
+        printf("# Initialised clover_trlog_monomial, no_monomials= %d\n", no_monomials);
+      }
     }
-  }
-  if(clovernd_trlog_monomial && even_odd_flag) {
-    monomial_list[no_monomials].type = CLOVERNDTRLOG;
-    strcpy( monomial_list[no_monomials].name, "CLOVERNDTRLOG");
-    add_monomial(CLOVERNDTRLOG);
-    monomial_list[no_monomials-1].pf = NULL;
-    monomial_list[no_monomials-1].id = no_monomials-1;
-    // set the parameters according to cloverdet monomial
-    // this need alltogether a more general approach
-    monomial_list[no_monomials-1].c_sw = swn_c;
-    monomial_list[no_monomials-1].mubar = swn_mubar;
-    monomial_list[no_monomials-1].epsbar = swn_epsbar;
-    monomial_list[no_monomials-1].kappa = swn_k;
-    monomial_list[no_monomials-1].hbfunction = &clovernd_trlog_heatbath;
-    monomial_list[no_monomials-1].accfunction = &clovernd_trlog_acc;
-    monomial_list[no_monomials-1].derivativefunction = NULL;
-    monomial_list[no_monomials-1].timescale = 0;
-    monomial_list[no_monomials-1].even_odd_flag = 1;
-    if(g_proc_id == 0 && g_debug_level > 1) {
-      printf("# Initialised clovernd_trlog_monomial, no_monomials= %d\n", no_monomials);
+   for( int j = 0; j < no_clovernd_monomials; j++ ) { 
+      monomial_list[no_monomials].type = CLOVERNDTRLOG;
+      strcpy( monomial_list[no_monomials].name, "CLOVERNDTRLOG");
+      add_monomial(CLOVERNDTRLOG);
+      monomial_list[no_monomials-1].pf = NULL;
+      monomial_list[no_monomials-1].id = no_monomials-1;
+      // set the parameters according to cloverdet monomial
+      // this need alltogether a more general approach
+      monomial_list[no_monomials-1].c_sw = monomial_list[clovernd_monomials[j]].c_sw;
+      monomial_list[no_monomials-1].mubar = monomial_list[clovernd_monomials[j]].mubar;
+      monomial_list[no_monomials-1].epsbar = monomial_list[clovernd_monomials[j]].epsbar;
+      monomial_list[no_monomials-1].kappa = monomial_list[clovernd_monomials[j]].kappa;
+      monomial_list[no_monomials-1].hbfunction = &clovernd_trlog_heatbath;
+      monomial_list[no_monomials-1].accfunction = &clovernd_trlog_acc;
+      monomial_list[no_monomials-1].derivativefunction = NULL;
+      monomial_list[no_monomials-1].timescale = 0;
+      monomial_list[no_monomials-1].even_odd_flag = 1;
+      if(g_proc_id == 0 && g_debug_level > 1) {
+        printf("# Initialised clovernd_trlog_monomial, no_monomials= %d\n", no_monomials);
+      }
     }
   }
 

From d9d22ff363e783ff5dada726434d63878568f1b8 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 25 Nov 2012 15:00:13 +0100
Subject: [PATCH 090/110] refurbished the ReproduceRandomNumbers input
 parameter

Setting

ReproduceRandomNumbers = yes

in the input file should now produce really identical random numbers
not depending on MPI or scalar.

Between scalar and MPI I do see a difference in the 11th digit of
deltaH, which is due to rounding.
---
 P_M_eta.c                          |   2 +-
 Ptilde_nd.c                        |   6 +-
 Ptilde_nd.h                        |   2 +-
 benchmark.c                        |   4 +-
 chebyshev_polynomial.c             |   8 +-
 chebyshev_polynomial.h             |   2 +-
 chebyshev_polynomial_nd.c          |   6 +-
 chebyshev_polynomial_nd.h          |   2 +-
 hopping_test.c                     |   2 +-
 hybrid_update.c                    | 154 +++++-------
 invert.c                           |   6 +-
 monomial/clover_trlog_monomial.c   |   2 +-
 monomial/cloverdet_monomial.c      |   4 +-
 monomial/cloverdetratio_monomial.c |   4 +-
 monomial/clovernd_trlog_monomial.c |   2 +-
 monomial/cloverndpoly_monomial.c   |   6 +-
 monomial/det_monomial.c            |   6 +-
 monomial/detratio_monomial.c       |   6 +-
 monomial/gauge_monomial.c          |   2 +-
 monomial/ndpoly_monomial.c         |  10 +-
 monomial/poly_monomial.c           |   2 +-
 monomial/sf_gauge_monomial.c       |   2 +-
 reweighting_factor.c               |   9 +-
 reweighting_factor_nd.c            |   6 +-
 reweighting_factor_nd.h            |   2 +-
 solver/dfl_projector.c             |  12 +-
 solver/dfl_projector.h             |   6 +-
 solver/mode_number.c               |   4 +-
 start.c                            | 364 ++++++++++++++++-------------
 start.h                            |   7 +-
 update_tm.c                        |   3 +-
 31 files changed, 326 insertions(+), 327 deletions(-)

diff --git a/P_M_eta.c b/P_M_eta.c
index 5c2263089..14e8454b9 100644
--- a/P_M_eta.c
+++ b/P_M_eta.c
@@ -391,7 +391,7 @@ void Check_Approximation(double const mstar) {
   Sin   =calloc(VOLUMEPLUSRAND, sizeof(spinor));
 #endif
 
-  random_spinor_field(Sin, VOLUME, 1);
+  random_spinor_field_lexic(Sin, 0);
 
   s_ = calloc(4*VOLUMEPLUSRAND+1, sizeof(spinor));
   s  = calloc(4, sizeof(spinor*));
diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index f20c70350..3a003063f 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -238,7 +238,7 @@ double chebtilde_eval(int M, double *dd, double s){
 void degree_of_Ptilde(int * _degree, double ** coefs,
 		      const double EVMin, const double EVMax,
 		      const int sloppy_degree, const double acc, 
-		      matrix_mult_nd Qsq) {
+		      matrix_mult_nd Qsq, const int repro) {
   int i, j;
   double temp, temp2;
   int degree;
@@ -307,8 +307,8 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
   if(g_debug_level > 0) {
     /* Ptilde P S P  Ptilde X - X */
     /* for random spinor X        */
-    random_spinor_field(ss,VOLUME/2, 1);
-    random_spinor_field(sc,VOLUME/2, 1);
+    random_spinor_field_eo(ss, repro);
+    random_spinor_field_eo(sc, repro);
 
     Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0], Qsq);
     Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], Qsq);
diff --git a/Ptilde_nd.h b/Ptilde_nd.h
index 438777220..dc1cb7200 100644
--- a/Ptilde_nd.h
+++ b/Ptilde_nd.h
@@ -34,6 +34,6 @@ double chebtilde_eval(int M, double *dd, double s);
 void degree_of_Ptilde(int * _degree, double ** coefs, 
 		      const double EVMin, const double EVMax,
 		      const int sloppy_degree, const double acc,
-		      matrix_mult_nd Qsw);
+		      matrix_mult_nd Qsw, const int repro);
 
 #endif
diff --git a/benchmark.c b/benchmark.c
index 8bef32b6d..041019cbe 100644
--- a/benchmark.c
+++ b/benchmark.c
@@ -268,7 +268,7 @@ int main(int argc,char *argv[])
     j_max=2048;
     sdt=0.;
     for (k = 0; k < k_max; k++) {
-      random_spinor_field(g_spinor_field[k], VOLUME/2, 0);
+      random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag);
     }
     
     while(sdt < 30.) {
@@ -366,7 +366,7 @@ int main(int argc,char *argv[])
     j_max=1;
     sdt=0.;
     for (k=0;k<k_max;k++) {
-      random_spinor_field(g_spinor_field[k], VOLUME, 0);
+      random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag);
     }
     
     while(sdt < 3.) {
diff --git a/chebyshev_polynomial.c b/chebyshev_polynomial.c
index caa04667d..05893bbf1 100644
--- a/chebyshev_polynomial.c
+++ b/chebyshev_polynomial.c
@@ -241,7 +241,7 @@ double stopeps=5.0e-16;
 int dop_n_cheby=0;
 double * dop_cheby_coef;
 
-void degree_of_polynomial(){
+void degree_of_polynomial(const int repro){
   int i;
   double temp;
   static int ini=0;
@@ -289,11 +289,11 @@ void degree_of_polynomial(){
    aux3c=calloc(VOLUMEPLUSRAND/2, sizeof(spinor));
 #endif
 
-chebyshev_polynomial(cheb_evmin, cheb_evmax, dop_cheby_coef, N_CHEBYMAX, 0.25);
+   chebyshev_polynomial(cheb_evmin, cheb_evmax, dop_cheby_coef, N_CHEBYMAX, 0.25);
 
    temp=1.0;
-   random_spinor_field(ss,VOLUME/2);
-   random_spinor_field(sc,VOLUME/2);
+   random_spinor_field_eo(ss, repro);
+   random_spinor_field_eo(sc, repro);
 /*   assign(&sc[0], &ss[0],VOLUME/2);
 
   Qtm_pm_psi(&auxs[0], &ss[0]);
diff --git a/chebyshev_polynomial.h b/chebyshev_polynomial.h
index 4a076513c..71c85f17b 100644
--- a/chebyshev_polynomial.h
+++ b/chebyshev_polynomial.h
@@ -31,6 +31,6 @@ void chebyshev_polynomial(double a, double b, double c[], int n, double exponent
 
 void QdaggerQ_power(spinor *R_s, spinor *R_c, double *c, int n, spinor *S_s, spinor *S_c);
 
-void degree_of_polynomial();
+void degree_of_polynomial(const int repro);
 
 #endif
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 5f805b843..16eccb209 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -104,7 +104,7 @@ double cheb_eval(int M, double *c, double s){
 
 void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
 			     const double EVMin, const double EVMax,
-			     matrix_mult_nd Qsq) { 
+			     matrix_mult_nd Qsq, const int repro) { 
   double temp, temp2;
   int degree_of_p = *_degree_of_p + 1;
 
@@ -130,8 +130,8 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   
   chebyshev_coefs(EVMin, EVMax, *coefs, degree_of_p, -0.5);
 
-  random_spinor_field(ss,VOLUME/2, 1);
-  random_spinor_field(sc,VOLUME/2, 1);
+  random_spinor_field_eo(ss, repro);
+  random_spinor_field_eo(sc, repro);
 
   if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)){
     printf("# NDPOLY MD Polynomial: EVmin = %e  EVmax = %e  \n", EVMin, EVMax);
diff --git a/chebyshev_polynomial_nd.h b/chebyshev_polynomial_nd.h
index 438ca1898..7eb091663 100644
--- a/chebyshev_polynomial_nd.h
+++ b/chebyshev_polynomial_nd.h
@@ -29,6 +29,6 @@ double cheb_eval(int M, double *c, double s);
 
 void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
 			     const double EVMin, const double EVMax,
-			     matrix_mult_nd Qsq);
+			     matrix_mult_nd Qsq, const int repro);
 
 #endif
diff --git a/hopping_test.c b/hopping_test.c
index bedb82b52..c86694dc2 100644
--- a/hopping_test.c
+++ b/hopping_test.c
@@ -270,7 +270,7 @@ int main(int argc,char *argv[])
     /*initialize the pseudo-fermion fields*/
     j_max=1;
     for (k = 0; k < k_max; k++) {
-      random_spinor_field(g_spinor_field[k], VOLUME/2, 0);
+      random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag);
     }
 
     if (read_source_flag == 2) { /* save */
diff --git a/hybrid_update.c b/hybrid_update.c
index 1dce0f4c4..ac4eb5c3a 100644
--- a/hybrid_update.c
+++ b/hybrid_update.c
@@ -105,120 +105,88 @@ double moment_energy(su3adj ** const momenta) {
 double init_momenta(const int repro, su3adj ** const momenta) {
   
   su3adj *xm;
-  int i, mu;
+  int i, mu, t0, x, y, z, X, Y, Z, t, id = 0;
+  int coords[4];
 #ifdef MPI
   int k;
   int rlxd_state[105];
 #endif
-  static double y[8];
-  static double tt,tr,ts,kc,ks,sum;
+  double ALIGN yy[8];
+  double ALIGN tt, tr, ts, kc = 0., ks = 0., sum;
   
-  if(repro == 1) {
-    if(g_proc_id==0){
-      kc=0.; 
-      ks=0.;
-      for(i=0;i<VOLUME;i++){ 
-	for(mu=0;mu<4;mu++){
-	  sum=0.;
-	  xm=&momenta[i][mu];
-	  gauss_vector(y,8);
-	  /* from the previous line we get exp(-y^2) distribution */
-	  /* this means that <y^2> = sigma^2 = 1/2 */
-	  /* in order to get <y^2> = 1 distribution ==> *sqrt(2) */
-	  (*xm).d1=1.4142135623731*y[0];
-	  (*xm).d2=1.4142135623731*y[1];
-	  sum+=(*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
-	  (*xm).d3=1.4142135623731*y[2];
-	  (*xm).d4=1.4142135623731*y[3];
-	  sum+=(*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
-	  (*xm).d5=1.4142135623731*y[4];
-	  (*xm).d6=1.4142135623731*y[5];
-	  sum+=(*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
-	  (*xm).d7=1.4142135623731*y[6];
-	  (*xm).d8=1.4142135623731*y[7];
-	  sum+=(*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
-	  tr=sum+kc;
-	  ts=tr+ks;
-	  tt=ts-ks;
-	  ks=ts;
-	  kc=tr-tt;
-	}
-      }
+  if(repro) {
 #ifdef MPI
-      /* send the state for the random-number generator to 1 */
+    if(g_proc_id == 0) {
       rlxd_get(rlxd_state);
-      MPI_Send(&rlxd_state[0], 105, MPI_INT, 1, 101, MPI_COMM_WORLD);
-#endif
     }
-    
+    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
+    rlxd_reset(rlxd_state);
+#endif
+    for(t0 = 0; t0 < g_nproc_t*T; t0++) {
+      t = t0 - T*g_proc_coords[0];
+      coords[0] = t0 / T;
+      for(x = 0; x < g_nproc_x*LX; x++) {
+	X = x - g_proc_coords[1]*LX;
+	coords[1] = x / LX;
+	for(y = 0; y < g_nproc_y*LY; y++) {
+	  Y = y - g_proc_coords[2]*LY;
+	  coords[2] = y / LY;
+	  for(z = 0; z < g_nproc_z*LZ; z++) {
+	    Z = z - g_proc_coords[3]*LZ;
+	    coords[3] = z / LZ;
 #ifdef MPI
-    if(g_proc_id != 0){
-      MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_proc_id-1, 101, MPI_COMM_WORLD, &status);
-      rlxd_reset(rlxd_state);
-      kc=0.; ks=0.;
-      for(i=0;i<VOLUME;i++){ 
-	for(mu=0;mu<4;mu++){
-	  sum=0.;
-	  xm=&momenta[i][mu];
-	  gauss_vector(y,8);
-	  (*xm).d1=1.4142135623731*y[0];
-	  (*xm).d2=1.4142135623731*y[1];
-	  sum+=(*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
-	  (*xm).d3=1.4142135623731*y[2];
-	  (*xm).d4=1.4142135623731*y[3];
-	  sum+=(*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
-	  (*xm).d5=1.4142135623731*y[4];
-	  (*xm).d6=1.4142135623731*y[5];
-	  sum+=(*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
-	  (*xm).d7=1.4142135623731*y[6];
-	  (*xm).d8=1.4142135623731*y[7];
-	  sum+=(*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
-	  tr=sum+kc;
-	  ts=tr+ks;
-	  tt=ts-ks;
-	  ks=ts;
-	  kc=tr-tt;
+	    MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	    if(g_cart_id == id) i = g_ipt[t][X][Y][Z];
+	    for(mu = 0; mu < 4; mu++) {
+	      gauss_vector(yy,8);
+	      if(g_cart_id == id) {
+		sum = 0.;
+		xm = &momenta[i][mu];
+		(*xm).d1 = 1.4142135623731*yy[0];
+		(*xm).d2 = 1.4142135623731*yy[1];
+		sum += (*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
+		(*xm).d3 = 1.4142135623731*yy[2];
+		(*xm).d4 = 1.4142135623731*yy[3];
+		sum += (*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
+		(*xm).d5 = 1.4142135623731*yy[4];
+		(*xm).d6 = 1.4142135623731*yy[5];
+		sum += (*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
+		(*xm).d7 = 1.4142135623731*yy[6];
+		(*xm).d8 = 1.4142135623731*yy[7];
+		sum += (*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
+		tr = sum+kc;
+		ts = tr+ks;
+		tt = ts-ks;
+		ks = ts;
+		kc = tr-tt;
+	      }
+	    }
+	  }
 	}
       }
-      /* send the state fo the random-number 
-	 generator to next processor */
-      
-      k=g_proc_id+1; 
-      if(k==g_nproc){ 
-	k=0;
-      }
-      rlxd_get(rlxd_state);
-      MPI_Send(&rlxd_state[0], 105, MPI_INT, k, 101, MPI_COMM_WORLD);
     }
-#endif
     kc=0.5*(ks+kc);
-    
-#ifdef MPI
-    if(g_proc_id == 0){
-      MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_nproc-1, 101, MPI_COMM_WORLD, &status);
-      rlxd_reset(rlxd_state);
-    }
-#endif
   }
   else {
     kc=0.; 
     ks=0.;
-    for(i=0;i<VOLUME;i++){ 
-      for(mu=0;mu<4;mu++){
+    for(i = 0; i < VOLUME; i++) { 
+      for(mu = 0; mu < 4; mu++) {
 	sum=0.;
 	xm=&momenta[i][mu];
-	gauss_vector(y,8);
-	(*xm).d1=1.4142135623731*y[0];
-	(*xm).d2=1.4142135623731*y[1];
+	gauss_vector(yy,8);
+	(*xm).d1=1.4142135623731*yy[0];
+	(*xm).d2=1.4142135623731*yy[1];
 	sum+=(*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
-	(*xm).d3=1.4142135623731*y[2];
-	(*xm).d4=1.4142135623731*y[3];
+	(*xm).d3=1.4142135623731*yy[2];
+	(*xm).d4=1.4142135623731*yy[3];
 	sum+=(*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
-	(*xm).d5=1.4142135623731*y[4];
-	(*xm).d6=1.4142135623731*y[5];
+	(*xm).d5=1.4142135623731*yy[4];
+	(*xm).d6=1.4142135623731*yy[5];
 	sum+=(*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
-	(*xm).d7=1.4142135623731*y[6];
-	(*xm).d8=1.4142135623731*y[7];
+	(*xm).d7=1.4142135623731*yy[6];
+	(*xm).d8=1.4142135623731*yy[7];
 	sum+=(*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
 	tr=sum+kc;
 	ts=tr+ks;
@@ -228,8 +196,6 @@ double init_momenta(const int repro, su3adj ** const momenta) {
       }
     }
     kc=0.5*(ks+kc);
-    
-
   }
 #ifdef MPI
   MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
diff --git a/invert.c b/invert.c
index 2890026e3..7d9fc19a9 100644
--- a/invert.c
+++ b/invert.c
@@ -450,11 +450,11 @@ int main(int argc, char *argv[])
       /* Compute little Dirac operators */
       /*       alt_block_compute_little_D(); */
       if (g_debug_level > 0) {
-        check_projectors();
-        check_local_D();
+        check_projectors(reproduce_randomnumber_flag);
+        check_local_D(reproduce_randomnumber_flag);
       }
       if (g_debug_level > 1) {
-        check_little_D_inversion();
+        check_little_D_inversion(reproduce_randomnumber_flag);
       }
 
     }
diff --git a/monomial/clover_trlog_monomial.c b/monomial/clover_trlog_monomial.c
index 16b3aa540..e393cee61 100644
--- a/monomial/clover_trlog_monomial.c
+++ b/monomial/clover_trlog_monomial.c
@@ -67,7 +67,7 @@ double clover_trlog_acc(const int id, hamiltonian_field_t * const hf) {
   /*compute the contribution from the clover trlog term */
   mnl->energy1 = -sw_trace(EO, mnl->mu);
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called clover_trlog_acc for id %d dH = %1.4e\n", 
+    printf("called clover_trlog_acc for id %d dH = %1.10e\n", 
 	   id, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
diff --git a/monomial/cloverdet_monomial.c b/monomial/cloverdet_monomial.c
index e303e31d0..5281aa19b 100644
--- a/monomial/cloverdet_monomial.c
+++ b/monomial/cloverdet_monomial.c
@@ -149,7 +149,7 @@ void cloverdet_heatbath(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert(EE, mnl->mu);
 
-  random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+  random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
   mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   
   mnl->Qp(mnl->pf, mnl->w_fields[0]);
@@ -193,7 +193,7 @@ double cloverdet_acc(const int id, hamiltonian_field_t * const hf) {
   g_mu3 = 0.;
   boundary(g_kappa);
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called cloverdet_acc for id %d %d dH = %1.4e\n", 
+    printf("called cloverdet_acc for id %d %d dH = %1.10e\n", 
 	   id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
diff --git a/monomial/cloverdetratio_monomial.c b/monomial/cloverdetratio_monomial.c
index 77c3e8edb..95904c9fa 100644
--- a/monomial/cloverdetratio_monomial.c
+++ b/monomial/cloverdetratio_monomial.c
@@ -240,7 +240,7 @@ void cloverdetratio_heatbath(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert(EE, mnl->mu);
 
-  random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+  random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
   mnl->energy0  = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   
   g_mu3 = mnl->rho;
@@ -291,7 +291,7 @@ double cloverdetratio_acc(const int id, hamiltonian_field_t * const hf) {
   g_mu3 = 0.;
   boundary(g_kappa);
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called cloverdetratio_acc for id %d dH = %1.4e\n", 
+    printf("called cloverdetratio_acc for id %d dH = %1.10e\n", 
 	   id, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
diff --git a/monomial/clovernd_trlog_monomial.c b/monomial/clovernd_trlog_monomial.c
index 8d4c4c12d..bde805781 100644
--- a/monomial/clovernd_trlog_monomial.c
+++ b/monomial/clovernd_trlog_monomial.c
@@ -67,7 +67,7 @@ double clovernd_trlog_acc(const int id, hamiltonian_field_t * const hf) {
   /*compute the contribution from the clover trlog term */
   mnl->energy1 = -sw_trace_nd(EE, mnl->mubar, mnl->epsbar);
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called clovernd_trlog_acc for id %d dH = %1.4e\n", 
+    printf("called clovernd_trlog_acc for id %d dH = %1.10e\n", 
 	   id, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
diff --git a/monomial/cloverndpoly_monomial.c b/monomial/cloverndpoly_monomial.c
index 918a5232d..0b7fecf45 100644
--- a/monomial/cloverndpoly_monomial.c
+++ b/monomial/cloverndpoly_monomial.c
@@ -150,10 +150,10 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
   mnl->energy0 = 0.;
-  random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
+  random_spinor_field_eo(g_chi_up_spinor_field[0], mnl->rngrepro);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
 
-  random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, mnl->rngrepro);
+  random_spinor_field_eo(g_chi_dn_spinor_field[0], mnl->rngrepro);
   mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
 
   Qsw_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
@@ -216,7 +216,7 @@ double cloverndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   mnl->energy1 += square_norm(dn0, VOLUME/2, 1);
   
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called cloverndpoly_acc for id %d %d dH = %1.4e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
+    printf("called cloverndpoly_acc for id %d %d dH = %1.10e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
 }
diff --git a/monomial/det_monomial.c b/monomial/det_monomial.c
index 0e17a8528..e68738ea3 100644
--- a/monomial/det_monomial.c
+++ b/monomial/det_monomial.c
@@ -156,7 +156,7 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
   mnl->iter1 = 0;
 
   if(mnl->even_odd_flag) {
-    random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
     mnl->Qp(mnl->pf, mnl->w_fields[0]);
@@ -168,7 +168,7 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
     }
   }
   else {
-    random_spinor_field(mnl->w_fields[0], VOLUME, mnl->rngrepro);
+    random_spinor_field_lexic(mnl->w_fields[0], mnl->rngrepro);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME, 1);
 
     Q_plus_psi(mnl->pf, mnl->w_fields[0]);
@@ -232,7 +232,7 @@ double det_acc(const int id, hamiltonian_field_t * const hf) {
   g_mu = g_mu1;
   boundary(g_kappa);
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called det_acc for id %d %d dH = %1.4e\n", 
+    printf("called det_acc for id %d %d dH = %1.10e\n", 
 	   id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
diff --git a/monomial/detratio_monomial.c b/monomial/detratio_monomial.c
index bd3536790..f95095688 100644
--- a/monomial/detratio_monomial.c
+++ b/monomial/detratio_monomial.c
@@ -198,7 +198,7 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
   mnl->iter0 = 0;
   mnl->iter1 = 0;
   if(mnl->even_odd_flag) {
-    random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
     mnl->energy0  = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
     mnl->Qp(mnl->w_fields[1], mnl->w_fields[0]);
@@ -212,7 +212,7 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
   }
   else {
-    random_spinor_field(mnl->w_fields[0], VOLUME, mnl->rngrepro);
+    random_spinor_field_lexic(mnl->w_fields[0], mnl->rngrepro);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME, 1);
 
     Q_plus_psi(mnl->w_fields[1], mnl->w_fields[0]);
@@ -271,7 +271,7 @@ double detratio_acc(const int id, hamiltonian_field_t * const hf) {
   g_mu = g_mu1;
   boundary(g_kappa);
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called detratio_acc for id %d %d dH = %1.4e\n", 
+    printf("called detratio_acc for id %d %d dH = %1.10e\n", 
 	   id, mnl->even_odd_flag, mnl->energy1 - mnl->energy0);
   }
   return(mnl->energy1 - mnl->energy0);
diff --git a/monomial/gauge_monomial.c b/monomial/gauge_monomial.c
index e39dbf0b5..862cf8ba9 100644
--- a/monomial/gauge_monomial.c
+++ b/monomial/gauge_monomial.c
@@ -110,7 +110,7 @@ double gauge_acc(const int id, hamiltonian_field_t * const hf) {
     mnl->energy1 += g_beta*(mnl->c1 * measure_rectangles( (const su3**) hf->gaugefield));
     }
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called gauge_acc for id %d %d dH = %1.4e\n", 
+    printf("called gauge_acc for id %d %d dH = %1.10e\n", 
 	   id, mnl->even_odd_flag, mnl->energy0 - mnl->energy1);
   }
   return(mnl->energy0 - mnl->energy1);
diff --git a/monomial/ndpoly_monomial.c b/monomial/ndpoly_monomial.c
index 6aa45af33..b5d98f1cd 100644
--- a/monomial/ndpoly_monomial.c
+++ b/monomial/ndpoly_monomial.c
@@ -170,11 +170,11 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
   mnl->energy0 = 0.;
-  random_spinor_field(g_chi_up_spinor_field[0], VOLUME/2, mnl->rngrepro);
+  random_spinor_field_eo(g_chi_up_spinor_field[0], mnl->rngrepro);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
 
   if(g_epsbar!=0.0 || phmc_exact_poly == 0) {
-    random_spinor_field(g_chi_dn_spinor_field[0], VOLUME/2, mnl->rngrepro);
+    random_spinor_field_eo(g_chi_dn_spinor_field[0], mnl->rngrepro);
     mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
   } 
   else {
@@ -407,7 +407,7 @@ double ndpoly_acc(const int id, hamiltonian_field_t * const hf) {
   }
 
   if(g_proc_id == 0 && g_debug_level > 3) {
-    printf("called ndpoly_acc for id %d %d dH = %1.4e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
+    printf("called ndpoly_acc for id %d %d dH = %1.10e\n", id, g_running_phmc, mnl->energy1 - mnl->energy0);
   }
   /* END IF PHMC */
   return(mnl->energy1 - mnl->energy0);
@@ -458,7 +458,7 @@ int init_ndpoly_monomial(const int id) {
   /* Here we prepare the less precise MD polynomial first   */
   degree_of_polynomial_nd(&mnl->MDPolyDegree, &mnl->MDPolyCoefs,
 			  mnl->EVMin, mnl->EVMax,
-			  Qsq);
+			  Qsq, mnl->rngrepro);
   phmc_dop_n_cheby = mnl->MDPolyDegree;
   phmc_dop_cheby_coef = mnl->MDPolyCoefs;
   if((g_proc_id == 0) && (g_debug_level > 1)) {
@@ -479,7 +479,7 @@ int init_ndpoly_monomial(const int id) {
   /* Here we prepare the precise polynomial Ptilde */
   degree_of_Ptilde(&mnl->PtildeDegree, &mnl->PtildeCoefs, 
 		   mnl->EVMin, mnl->EVMax, mnl->MDPolyDegree, 
-		   mnl->PrecisionPtilde, Qsq);
+		   mnl->PrecisionPtilde, Qsq, mnl->rngrepro);
   phmc_ptilde_cheby_coef = mnl->PtildeCoefs;
   phmc_ptilde_n_cheby = mnl->PtildeDegree;
 
diff --git a/monomial/poly_monomial.c b/monomial/poly_monomial.c
index 22aaa7b4c..a0b4a5c8a 100644
--- a/monomial/poly_monomial.c
+++ b/monomial/poly_monomial.c
@@ -279,7 +279,7 @@ void poly_heatbath(const int id, hamiltonian_field_t * const hf){
   if(mnl->even_odd_flag) {
 
 
-    random_spinor_field(mnl->w_fields[0], VOLUME/2, mnl->rngrepro);
+    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
     if(g_proc_id == 0 && g_debug_level > 3) {
diff --git a/monomial/sf_gauge_monomial.c b/monomial/sf_gauge_monomial.c
index 719c59eaf..075f62c49 100644
--- a/monomial/sf_gauge_monomial.c
+++ b/monomial/sf_gauge_monomial.c
@@ -154,7 +154,7 @@ double sf_gauge_acc( const int id, hamiltonian_field_t * const hf)
 
   if( ( g_proc_id == 0 ) & ( g_debug_level > 3 ) )
   {
-    printf( "called sf_gauge_acc for id %d %d dH = %1.4e\n", 
+    printf( "called sf_gauge_acc for id %d %d dH = %1.10e\n", 
 	    id, mnl->even_odd_flag, mnl->energy0 - mnl->energy1 );
   }
 
diff --git a/reweighting_factor.c b/reweighting_factor.c
index cc3738904..c1c1b777b 100644
--- a/reweighting_factor.c
+++ b/reweighting_factor.c
@@ -55,12 +55,15 @@ void reweighting_factor(const int N, const int nstore) {
       mnl = &monomial_list[j];
       if(mnl->type != GAUGE) {
 	if(mnl->even_odd_flag) {
-	  n = VOLUME/2;
+	  random_spinor_field_eo(mnl->pf, mnl->rngrepro);
 	}
-	random_spinor_field(mnl->pf, n, mnl->rngrepro);
+	else random_spinor_field_lexic(mnl->pf, mnl->rngrepro);
 	mnl->energy0 = square_norm(mnl->pf, n, 1);
 	if(mnl->type == NDDETRATIO) {
-	  random_spinor_field(mnl->pf2, n, mnl->rngrepro);
+	  if(mnl->even_odd_flag) {
+	    random_spinor_field_eo(mnl->pf2, mnl->rngrepro);
+	  }
+	  else random_spinor_field_lexic(mnl->pf2, mnl->rngrepro);
 	  mnl->energy0 += square_norm(mnl->pf2, n, 1);
 	}
       }
diff --git a/reweighting_factor_nd.c b/reweighting_factor_nd.c
index 48ffc699c..d852efd9a 100644
--- a/reweighting_factor_nd.c
+++ b/reweighting_factor_nd.c
@@ -32,7 +32,7 @@
 #include "phmc.h"
 #include "reweighting_factor_nd.h"
 
-double reweighting_factor_nd(const int N)
+double reweighting_factor_nd(const int N, const int repro)
 {
   int i, n_iter;
   double sq_norm, corr, sum=0., sq_sum = 0., temp1;
@@ -48,8 +48,8 @@ double reweighting_factor_nd(const int N)
 
   for(i = 0; i < N; ++i)
   {
-    random_spinor_field(g_chi_up_spinor_field[2], VOLUME/2, 1);
-    random_spinor_field(g_chi_dn_spinor_field[2], VOLUME/2, 1);
+    random_spinor_field_eo(g_chi_up_spinor_field[2], repro);
+    random_spinor_field_eo(g_chi_dn_spinor_field[2], repro);
     zero_spinor_field(g_chi_up_spinor_field[3], VOLUME/2);
     zero_spinor_field(g_chi_dn_spinor_field[3], VOLUME/2);
 
diff --git a/reweighting_factor_nd.h b/reweighting_factor_nd.h
index b5da162f6..c2b84f487 100644
--- a/reweighting_factor_nd.h
+++ b/reweighting_factor_nd.h
@@ -20,6 +20,6 @@
 #ifndef _REWEIGHTING_FACTOR_ND_H
 #define _REWEIGHTING_FACTOR_ND_H
 
-double reweighting_factor_nd(const int N);
+double reweighting_factor_nd(const int N, const int repro);
 
 #endif
diff --git a/solver/dfl_projector.c b/solver/dfl_projector.c
index cbdad2e32..0a3a66ca0 100644
--- a/solver/dfl_projector.c
+++ b/solver/dfl_projector.c
@@ -485,7 +485,7 @@ void little_D_P_R(_Complex double * const out, _Complex double * const in) {
 }
 
 
-int check_projectors() {
+int check_projectors(const int repro) {
   double nrm = 0.;
   int i,j;
   spinor **phi;
@@ -498,7 +498,7 @@ int check_projectors() {
   phi = malloc(nb_blocks*sizeof(spinor *));
   wphi = malloc(nb_blocks*sizeof(spinor *));
 
-  random_spinor_field(work_fields[0], VOLUME, 1);
+  random_spinor_field_lexic(work_fields[0], repro);
   nrm = square_norm(work_fields[0], VOLUME, 1);
   if(g_cart_id == 0) {
     printf("\nNow we check the DFL projection routines!\n\n");
@@ -806,7 +806,7 @@ int check_projectors() {
   return(0);
 }
 
-void check_little_D_inversion() {
+void check_little_D_inversion(const int repro) {
   int i,j,ctr_t;
   int contig_block = LZ / nb_blocks;
   int vol = block_list[0].volume;
@@ -816,7 +816,7 @@ void check_little_D_inversion() {
   const int nr_wf = 1;
 
   init_solver_field(&work_fields, VOLUMEPLUSRAND, nr_wf);
-  random_spinor_field(work_fields[0], VOLUME, 1);
+  random_spinor_field_lexic(work_fields[0], repro);
   if(init_dfl_projector == 0) {
     alloc_dfl_projector();
   }
@@ -895,7 +895,7 @@ void check_little_D_inversion() {
   return;
 }
 
-void check_local_D()
+void check_local_D(const int repro)
 {
   spinor * r[8];
   int j, vol = block_list[0].volume/2, i;
@@ -950,7 +950,7 @@ void check_local_D()
     }
   }
   /* check Msap and Msap_eo on a radom vector */
-  random_spinor_field(work_fields[0], VOLUME, 1);
+  random_spinor_field_lexic(work_fields[0], repro);
   zero_spinor_field(work_fields[1], VOLUME);
   Msap(work_fields[1], work_fields[0], 2);
   D_psi(work_fields[2], work_fields[1]);
diff --git a/solver/dfl_projector.h b/solver/dfl_projector.h
index 787d4ec42..ae4839a6f 100644
--- a/solver/dfl_projector.h
+++ b/solver/dfl_projector.h
@@ -26,9 +26,9 @@ void project_left(spinor * const out, spinor * const in);
 void project_right(spinor * const out, spinor * const in);
 void project_left_D(spinor * const out, spinor * const in);
 void D_project_right(spinor * const out, spinor * const in);
-int check_projectors();
-void check_little_D_inversion();
-void check_local_D();
+int check_projectors(const int repro);
+void check_little_D_inversion(const int repro);
+void check_local_D(const int repro);
 void free_dfl_projector();
 
 void little_project(_Complex double * const out, _Complex double * const in, const int  N);
diff --git a/solver/mode_number.c b/solver/mode_number.c
index 5c40642bc..d5f2a49bf 100644
--- a/solver/mode_number.c
+++ b/solver/mode_number.c
@@ -266,7 +266,7 @@ void X_over_sqrt_X_sqr(spinor * const R, double * const c,
 }
 
 
-void Check_Approximation(double const mstar) {
+void Check_Approximation(double const mstar, const int repro) {
 
   if(g_proc_id == 0) {
   printf("Checking the approximation of X/sqrt(X^2) in the mode number: \n");
@@ -305,7 +305,7 @@ void Check_Approximation(double const mstar) {
   Sin   =calloc(VOLUMEPLUSRAND, sizeof(spinor));
 #endif
 
-  random_spinor_field(Sin, VOLUME, 1);
+  random_spinor_field_lexic(Sin, repro);
 
   s_ = calloc(4*VOLUMEPLUSRAND+1, sizeof(spinor));
   s  = calloc(4, sizeof(spinor*));
diff --git a/start.c b/start.c
index 7ceaf578d..0777da746 100644
--- a/start.c
+++ b/start.c
@@ -179,21 +179,17 @@ su3_vector unif_su3_vector(void)
 }
 
 
-spinor random_spinor(void)
-{
-   spinor s;
-
-   s.s0=random_su3_vector();
-   s.s1=random_su3_vector();
-   s.s2=random_su3_vector();
-   s.s3=random_su3_vector();
-
-   _vector_mul(s.s0,0.5,s.s0);
-   _vector_mul(s.s1,0.5,s.s1);
-   _vector_mul(s.s2,0.5,s.s2);
-   _vector_mul(s.s3,0.5,s.s3);
-
-   return(s);
+void random_spinor(spinor * const s) {
+   s->s0 = random_su3_vector();
+   s->s1 = random_su3_vector();
+   s->s2 = random_su3_vector();
+   s->s3 = random_su3_vector();
+
+   _vector_mul(s->s0, 0.5, s->s0);
+   _vector_mul(s->s1, 0.5, s->s1);
+   _vector_mul(s->s2, 0.5, s->s2);
+   _vector_mul(s->s3, 0.5, s->s3);
+   return;
 }
 
 spinor unit_spinor()
@@ -219,9 +215,9 @@ void unit_spinor_field(const int k)
   }
 }
 
-/* Function provides a spinor field of length V with
+/* Function provides a spinor field of length VOLUME with
    Gaussian distribution */
-void random_spinor_field_lexic(spinor * const k) {
+void random_spinor_field_lexic(spinor * const k, const int repro) {
   int x, y, z, t, X, Y, Z, tt, id=0;
 #ifdef MPI
   int rlxd_state[105];
@@ -230,47 +226,56 @@ void random_spinor_field_lexic(spinor * const k) {
   spinor *s;
   double v[24];
 
+  if(repro) {
 #ifdef MPI
-  if(g_proc_id == 0) {
-    rlxd_get(rlxd_state);
-  }
-  MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
-  if(g_proc_id != 0) {
-    rlxd_reset(rlxd_state);
-  }
+    if(g_proc_id == 0) {
+      rlxd_get(rlxd_state);
+    }
+    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
+    if(g_proc_id != 0) {
+      rlxd_reset(rlxd_state);
+    }
 #endif
-  for(t = 0; t < g_nproc_t*T; t++) {
-    tt = t - g_proc_coords[0]*T;
-    coords[0] = t / T;
-    for(x = 0; x < g_nproc_x*LX; x++) {
-      X = x - g_proc_coords[1]*LX; 
-      coords[1] = x / LX;
-      for(y = 0; y < g_nproc_y*LY; y++) {
-	Y = y - g_proc_coords[2]*LY;
-	coords[2] = y / LY;
-	for(z = 0; z < g_nproc_z*LZ; z++) {
-	  Z = z - g_proc_coords[3]*LZ;
-	  coords[3] = z / LZ;
+    for(t = 0; t < g_nproc_t*T; t++) {
+      tt = t - g_proc_coords[0]*T;
+      coords[0] = t / T;
+      for(x = 0; x < g_nproc_x*LX; x++) {
+	X = x - g_proc_coords[1]*LX; 
+	coords[1] = x / LX;
+	for(y = 0; y < g_nproc_y*LY; y++) {
+	  Y = y - g_proc_coords[2]*LY;
+	  coords[2] = y / LY;
+	  for(z = 0; z < g_nproc_z*LZ; z++) {
+	    Z = z - g_proc_coords[3]*LZ;
+	    coords[3] = z / LZ;
 #ifdef MPI
-	  MPI_Cart_rank(g_cart_grid, coords, &id);
+	    MPI_Cart_rank(g_cart_grid, coords, &id);
 #endif
-	  if(g_cart_id == id) {
-	    gauss_vector(v, 24);
-	    s = k + g_ipt[tt][X][Y][Z];
-	    memcpy(s, v, 24*sizeof(double));
-	  }
-	  else {
-	    ranlxd(v,24);
+	    if(g_cart_id == id) {
+	      gauss_vector(v, 24);
+	      s = k + g_ipt[tt][X][Y][Z];
+	      memcpy(s, v, 24*sizeof(double));
+	    }
+	    else {
+	      ranlxd(v,24);
+	    }
 	  }
 	}
       }
     }
   }
+  else {
+    for(x = 0; x < VOLUME; x++) {
+      gauss_vector(v, 24);
+      s = k + x;
+      memcpy(s, v, 24*sizeof(double));
+    }
+  }
   return;
 }
 
-void random_spinor_field_eo(spinor * const k) {
-  int x, y, z, t, id = 0;
+void random_spinor_field_eo(spinor * const k, const int repro) {
+  int x, X, y, Y, z, Z, t, t0, id = 0;
 #ifdef MPI
   int rlxd_state[105];
 #endif
@@ -278,117 +283,120 @@ void random_spinor_field_eo(spinor * const k) {
   spinor *s;
   double v[24];
 
+  if(repro) {
 #ifdef MPI
-  if(g_proc_id == 0) {
-    rlxd_get(rlxd_state);
-  }
-  MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
-  if(g_proc_id != 0) {
-    rlxd_reset(rlxd_state);
-  }
+    if(g_proc_id == 0) {
+      rlxd_get(rlxd_state);
+    }
+    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
+    if(g_proc_id != 0) {
+      rlxd_reset(rlxd_state);
+    }
 #endif
-  for(t = 0; t < g_nproc_t*T; t++) {
-    coords[0] = t / T;
-    for(x = 0; x < g_nproc_x*LX; x++) {
-      coords[1] = x / LX;
-      for(y = 0; y < g_nproc_y*LY; y++) {
-	coords[2] = y / LY;
-	for(z = 0; z < g_nproc_z*LZ; z++) {
-	  coords[3] = z / LZ;
+    for(t0 = 0; t0 < g_nproc_t*T; t0++) {
+      coords[0] = t0 / T;
+      t = t0 - T*g_proc_coords[0];
+      for(x = 0; x < g_nproc_x*LX; x++) {
+	coords[1] = x / LX;
+	X = x - g_proc_coords[1]*LX;
+	for(y = 0; y < g_nproc_y*LY; y++) {
+	  coords[2] = y / LY;
+	  Y = y - g_proc_coords[2]*LY;
+	  for(z = 0; z < g_nproc_z*LZ; z++) {
+	    coords[3] = z / LZ;
+	    Z = z - g_proc_coords[3]*LZ;
 #ifdef MPI
-	  MPI_Cart_rank(g_cart_grid, coords, &id);
+	    MPI_Cart_rank(g_cart_grid, coords, &id);
 #endif
-	  gauss_vector(v, 24);
-	  if(g_cart_id == id) {
-	    s = k + g_ipt[t][x][y][z];
-	    memcpy(s, v, 24*sizeof(double));
+	    if((t0+x+y+z)%2 == 0) {
+	      gauss_vector(v, 24);
+	      if(g_cart_id == id) {
+		s = k + g_lexic2eosub[ g_ipt[t][X][Y][Z] ];
+		memcpy(s, v, 24*sizeof(double));
+	      }
+	    }
 	  }
 	}
       }
     }
   }
+  else {
+    for (x = 0; x < VOLUME/2; x++) {
+      s = k + x;
+      gauss_vector(v, 24);
+      memcpy(s, v, 24*sizeof(double));
+    }
+  }
   return;
 }
 
 void random_spinor_field(spinor * const k, const int V, const int repro) {
 
-  int ix;
+  int ix, t0, t, x, X, y, Y, z, Z, id = 0;
+  int coords[4];
+#ifdef MPI
   int rlxd_state[105];
+#endif
   spinor *s;
   double v[6];
+  if(repro) {
 #ifdef MPI
-  int j=0;
-#endif
-
-  if(g_proc_id==0 && repro == 1) {
-    for (ix = 0; ix < V; ix++) {
-      s = k + ix;
-      gauss_vector(v,6);
-      s->s0.c0 = v[0] + v[1] * I;
-      s->s0.c1 = v[2] + v[3] * I;
-      s->s0.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s1.c0 = v[0] + v[1] * I;
-      s->s1.c1 = v[2] + v[3] * I;
-      s->s1.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s2.c0 = v[0] + v[1] * I;
-      s->s2.c1 = v[2] + v[3] * I;
-      s->s2.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s3.c0 = v[0] + v[1] * I;
-      s->s3.c1 = v[2] + v[3] * I;
-      s->s3.c2 = v[4] + v[5] * I;
-    }
-    /* send the state for the random-number generator to 1 */
-    rlxd_get(rlxd_state);
-#ifdef MPI
-    if(g_nproc > 1) {
-      MPI_Send(&rlxd_state[0], 105, MPI_INT, 1, 102, MPI_COMM_WORLD);
+    if(g_proc_id == 0) {
+      rlxd_get(rlxd_state);
     }
+    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
+    rlxd_reset(rlxd_state);
 #endif
-  }
+    for(t0 = 0; t0 < g_nproc_t*T; t0++) {
+      t = t0 - T*g_proc_coords[0];
+      coords[0] = t0 / T;
+      for(x = 0; x < g_nproc_x*LX; x++) {
+	X = x - g_proc_coords[1]*LX;
+	coords[1] = x / LX;
+	for(y = 0; y < g_nproc_y*LY; y++) {
+	  Y = y - g_proc_coords[2]*LY;
+	  coords[2] = y / LY;
+	  for(z = 0; z < g_nproc_z*LZ; z++) {
+	    Z = z - g_proc_coords[3]*LZ;
+	    coords[3] = z / LZ;
 #ifdef MPI
-  if(g_proc_id != 0 && repro == 1) {
-    MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_proc_id-1, 102, MPI_COMM_WORLD, &status);
-    rlxd_reset(rlxd_state);
-    for (ix=0;ix<V;ix++) {
-      s = k + ix;
-      gauss_vector(v,6);
-      s->s0.c0 = v[0] + v[1] * I;
-      s->s0.c1 = v[2] + v[3] * I;
-      s->s0.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s1.c0 = v[0] + v[1] * I;
-      s->s1.c1 = v[2] + v[3] * I;
-      s->s1.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s2.c0 = v[0] + v[1] * I;
-      s->s2.c1 = v[2] + v[3] * I;
-      s->s2.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s3.c0 = v[0] + v[1] * I;
-      s->s3.c1 = v[2] + v[3] * I;
-      s->s3.c2 = v[4] + v[5] * I;
-    }
-    /* send the state fo the random-number generator to k+1 */
-    
-    j=g_proc_id+1;
-    if(j==g_nproc){
-      j=0;
+	    MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	    if(g_cart_id == id) ix = g_lexic2eosub[ g_ipt[t][X][Y][Z] ];
+	    gauss_vector(v, 6);
+	    if(g_cart_id == id) {
+	      s = k + ix;
+	      s->s0.c0 = v[0] + v[1] * I;
+	      s->s0.c1 = v[2] + v[3] * I;
+	      s->s0.c2 = v[4] + v[5] * I;
+	    }
+	    gauss_vector(v,6);
+	    if(g_cart_id == id) {
+	      s->s1.c0 = v[0] + v[1] * I;
+	      s->s1.c1 = v[2] + v[3] * I;
+	      s->s1.c2 = v[4] + v[5] * I;
+	    }
+	    gauss_vector(v,6);
+	    if(g_cart_id == id) {
+	      s->s2.c0 = v[0] + v[1] * I;
+	      s->s2.c1 = v[2] + v[3] * I;
+	      s->s2.c2 = v[4] + v[5] * I;
+	    }
+	    gauss_vector(v,6);
+	    if(g_cart_id == id) {
+	      s->s3.c0 = v[0] + v[1] * I;
+	      s->s3.c1 = v[2] + v[3] * I;
+	      s->s3.c2 = v[4] + v[5] * I;
+	    }
+	  }
+	}
+      }
     }
-    rlxd_get(rlxd_state);
-    MPI_Send(&rlxd_state[0], 105, MPI_INT, j, 102, MPI_COMM_WORLD);
-  }
-  if(g_nproc > 1 && g_proc_id==0 && repro == 1) {
-    MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_nproc-1, 102, MPI_COMM_WORLD, &status);
-    rlxd_reset(rlxd_state);
   }
-#endif
-  if(repro != 1) {
+  else {
     for (ix = 0; ix < V; ix++) {
       s = k + ix;
-      gauss_vector(v,6);
+      gauss_vector(v, 6);
       s->s0.c0 = v[0] + v[1] * I;
       s->s0.c1 = v[2] + v[3] * I;
       s->s0.c2 = v[4] + v[5] * I;
@@ -406,9 +414,10 @@ void random_spinor_field(spinor * const k, const int V, const int repro) {
       s->s3.c2 = v[4] + v[5] * I;
     }
   }
+  return;
 }
 
-/* Function provides a zero spinor field of length N with */
+/* Function provides a z2 random spinor field of length N */
 void z2_random_spinor_field(spinor * const k, const int N) {
 
   int ix;
@@ -469,15 +478,12 @@ void constant_spinor_field(spinor * const k, const int p, const int N)
 }
 
 
-su3 random_su3(void)
-{
+void random_su3(su3 * const u) {
    double norm,fact;
    _Complex double z;
    su3_vector z1,z2,z3;
-   su3 u;
 
    z1=unif_su3_vector();
-
    for (;;)
    {
       z2=unif_su3_vector();
@@ -499,19 +505,18 @@ su3 random_su3(void)
    z3.c1 = conj((z1.c2 * z2.c0) - (z1.c0 * z2.c2));
    z3.c2 = conj((z1.c0 * z2.c1) - (z1.c1 * z2.c0));
 
-   u.c00=z1.c0;
-   u.c01=z1.c1;
-   u.c02=z1.c2;
+   u->c00 = z1.c0;
+   u->c01 = z1.c1;
+   u->c02 = z1.c2;
 
-   u.c10=z2.c0;
-   u.c11=z2.c1;
-   u.c12=z2.c2;
+   u->c10 = z2.c0;
+   u->c11 = z2.c1;
+   u->c12 = z2.c2;
 
-   u.c20=z3.c0;
-   u.c21=z3.c1;
-   u.c22=z3.c2;
-
-   return(u);
+   u->c20 = z3.c0;
+   u->c21 = z3.c1;
+   u->c22 = z3.c2;
+   return;
 }
 
 
@@ -533,35 +538,58 @@ void unit_g_gauge_field(void)
 
 void random_gauge_field(const int repro) {
 
-  int ix,mu;
+  int ix, mu, t0, t, x, X, y, Y, z, Z, id = 0; /* id needs a defined value: MPI_Cart_rank is compiled out without MPI */
+  int coords[4];
+  su3 ALIGN tmp;
 #ifdef MPI
   int rlxd_state[105];
-  int j=0;
+#endif
 
-  if(g_proc_id !=0 && repro == 1) {
-    MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_proc_id-1, 102, MPI_COMM_WORLD, &status);
+  if(repro) {
+#ifdef MPI
+    if(g_proc_id == 0) {
+      rlxd_get(rlxd_state);
+    }
+    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
     rlxd_reset(rlxd_state);
-  }
 #endif
-
-  for (ix = 0; ix < VOLUME; ix++) {
-    for (mu = 0; mu < 4; mu++) {
-      g_gauge_field[ix][mu] = random_su3();
+    for(t0 = 0; t0 < g_nproc_t*T; t0++) {
+      t = t0 - T*g_proc_coords[0];
+      coords[0] = t0 / T;
+      for(x = 0; x < g_nproc_x*LX; x++) {
+	X = x - g_proc_coords[1]*LX;
+	coords[1] = x / LX;
+	for(y = 0; y < g_nproc_y*LY; y++) {
+	  Y = y - g_proc_coords[2]*LY;
+	  coords[2] = y / LY;
+	  for(z = 0; z < g_nproc_z*LZ; z++) {
+	    Z = z - g_proc_coords[3]*LZ;
+	    coords[3] = z / LZ;
+#ifdef MPI
+	    MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	    for(mu = 0; mu < 4; mu++) {
+	      if(g_cart_id == id) {
+		ix = g_ipt[t][X][Y][Z];
+		random_su3(&g_gauge_field[ix][mu]);
+	      }
+	      else {
+		random_su3(&tmp);
+	      }
+	    }
+	  }
+	}
+      }
     }
   }
-
-#ifdef MPI
-  if(repro == 1) {
-    j = (g_proc_id + 1) % g_nproc;
-    rlxd_get(rlxd_state);
-    MPI_Send(&rlxd_state[0], 105, MPI_INT, j, 102, MPI_COMM_WORLD);
-    
-    if(g_proc_id == 0) {
-      MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_nproc-1, 102, MPI_COMM_WORLD, &status);
-      rlxd_reset(rlxd_state);
+  else {
+    for (ix = 0; ix < VOLUME; ix++) {
+      for (mu = 0; mu < 4; mu++) {
+	random_su3(&g_gauge_field[ix][mu]);
+      }
     }
   }
-#endif
+
   g_update_gauge_copy = 1;
   g_update_gauge_energy = 1;
   g_update_rectangle_energy = 1;
diff --git a/start.h b/start.h
index eab92fef3..07b1a8f80 100644
--- a/start.h
+++ b/start.h
@@ -23,15 +23,16 @@
 void gauss_vector(double v[],int n);
 su3_vector random_su3_vector(void);
 su3_vector unif_su3_vector(void);
-spinor random_spinor(void);
+void random_spinor(spinor * const s);
 void unit_spinor_field(const int k);
 
-void random_spinor_field_lexic(spinor * const k);
+void random_spinor_field_lexic(spinor * const k, const int repro);
 void random_spinor_field(spinor * const k, const int V, const int repro);
+void random_spinor_field_eo(spinor * const k, const int repro);
 void z2_random_spinor_field(spinor * const k, const int N);
 void zero_spinor_field(spinor * const k, const int N);
 void constant_spinor_field(spinor * const k, const int p, const int N);
-su3 random_su3(void);
+void random_su3(su3 * const u);
 void unit_g_gauge_field(void);
 void random_gauge_field(const int repro);
 void set_spinor_field(int k, const double c);
diff --git a/update_tm.c b/update_tm.c
index f742a9412..0c4b001b8 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -162,6 +162,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
       new_rectangle_energy = measure_rectangles( (const su3**) hf.gaugefield);
     }
   }
+  if(g_proc_id == 0 && g_debug_level > 3) printf("called moment_energy: dh = %1.10e\n", (enepx - enep));
   /* Compute the energy difference */
   dh = dh + (enepx - enep);
   if(g_proc_id == 0 && g_debug_level > 3) {
@@ -170,8 +171,8 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
   expmdh = exp(-dh);
   /* the random number is only taken at node zero and then distributed to 
      the other sites */
+  ranlxd(yy,1);
   if(g_proc_id==0) {
-    ranlxd(yy,1);
 #ifdef MPI
     for(i = 1; i < g_nproc; i++) {
       MPI_Send(&yy[0], 1, MPI_DOUBLE, i, 31, MPI_COMM_WORLD);

From cacefd31169cf122e2c3f190e3d942f496bf3e7b Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 25 Nov 2012 15:14:46 +0100
Subject: [PATCH 091/110] one more correction for random_spinor_field in
 generate_dfl_subspace

---
 hybrid_update.c                | 2 --
 invert.c                       | 2 +-
 solver/generate_dfl_subspace.c | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/hybrid_update.c b/hybrid_update.c
index ac4eb5c3a..aebff8375 100644
--- a/hybrid_update.c
+++ b/hybrid_update.c
@@ -169,8 +169,6 @@ double init_momenta(const int repro, su3adj ** const momenta) {
     kc=0.5*(ks+kc);
   }
   else {
-    kc=0.; 
-    ks=0.;
     for(i = 0; i < VOLUME; i++) { 
       for(mu = 0; mu < 4; mu++) {
 	sum=0.;
diff --git a/invert.c b/invert.c
index 7d9fc19a9..22164a7e9 100644
--- a/invert.c
+++ b/invert.c
@@ -443,7 +443,7 @@ int main(int argc, char *argv[])
 
       /*       g_mu = 0.; */
       /*       boundary(0.125); */
-      generate_dfl_subspace(g_N_s, VOLUME);
+      generate_dfl_subspace(g_N_s, reproduce_randomnumber_flag);
       /*       boundary(g_kappa); */
       /*       g_mu = g_mu1; */
 
diff --git a/solver/generate_dfl_subspace.c b/solver/generate_dfl_subspace.c
index a65b4ab94..bdebbe688 100644
--- a/solver/generate_dfl_subspace.c
+++ b/solver/generate_dfl_subspace.c
@@ -117,7 +117,7 @@ int generate_dfl_subspace(const int Ns, const int N) {
   random_fields(Ns);
   if(g_debug_level > 4) {
     for(e = 0.; e < 1.; e=e+0.05) {
-      random_spinor_field(dfl_fields[0], N, 0);
+      random_spinor_field_lexic(dfl_fields[0], repro);
       nrm = sqrt(square_norm(dfl_fields[0], N, 1));
       mul_r(dfl_fields[0], 1./nrm, dfl_fields[0], N);
       d = 1.1;

From eb0b878bcb2b6661e29867e8648d6e171fab5d2d Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Mon, 26 Nov 2012 11:47:36 +0100
Subject: [PATCH 092/110] bug fixed

---
 invert.c                       | 2 +-
 solver/generate_dfl_subspace.c | 2 +-
 solver/generate_dfl_subspace.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/invert.c b/invert.c
index 22164a7e9..43311ba1b 100644
--- a/invert.c
+++ b/invert.c
@@ -443,7 +443,7 @@ int main(int argc, char *argv[])
 
       /*       g_mu = 0.; */
       /*       boundary(0.125); */
-      generate_dfl_subspace(g_N_s, reproduce_randomnumber_flag);
+      generate_dfl_subspace(g_N_s, VOLUME, reproduce_randomnumber_flag);
       /*       boundary(g_kappa); */
       /*       g_mu = g_mu1; */
 
diff --git a/solver/generate_dfl_subspace.c b/solver/generate_dfl_subspace.c
index bdebbe688..9ac37c58e 100644
--- a/solver/generate_dfl_subspace.c
+++ b/solver/generate_dfl_subspace.c
@@ -85,7 +85,7 @@ static void random_fields(const int Ns) {
   return;
 }
 
-int generate_dfl_subspace(const int Ns, const int N) {
+int generate_dfl_subspace(const int Ns, const int N, const int repro) {
   int ix, i_o,i, j, k, p, blk, vpr = VOLUMEPLUSRAND*sizeof(spinor)/sizeof(_Complex double),
     vol = VOLUME*sizeof(spinor)/sizeof(_Complex double);
   spinor **psi;
diff --git a/solver/generate_dfl_subspace.h b/solver/generate_dfl_subspace.h
index 9f88f7174..dc5848539 100644
--- a/solver/generate_dfl_subspace.h
+++ b/solver/generate_dfl_subspace.h
@@ -24,7 +24,7 @@
 
 int init_dfl_subspace(const int);
 int free_dfl_subspace();
-int generate_dfl_subspace(const int Ns, const int N);
+int generate_dfl_subspace(const int Ns, const int N, const int repro);
 int generate_dfl_subspace_free(const int Ns, const int N);
 
 extern spinor ** dfl_fields;

From 83e1154fd568067cb359ef561a8ef56935d072ee Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 30 Nov 2012 14:42:27 +0100
Subject: [PATCH 093/110] bug fixed in start_ranlux, local seed was computed
 wrongly

---
 start.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/start.c b/start.c
index 0777da746..cea700676 100644
--- a/start.c
+++ b/start.c
@@ -819,13 +819,15 @@ void start_ranlux(int level, int seed)
 {
    unsigned int max_seed,loc_seed;
    unsigned int step = g_proc_coords[0]*g_nproc_x*g_nproc_y*g_nproc_z +
-     g_nproc_y*g_proc_coords[1]*g_nproc_y*g_nproc_z +
+     g_proc_coords[1]*g_nproc_y*g_nproc_z +
      g_proc_coords[2]*g_nproc_z + g_proc_coords[3];
 
    max_seed = 2147483647 / g_nproc;
    loc_seed = (seed + step*max_seed) % 2147483647;
 
    if(loc_seed == 0) loc_seed++;
+ 
+   printf("Local seed is %d  proc_id = %d\n", loc_seed, g_proc_id);
 
    rlxs_init(level-1,loc_seed);
    rlxd_init(level,loc_seed);

From 42a900161edb11d51788f1b13955371fadaad7d7 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 30 Nov 2012 14:49:37 +0100
Subject: [PATCH 094/110] output of seed for debug_level > 3

---
 start.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/start.c b/start.c
index cea700676..0bbfcb72a 100644
--- a/start.c
+++ b/start.c
@@ -827,10 +827,12 @@ void start_ranlux(int level, int seed)
 
    if(loc_seed == 0) loc_seed++;
  
-   printf("Local seed is %d  proc_id = %d\n", loc_seed, g_proc_id);
+   if(g_debug_level > 3) {
+     printf("Local seed is %u  proc_id = %d\n", loc_seed, g_proc_id); /* loc_seed is unsigned int */
+   }
 
-   rlxs_init(level-1,loc_seed);
-   rlxd_init(level,loc_seed);
+   rlxs_init(level-1, loc_seed);
+   rlxd_init(level, loc_seed);
 }
 
 void gen_test_spinor_field(spinor * const k, const int eoflag) {

From e5067811a28622a328a6386f92a4e4f76a4c618a Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 30 Nov 2012 17:44:59 +0100
Subject: [PATCH 095/110] comments updated to remind that _INDEX_INDEP_GEOM
 does not work with clover yet

---
 xchange/xchange_deri.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/xchange/xchange_deri.c b/xchange/xchange_deri.c
index 807f80f18..5d8a0b9a9 100644
--- a/xchange/xchange_deri.c
+++ b/xchange/xchange_deri.c
@@ -87,7 +87,8 @@ void xchange_deri(su3adj ** const df)
     }
   }
 
-  /* send the data to the neighbour on the right is not needed*/
+  /* send the data to the neighbour on the right is needed for the */
+  /* clover case, so this needs fixing here! */
 #    endif /* (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT ) */
 #    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ )
 
@@ -114,7 +115,8 @@ void xchange_deri(su3adj ** const df)
       }
     }
   }
-  /* send the data to the neighbour on the right is not needed*/  
+  /* send the data to the neighbour on the right is needed for the */
+  /* clover case, so this needs fixing here! */
 #    endif /* (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ ) */
 
 #    if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ )
@@ -143,7 +145,8 @@ void xchange_deri(su3adj ** const df)
       }
     }
   }
-  /* send the data to the neighbour on the right is not needed*/  
+  /* send the data to the neighbour on the right is needed for the */
+  /* clover case, so this needs fixing here! */
 #    endif /* (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ ) */
 
 #    if (defined PARALLELXYZT || defined PARALLELXYZ )
@@ -172,7 +175,8 @@ void xchange_deri(su3adj ** const df)
       }
     }
   }
-  /* send the data to the neighbour on the right is not needed*/  
+  /* send the data to the neighbour on the right is needed for the */
+  /* clover case, so this needs fixing here! */
 #    endif /* (defined PARALLELXYZT || defined PARALLELXYZ ) */
 #  endif /* MPI */
   return;

From 886d78c2a2275395312b374ea8a0d99023b389b4 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Fri, 30 Nov 2012 17:45:50 +0100
Subject: [PATCH 096/110] consistently set reproduce_random_number flag for ALL
 monomials now and default to repro=1 for the time being

---
 default_input_values.h | 2 +-
 monomial/monomial.c    | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/default_input_values.h b/default_input_values.h
index 848c7c9c3..b8eaddd48 100644
--- a/default_input_values.h
+++ b/default_input_values.h
@@ -103,7 +103,7 @@
 #define _default_gauge_precision_write_flag 64
 #define _default_g_disable_IO_checks 0
 #define _default_prop_precision_flag 32
-#define _default_reproduce_randomnumber_flag 0
+#define _default_reproduce_randomnumber_flag 1
 #define _default_g_sloppy_precision_flag 0
 #define _default_stout_rho 0.1
 #define _default_rho 0.
diff --git a/monomial/monomial.c b/monomial/monomial.c
index 1bc98bdf3..d2da2b501 100644
--- a/monomial/monomial.c
+++ b/monomial/monomial.c
@@ -163,11 +163,11 @@ int init_monomials(const int V, const int even_odd_flag) {
 
   no = 0;
   for(int i = 0; i < no_monomials; i++) {
+    monomial_list[i].rngrepro = reproduce_randomnumber_flag;
     if((monomial_list[i].type != GAUGE) && (monomial_list[i].type != SFGAUGE)) {
       monomial_list[i].w_fields = w_fields;
       monomial_list[i].pf = __pf+no*V;
       no++;
-      monomial_list[i].rngrepro = reproduce_randomnumber_flag;
 
       if(monomial_list[i].type == DET) {
 	monomial_list[i].hbfunction = &det_heatbath;
@@ -319,6 +319,7 @@ int init_monomials(const int V, const int even_odd_flag) {
       add_monomial(CLOVERTRLOG);
       monomial_list[no_monomials-1].pf = NULL;
       monomial_list[no_monomials-1].id = no_monomials-1;
+      monomial_list[no_monomials-1].rngrepro = reproduce_randomnumber_flag;
       // set the parameters according to cloverdet monomial
       // this need alltogether a more general approach
       monomial_list[no_monomials-1].c_sw = monomial_list[clover_monomials[j]].c_sw;
@@ -339,6 +340,7 @@ int init_monomials(const int V, const int even_odd_flag) {
       add_monomial(CLOVERNDTRLOG);
       monomial_list[no_monomials-1].pf = NULL;
       monomial_list[no_monomials-1].id = no_monomials-1;
+      monomial_list[no_monomials-1].rngrepro = reproduce_randomnumber_flag;
       // set the parameters according to cloverdet monomial
       // this need alltogether a more general approach
       monomial_list[no_monomials-1].c_sw = monomial_list[clovernd_monomials[j]].c_sw;

From 7a91e9648be6ceacfabba4cd850e61527a5536a5 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 4 Dec 2012 10:10:18 +0100
Subject: [PATCH 097/110] dumps a derivative field to stdout

---
 io/deri_write_stdout.c | 69 ++++++++++++++++++++++++++++++++++++++++++
 io/deri_write_stdout.h | 27 +++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 io/deri_write_stdout.c
 create mode 100644 io/deri_write_stdout.h

diff --git a/io/deri_write_stdout.c b/io/deri_write_stdout.c
new file mode 100644
index 000000000..2bd986742
--- /dev/null
+++ b/io/deri_write_stdout.c
@@ -0,0 +1,69 @@
+/***********************************************************************
+* Copyright (C) 2012 Carsten Urbach
+*
+* This file is part of tmLQCD.
+*
+* tmLQCD is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* tmLQCD is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include "global.h"
+#ifdef MPI
+# include <mpi.h>
+#endif
+#include "su3adj.h"
+#include "io/deri_write_stdout.h"
+
+void deri_write_stdout(su3adj** const df) {
+  int X, Y, Z, t0, id = 0, ix;
+  int coords[4];
+
+  for(int t = 0; t < g_nproc_t*T; t++) {
+    t0 = t - g_proc_coords[0]*T;
+    coords[0] = t / T;
+    for(int x = 0; x < g_nproc_x*LX; x++) {
+      X = x - g_proc_coords[1]*LX; 
+      coords[1] = x / LX;
+      for(int y = 0; y < g_nproc_y*LY; y++) {
+	Y = y - g_proc_coords[2]*LY;
+	coords[2] = y / LY;
+	for(int z = 0; z < g_nproc_z*LZ; z++) {
+	  Z = z - g_proc_coords[3]*LZ;
+	  coords[3] = z / LZ;
+#ifdef MPI
+	  MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	  if(g_cart_id == id) {
+	    ix = g_ipt[t0][X][Y][Z];
+	    printf("%d %d %d %d, %d %d %d %d: ", t, x, y, z, t0, X, Y, Z);
+	    for(int mu = 0; mu < 4; mu++) {
+	      printf("%d %e %e %e %e %e %e %e %e\n", mu, df[ix][mu].d1, df[ix][mu].d2, 
+		     df[ix][mu].d3, df[ix][mu].d4, df[ix][mu].d5, df[ix][mu].d6, 
+		     df[ix][mu].d7, df[ix][mu].d8);
+	    }
+	  }
+	  fflush(stdout);
+#ifdef MPI
+	  MPI_Barrier(MPI_COMM_WORLD);
+#endif
+	}
+      }
+    }
+  }
+  return;
+}
diff --git a/io/deri_write_stdout.h b/io/deri_write_stdout.h
new file mode 100644
index 000000000..eb83f5b63
--- /dev/null
+++ b/io/deri_write_stdout.h
@@ -0,0 +1,27 @@
+/***********************************************************************
+* Copyright (C) 2012 Carsten Urbach
+*
+* This file is part of tmLQCD.
+*
+* tmLQCD is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* tmLQCD is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+
+#ifndef _DERI_WRITE_STDOUT_H
+#define _DERI_WRITE_STDOUT_H
+
+#include "su3adj.h"
+
+void deri_write_stdout(su3adj** const df);
+
+#endif

From e0b9f0b849da5e07997982e50c860fe6a39c99cf Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Tue, 4 Dec 2012 10:16:53 +0100
Subject: [PATCH 098/110] dumps a derivative field to stdout

---
 io/Makefile.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/io/Makefile.in b/io/Makefile.in
index 70ab69c58..3cf4dac14 100644
--- a/io/Makefile.in
+++ b/io/Makefile.in
@@ -71,7 +71,8 @@ libio_TARGETS = utils_engineering \
 		utils DML_crc32 dml \
 		eospinor_write \
 		eospinor_read \
-		io_cm
+		io_cm \
+		deri_write_stdout
 
 libio_OBJECTS = $(addsuffix .o, ${libio_TARGETS})
 

From 1896db1ca8913f93c5dc54c6130a8fdff15ea169 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 5 Dec 2012 17:00:22 +0100
Subject: [PATCH 099/110] clover parallelisation issue solved

---
 xchange/xchange_deri.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xchange/xchange_deri.c b/xchange/xchange_deri.c
index 5d8a0b9a9..7112a0f7d 100644
--- a/xchange/xchange_deri.c
+++ b/xchange/xchange_deri.c
@@ -416,7 +416,7 @@ void xchange_deri(su3adj ** const df)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   /* zy-edge */
-  MPI_Sendrecv((void*)df[VOLUME + RAND + 4*LY*LZ + 4*T*LZ + 4*LX*LZ + 4*T*LY + 4*LX*LY + 2*LY*LZ], 
+  MPI_Sendrecv((void*)df[VOLUME + RAND + 4*LY*LZ + 4*T*LZ + 4*LX*LZ + 4*T*LY + 4*LX*LY + 2*T*LX], 
 	       1, deri_zy_edge_cont, g_nb_y_dn, 502,
 	       (void*)ddummy[0],
 	       1, deri_zy_edge_cont, g_nb_y_up, 502, 
@@ -592,7 +592,7 @@ void xchange_deri(su3adj ** const df)
       }
     }
   }
-  /* send the data to the neighbour on the right is not needed*/  
+  /* send the data to the neighbour on the right needed for clover */  
 
   MPI_Sendrecv((void*)df[VOLUME + 2*LX*LY*LZ + 2*T*LY*LZ + 2*T*LX*LZ], 
 	       1, deri_z_slice_cont, g_nb_z_up, 47,

From 02fea943e58938520434376f5c5516eac0af295a Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Wed, 5 Dec 2012 17:08:15 +0100
Subject: [PATCH 100/110] debugging functions to write der, spinor and sw
 fields to stdout

---
 io/Makefile.in           |  2 +-
 io/deri_write_stdout.c   | 20 +++++++----
 io/spinor_write_stdout.c | 70 ++++++++++++++++++++++++++++++++++++
 io/spinor_write_stdout.h | 27 ++++++++++++++
 io/sw_write_stdout.c     | 77 ++++++++++++++++++++++++++++++++++++++++
 io/sw_write_stdout.h     | 27 ++++++++++++++
 6 files changed, 216 insertions(+), 7 deletions(-)
 create mode 100644 io/spinor_write_stdout.c
 create mode 100644 io/spinor_write_stdout.h
 create mode 100644 io/sw_write_stdout.c
 create mode 100644 io/sw_write_stdout.h

diff --git a/io/Makefile.in b/io/Makefile.in
index 3cf4dac14..1f9672a5f 100644
--- a/io/Makefile.in
+++ b/io/Makefile.in
@@ -72,7 +72,7 @@ libio_TARGETS = utils_engineering \
 		eospinor_write \
 		eospinor_read \
 		io_cm \
-		deri_write_stdout
+		deri_write_stdout spinor_write_stdout sw_write_stdout
 
 libio_OBJECTS = $(addsuffix .o, ${libio_TARGETS})
 
diff --git a/io/deri_write_stdout.c b/io/deri_write_stdout.c
index 2bd986742..89fa970d3 100644
--- a/io/deri_write_stdout.c
+++ b/io/deri_write_stdout.c
@@ -30,7 +30,7 @@
 #include "io/deri_write_stdout.h"
 
 void deri_write_stdout(su3adj** const df) {
-  int X, Y, Z, t0, id = 0, ix;
+  int X, Y, Z, t0, id = 0, ix, iy;
   int coords[4];
 
   for(int t = 0; t < g_nproc_t*T; t++) {
@@ -50,14 +50,22 @@ void deri_write_stdout(su3adj** const df) {
 #endif
 	  if(g_cart_id == id) {
 	    ix = g_ipt[t0][X][Y][Z];
-	    printf("%d %d %d %d, %d %d %d %d: ", t, x, y, z, t0, X, Y, Z);
+	    iy = t*(g_nproc_x*LX*g_nproc_y*LY*g_nproc_z*LZ) +
+	      x*(g_nproc_y*LY*g_nproc_z*LZ) +
+	      y*(g_nproc_z*LZ) + z;
 	    for(int mu = 0; mu < 4; mu++) {
-	      printf("%d %e %e %e %e %e %e %e %e\n", mu, df[ix][mu].d1, df[ix][mu].d2, 
-		     df[ix][mu].d3, df[ix][mu].d4, df[ix][mu].d5, df[ix][mu].d6, 
-		     df[ix][mu].d7, df[ix][mu].d8);
+/* 	      printf(" %d %d %d %d %d, %d %d %d %d: %d %e %e %e %e %e %e %e %e\n",  */
+/* 		     iy, t, x, y, z, t0, X, Y, Z,  */
+/* 		     mu, df[ix][mu].d1, df[ix][mu].d2,  */
+/* 		     df[ix][mu].d3, df[ix][mu].d4, df[ix][mu].d5, df[ix][mu].d6,  */
+/* 		     df[ix][mu].d7, df[ix][mu].d8); */
+	      printf(" %d %d %d %d %d, %d %d %d %d: %d %e %e de\n",
+		     iy, t, x, y, z, t0, X, Y, Z, 
+		     mu, df[ix][mu].d1, df[ix][mu].d2);
+
+	      fflush(stdout);
 	    }
 	  }
-	  fflush(stdout);
 #ifdef MPI
 	  MPI_Barrier(MPI_COMM_WORLD);
 #endif
diff --git a/io/spinor_write_stdout.c b/io/spinor_write_stdout.c
new file mode 100644
index 000000000..5d4a2e253
--- /dev/null
+++ b/io/spinor_write_stdout.c
@@ -0,0 +1,70 @@
+/***********************************************************************
+* Copyright (C) 2012 Carsten Urbach
+*
+* This file is part of tmLQCD.
+*
+* tmLQCD is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* tmLQCD is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include "global.h"
+#ifdef MPI
+# include <mpi.h>
+#endif
+#include "su3.h"
+#include "io/spinor_write_stdout.h"
+
+/* Debug helper: each rank prints Re/Im of s[ix].s0.c0 (ix via g_lexic2eosub) for the (t+x+y+z)%2 == 0 global sites it owns. */
+void spinor_write_stdout(spinor * const s) {
+  int X, Y, Z, t0, id = 0, ix, iy;
+  int coords[4];
+
+  for(int t = 0; t < g_nproc_t*T; t++) {
+    t0 = t - g_proc_coords[0]*T;
+    coords[0] = t / T;
+    for(int x = 0; x < g_nproc_x*LX; x++) {
+      X = x - g_proc_coords[1]*LX; 
+      coords[1] = x / LX;
+      for(int y = 0; y < g_nproc_y*LY; y++) {
+	Y = y - g_proc_coords[2]*LY;
+	coords[2] = y / LY;
+	for(int z = 0; z < g_nproc_z*LZ; z++) {
+	  Z = z - g_proc_coords[3]*LZ;
+	  coords[3] = z / LZ;
+#ifdef MPI
+	  MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	  if((t+x+y+z)%2 == 0 && g_cart_id == id) {
+	    ix = g_lexic2eosub[ g_ipt[t0][X][Y][Z] ];
+	    iy = t*(g_nproc_x*LX*g_nproc_y*LY*g_nproc_z*LZ) +
+	      x*(g_nproc_y*LY*g_nproc_z*LZ) +
+	      y*(g_nproc_z*LZ) + z;
+	    printf(" %d %d %d %d %d, %d %d %d %d: %e %e sp\n",
+		   iy, t, x, y, z, t0, X, Y, Z, 
+		   creal(s[ix].s0.c0), cimag(s[ix].s0.c0));
+	    fflush(stdout);
+	  }
+#ifdef MPI
+	  MPI_Barrier(MPI_COMM_WORLD);
+#endif
+	}
+      }
+    }
+  }
+  return;
+}
diff --git a/io/spinor_write_stdout.h b/io/spinor_write_stdout.h
new file mode 100644
index 000000000..620a24650
--- /dev/null
+++ b/io/spinor_write_stdout.h
@@ -0,0 +1,27 @@
+/***********************************************************************
+* Copyright (C) 2012 Carsten Urbach
+*
+* This file is part of tmLQCD.
+*
+* tmLQCD is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* tmLQCD is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+
+#ifndef _SPINOR_WRITE_STDOUT_H
+#define _SPINOR_WRITE_STDOUT_H
+
+#include "su3.h"
+
+void spinor_write_stdout(spinor * const s);
+
+#endif
diff --git a/io/sw_write_stdout.c b/io/sw_write_stdout.c
new file mode 100644
index 000000000..2d28b4ccc
--- /dev/null
+++ b/io/sw_write_stdout.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+* Copyright (C) 2012 Carsten Urbach
+*
+* This file is part of tmLQCD.
+*
+* tmLQCD is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* tmLQCD is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include "global.h"
+#ifdef MPI
+# include <mpi.h>
+#endif
+#include "su3.h"
+#include "io/sw_write_stdout.h"
+
+/* Debug helper: walks the global lattice in (t,x,y,z) order; the rank whose
+ * Cartesian id matches a site prints, for mu = 0..3, the global lexicographic
+ * index iy, the global and local coordinates, mu, Re(u[ix][mu].c00) and
+ * Im(u[ix][mu].c02), tagged "sw". Under MPI every rank hits a barrier after
+ * each site. NOTE(review): Re and Im come from different entries - confirm. */
+void sw_write_stdout(su3 ** u) {
+  int X, Y, Z, t0, id = 0, ix, iy;
+  int coords[4];
+
+  for(int t = 0; t < g_nproc_t*T; t++) {
+    t0 = t - g_proc_coords[0]*T;
+    coords[0] = t / T;
+    for(int x = 0; x < g_nproc_x*LX; x++) {
+      X = x - g_proc_coords[1]*LX; 
+      coords[1] = x / LX;
+      for(int y = 0; y < g_nproc_y*LY; y++) {
+	Y = y - g_proc_coords[2]*LY;
+	coords[2] = y / LY;
+	for(int z = 0; z < g_nproc_z*LZ; z++) {
+	  Z = z - g_proc_coords[3]*LZ;
+	  coords[3] = z / LZ;
+#ifdef MPI
+	  MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	  if(g_cart_id == id) {
+	    ix = g_ipt[t0][X][Y][Z];
+	    iy = t*(g_nproc_x*LX*g_nproc_y*LY*g_nproc_z*LZ) +
+	      x*(g_nproc_y*LY*g_nproc_z*LZ) +
+	      y*(g_nproc_z*LZ) + z;
+	    for(int mu = 0; mu < 4; mu++) {
+	      printf(" %d %d %d %d %d, %d %d %d %d: %d %e %e sw\n",
+		     iy, t, x, y, z, t0, X, Y, Z, 
+		     mu, creal(u[ix][mu].c00), cimag(u[ix][mu].c02));
+
+	      fflush(stdout);
+	    }
+	  }
+#ifdef MPI
+	  MPI_Barrier(MPI_COMM_WORLD);
+#endif
+	}
+      }
+    }
+  }
+  return;
+}
diff --git a/io/sw_write_stdout.h b/io/sw_write_stdout.h
new file mode 100644
index 000000000..9c7b81007
--- /dev/null
+++ b/io/sw_write_stdout.h
@@ -0,0 +1,27 @@
+/***********************************************************************
+* Copyright (C) 2012 Carsten Urbach
+*
+* This file is part of tmLQCD.
+*
+* tmLQCD is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* tmLQCD is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+
+#ifndef _SW_WRITE_STDOUT_H
+#define _SW_WRITE_STDOUT_H
+
+#include "su3.h"
+
+void sw_write_stdout(su3 ** u);
+
+#endif

From b8a4b9904d15241bc758d1da6cea331061848026 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Thu, 6 Dec 2012 15:35:08 +0100
Subject: [PATCH 101/110] add reproducible random numbers to z2_noise generator
 for the inverter; remove z2 noise generator for arbitrary volumes; move
 generator of gaussian momenta into start.c; add repro parameter to
 Check_Approximation in P_M_eta.[c,h] and solver/mode_number.[c,h]

---
 P_M_eta.c            |   4 +-
 P_M_eta.h            |   2 +-
 hybrid_update.c      | 111 ------------------------
 invert.c             |   2 +-
 solver/mode_number.h |   2 +-
 start.c              | 196 +++++++++++++++++++++++++++++++++++--------
 start.h              |  21 +++--
 update_tm.c          |   2 +-
 8 files changed, 181 insertions(+), 159 deletions(-)

diff --git a/P_M_eta.c b/P_M_eta.c
index 14e8454b9..f980837d7 100644
--- a/P_M_eta.c
+++ b/P_M_eta.c
@@ -359,7 +359,7 @@ void X_over_sqrt_X_sqr(spinor * const R, double * const c,
 }
 
 
-void Check_Approximation(double const mstar) {
+void Check_Approximation(double const mstar, const int repro) {
 
   if(g_proc_id == 0) {
   printf("Checking the approximation of X/sqrt(X^2) in the mode number: \n");
@@ -391,7 +391,7 @@ void Check_Approximation(double const mstar) {
   Sin   =calloc(VOLUMEPLUSRAND, sizeof(spinor));
 #endif
 
-  random_spinor_field_lexic(Sin, 0);
+  random_spinor_field_lexic(Sin, repro);
 
   s_ = calloc(4*VOLUMEPLUSRAND+1, sizeof(spinor));
   s  = calloc(4, sizeof(spinor*));
diff --git a/P_M_eta.h b/P_M_eta.h
index 9a5b2ed2d..5b0067f69 100644
--- a/P_M_eta.h
+++ b/P_M_eta.h
@@ -38,7 +38,7 @@ void h_X_eta(spinor * const R,spinor * const S, double const mstar);
 
 void h_X_4_eta(spinor * const R1, spinor * const R2, spinor * const S, double const mstar);
 
-void Check_Approximation(double const mstar);
+void Check_Approximation(double const mstar, const int repro);
 
 #endif
 
diff --git a/hybrid_update.c b/hybrid_update.c
index aebff8375..9705aba69 100644
--- a/hybrid_update.c
+++ b/hybrid_update.c
@@ -47,9 +47,6 @@
 #include "phmc.h"
 #include "hybrid_update.h"
 
-
-
-
 /*----------------------------------------------------------------------------*/
 
 /*******************************************
@@ -94,111 +91,3 @@ double moment_energy(su3adj ** const momenta) {
 #endif
 }
 
-/*----------------------------------------------------------------------------*/
-
-/**************************************
- *
- * Initialises the momenta
- * with the gaussian distribution
- *
- **************************************/
-double init_momenta(const int repro, su3adj ** const momenta) {
-  
-  su3adj *xm;
-  int i, mu, t0, x, y, z, X, Y, Z, t, id = 0;
-  int coords[4];
-#ifdef MPI
-  int k;
-  int rlxd_state[105];
-#endif
-  double ALIGN yy[8];
-  double ALIGN tt, tr, ts, kc = 0., ks = 0., sum;
-  
-  if(repro) {
-#ifdef MPI
-    if(g_proc_id == 0) {
-      rlxd_get(rlxd_state);
-    }
-    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
-    rlxd_reset(rlxd_state);
-#endif
-    for(t0 = 0; t0 < g_nproc_t*T; t0++) {
-      t = t0 - T*g_proc_coords[0];
-      coords[0] = t0 / T;
-      for(x = 0; x < g_nproc_x*LX; x++) {
-	X = x - g_proc_coords[1]*LX;
-	coords[1] = x / LX;
-	for(y = 0; y < g_nproc_y*LY; y++) {
-	  Y = y - g_proc_coords[2]*LY;
-	  coords[2] = y / LY;
-	  for(z = 0; z < g_nproc_z*LZ; z++) {
-	    Z = z - g_proc_coords[3]*LZ;
-	    coords[3] = z / LZ;
-#ifdef MPI
-	    MPI_Cart_rank(g_cart_grid, coords, &id);
-#endif
-	    if(g_cart_id == id) i = g_ipt[t][X][Y][Z];
-	    for(mu = 0; mu < 4; mu++) {
-	      gauss_vector(yy,8);
-	      if(g_cart_id == id) {
-		sum = 0.;
-		xm = &momenta[i][mu];
-		(*xm).d1 = 1.4142135623731*yy[0];
-		(*xm).d2 = 1.4142135623731*yy[1];
-		sum += (*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
-		(*xm).d3 = 1.4142135623731*yy[2];
-		(*xm).d4 = 1.4142135623731*yy[3];
-		sum += (*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
-		(*xm).d5 = 1.4142135623731*yy[4];
-		(*xm).d6 = 1.4142135623731*yy[5];
-		sum += (*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
-		(*xm).d7 = 1.4142135623731*yy[6];
-		(*xm).d8 = 1.4142135623731*yy[7];
-		sum += (*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
-		tr = sum+kc;
-		ts = tr+ks;
-		tt = ts-ks;
-		ks = ts;
-		kc = tr-tt;
-	      }
-	    }
-	  }
-	}
-      }
-    }
-    kc=0.5*(ks+kc);
-  }
-  else {
-    for(i = 0; i < VOLUME; i++) { 
-      for(mu = 0; mu < 4; mu++) {
-	sum=0.;
-	xm=&momenta[i][mu];
-	gauss_vector(yy,8);
-	(*xm).d1=1.4142135623731*yy[0];
-	(*xm).d2=1.4142135623731*yy[1];
-	sum+=(*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
-	(*xm).d3=1.4142135623731*yy[2];
-	(*xm).d4=1.4142135623731*yy[3];
-	sum+=(*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
-	(*xm).d5=1.4142135623731*yy[4];
-	(*xm).d6=1.4142135623731*yy[5];
-	sum+=(*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
-	(*xm).d7=1.4142135623731*yy[6];
-	(*xm).d8=1.4142135623731*yy[7];
-	sum+=(*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
-	tr=sum+kc;
-	ts=tr+ks;
-	tt=ts-ks;
-	ks=ts;
-	kc=tr-tt;
-      }
-    }
-    kc=0.5*(ks+kc);
-  }
-#ifdef MPI
-  MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-  return ks;
-#endif
-  return kc;
-}
-
diff --git a/invert.c b/invert.c
index 43311ba1b..6c7f718fd 100644
--- a/invert.c
+++ b/invert.c
@@ -403,7 +403,7 @@ int main(int argc, char *argv[])
         s[i] = s_+i*VOLUMEPLUSRAND;
 #endif
 	
-        z2_random_spinor_field(s[i], VOLUME);
+        z2_random_spinor_field_lexic(s[i], reproduce_randomnumber_flag);
 	
 /* 	what is this here needed for?? */
 /*         spinor *aux_,*aux; */
diff --git a/solver/mode_number.h b/solver/mode_number.h
index 1091ae4bb..05534fbf0 100644
--- a/solver/mode_number.h
+++ b/solver/mode_number.h
@@ -6,6 +6,6 @@ extern double * x_cheby_coef;
 
 void mode_number(spinor * const, double const mstar);
 
-void Check_Approximation(double const mstar);
+void Check_Approximation(double const mstar, const int repro);
 
 
diff --git a/start.c b/start.c
index 0bbfcb72a..dc3dc8589 100644
--- a/start.c
+++ b/start.c
@@ -78,7 +78,7 @@
 #include "ranlxs.h"
 #include "start.h"
 
-void gauss_vector(double v[],int n)
+static void gauss_vector(double v[],int n)
 {
    int k;
    double r[2];
@@ -106,6 +106,17 @@ void gauss_vector(double v[],int n)
    }
 }
 
+/* produce a double array of z2 noise of length N */
+static void z2_vector(double *v, const int N) {
+  ranlxd(v,N);
+  for (int i = 0; i < N; ++i) {
+    if(v[i] < 0.5)
+      v[i]=1/sqrt(2);
+    else
+      v[i]=-1/sqrt(2);
+  }
+  return;
+} 
 
 static su3 unit_su3(void)
 {
@@ -255,8 +266,7 @@ void random_spinor_field_lexic(spinor * const k, const int repro) {
 	      gauss_vector(v, 24);
 	      s = k + g_ipt[tt][X][Y][Z];
 	      memcpy(s, v, 24*sizeof(double));
-	    }
-	    else {
+	    } else {
 	      ranlxd(v,24);
 	    }
 	  }
@@ -417,43 +427,65 @@ void random_spinor_field(spinor * const k, const int V, const int repro) {
   return;
 }
 
-/* Function provides a z2 random spinor field of length N with */
-void z2_random_spinor_field(spinor * const k, const int N) {
+/* Function provides a spinor field of length VOLUME
+   filled with Z2 noise */
 
-  int ix;
+void z2_random_spinor_field_lexic(spinor * const k, const int repro) {
+  int x, y, z, t, X, Y, Z, tt, id=0;
+#ifdef MPI
+  int rlxd_state[105];
+#endif
+  int coords[4];
   spinor *s;
-  double r[24];
-  double z2noise[24];
-  int rv=0;
+  double v[24];
 
-  s = k;
-  for (ix = 0;ix < N; ix++) {
-    ranlxd(r,24);
-
-    for (rv = 0  ; rv < 24; rv++){
-      if(r[rv] < 0.5)
-        z2noise[rv]=1/sqrt(2);
-      else
-        z2noise[rv]=-1/sqrt(2);
-    }
-    s->s0.c0 = z2noise[0] + z2noise[1] * I;
-    s->s0.c1 = z2noise[2] + z2noise[3] * I;
-    s->s0.c2 = z2noise[4] + z2noise[5] * I;
-    s->s1.c0 = z2noise[6] + z2noise[7] * I;
-    s->s1.c1 = z2noise[8] + z2noise[9] * I;
-    s->s1.c2 = z2noise[10] + z2noise[11] * I;
-    s->s2.c0 = z2noise[12] + z2noise[13] * I;
-    s->s2.c1 = z2noise[14] + z2noise[15] * I;
-    s->s2.c2 = z2noise[16] + z2noise[17] * I;
-    s->s3.c0 = z2noise[18] + z2noise[19] * I;
-    s->s3.c1 = z2noise[20] + z2noise[21] * I;
-    s->s3.c2 = z2noise[22] + z2noise[23] * I;
-    s++;
+  if(repro) {
+#ifdef MPI
+    if(g_proc_id == 0) {
+      rlxd_get(rlxd_state);
+    }
+    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
+    if(g_proc_id != 0) {
+      rlxd_reset(rlxd_state);
+    }
+#endif
+    for(t = 0; t < g_nproc_t*T; t++) {
+      tt = t - g_proc_coords[0]*T;
+      coords[0] = t / T;
+      for(x = 0; x < g_nproc_x*LX; x++) {
+	X = x - g_proc_coords[1]*LX; 
+	coords[1] = x / LX;
+	for(y = 0; y < g_nproc_y*LY; y++) {
+	  Y = y - g_proc_coords[2]*LY;
+	  coords[2] = y / LY;
+	  for(z = 0; z < g_nproc_z*LZ; z++) {
+	    Z = z - g_proc_coords[3]*LZ;
+	    coords[3] = z / LZ;
+#ifdef MPI
+	    MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	    if(g_cart_id == id) {
+	      z2_vector(v, 24);
+	      s = k + g_ipt[tt][X][Y][Z];
+	      memcpy(s, v, 24*sizeof(double));
+	    } else {
+	      ranlxd(v,24);
+	    }
+	  }
+	}
+      }
+    }
+  }
+  else {
+    for(x = 0; x < VOLUME; x++) {
+      z2_vector(v, 24);
+      s = k + x;
+      memcpy(s, v, 24*sizeof(double));
+    }
   }
   return;
 }
 
-
 /* Function provides a zero spinor field of length N with */
 void zero_spinor_field(spinor * const k, const int N)
 {
@@ -596,6 +628,104 @@ void random_gauge_field(const int repro) {
   return;
 }
 
+/* writes gaussian distributed random momenta of length VOLUME into momenta array
+   and returns their energy contribution */
+double random_su3adj_field(const int repro, su3adj ** const momenta) {
+  su3adj *xm;
+  int i, mu, t0, x, y, z, X, Y, Z, t, id = 0;
+  int coords[4];
+#ifdef MPI
+  int k;
+  int rlxd_state[105];
+#endif
+  double ALIGN yy[8];
+  double ALIGN tt, tr, ts, kc = 0., ks = 0., sum;
+  
+  if(repro) {
+#ifdef MPI
+    if(g_proc_id == 0) {
+      rlxd_get(rlxd_state);
+    }
+    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
+    rlxd_reset(rlxd_state);
+#endif
+    for(t0 = 0; t0 < g_nproc_t*T; t0++) {
+      t = t0 - T*g_proc_coords[0];
+      coords[0] = t0 / T;
+      for(x = 0; x < g_nproc_x*LX; x++) {
+	X = x - g_proc_coords[1]*LX;
+	coords[1] = x / LX;
+	for(y = 0; y < g_nproc_y*LY; y++) {
+	  Y = y - g_proc_coords[2]*LY;
+	  coords[2] = y / LY;
+	  for(z = 0; z < g_nproc_z*LZ; z++) {
+	    Z = z - g_proc_coords[3]*LZ;
+	    coords[3] = z / LZ;
+#ifdef MPI
+	    MPI_Cart_rank(g_cart_grid, coords, &id);
+#endif
+	    if(g_cart_id == id) i = g_ipt[t][X][Y][Z];
+	    for(mu = 0; mu < 4; mu++) {
+	      gauss_vector(yy,8);
+	      if(g_cart_id == id) {
+		sum = 0.;
+		xm = &momenta[i][mu];
+		(*xm).d1 = 1.4142135623731*yy[0];
+		(*xm).d2 = 1.4142135623731*yy[1];
+		sum += (*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
+		(*xm).d3 = 1.4142135623731*yy[2];
+		(*xm).d4 = 1.4142135623731*yy[3];
+		sum += (*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
+		(*xm).d5 = 1.4142135623731*yy[4];
+		(*xm).d6 = 1.4142135623731*yy[5];
+		sum += (*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
+		tr = sum+kc;
+		ts = tr+ks;
+		tt = ts-ks;
+		ks = ts;
+		kc = tr-tt;
+	      }
+	    }
+	  }
+	}
+      }
+    }
+    kc=0.5*(ks+kc);
+  }
+  else {
+    for(i = 0; i < VOLUME; i++) { 
+      for(mu = 0; mu < 4; mu++) {
+	sum=0.;
+	xm=&momenta[i][mu];
+	gauss_vector(yy,8);
+	(*xm).d1=1.4142135623731*yy[0];
+	(*xm).d2=1.4142135623731*yy[1];
+	sum+=(*xm).d1*(*xm).d1+(*xm).d2*(*xm).d2;
+	(*xm).d3=1.4142135623731*yy[2];
+	(*xm).d4=1.4142135623731*yy[3];
+	sum+=(*xm).d3*(*xm).d3+(*xm).d4*(*xm).d4;
+	(*xm).d5=1.4142135623731*yy[4];
+	(*xm).d6=1.4142135623731*yy[5];
+	sum+=(*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
+	(*xm).d7=1.4142135623731*yy[6];
+	(*xm).d8=1.4142135623731*yy[7];
+	sum+=(*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
+	tr=sum+kc;
+	ts=tr+ks;
+	tt=ts-ks;
+	ks=ts;
+	kc=tr-tt;
+      }
+    }
+    kc=0.5*(ks+kc);
+  }
+#ifdef MPI
+  MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  return ks;
+#endif
+  return kc;
+}
+
 void set_spinor_point(spinor * s, const double c)
 {
   s->s0.c0 = c * (1 + I);
diff --git a/start.h b/start.h
index 07b1a8f80..92abb8d1d 100644
--- a/start.h
+++ b/start.h
@@ -20,27 +20,30 @@
 #ifndef _START_H
 #define _START_H
 
-void gauss_vector(double v[],int n);
-su3_vector random_su3_vector(void);
-su3_vector unif_su3_vector(void);
-void random_spinor(spinor * const s);
 void unit_spinor_field(const int k);
+void zero_spinor_field(spinor * const k, const int N);
+void constant_spinor_field(spinor * const k, const int p, const int N);
 
 void random_spinor_field_lexic(spinor * const k, const int repro);
-void random_spinor_field(spinor * const k, const int V, const int repro);
 void random_spinor_field_eo(spinor * const k, const int repro);
-void z2_random_spinor_field(spinor * const k, const int N);
-void zero_spinor_field(spinor * const k, const int N);
-void constant_spinor_field(spinor * const k, const int p, const int N);
-void random_su3(su3 * const u);
+
+void z2_random_spinor_field_lexic(spinor * const k, const int repro);
+void random_spinor_field(spinor * const k, const int V, const int repro);
+
 void unit_g_gauge_field(void);
+
 void random_gauge_field(const int repro);
+
+double random_su3adj_field(const int repro, su3adj ** const momenta);
+
 void set_spinor_field(int k, const double c);
 void set_gauge_field(const double c);
 void set_spinor_point(spinor * s, const double c);
 su3 set_su3(const double c);
+
 void source_spinor_field(spinor * const P, spinor * const Q, int is, int ic);
 void source_spinor_field_point_from_file(spinor * const P, spinor * const Q, int is, int ic, int source_indx);
+
 void start_ranlux(int level,int seed);
 
 void gen_test_spinor_field(spinor * const k , const int eoflag);
diff --git a/update_tm.c b/update_tm.c
index 0c4b001b8..d07146de3 100644
--- a/update_tm.c
+++ b/update_tm.c
@@ -136,7 +136,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
   }
 
   /* initialize the momenta  */
-  enep = init_momenta(reproduce_randomnumber_flag, hf.momenta);
+  enep = random_su3adj_field(reproduce_randomnumber_flag, hf.momenta);
 
   g_sloppy_precision = 1;
 

From 2ce7a899d93e378c6783f20954d4c8fadaa9c68f Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Thu, 6 Dec 2012 16:18:49 +0100
Subject: [PATCH 102/110] return to XOR'ing random seed with nstore instead of
 nstore+1

---
 hmc_tm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hmc_tm.c b/hmc_tm.c
index 9fb71c351..76e101c2e 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -338,7 +338,7 @@ int main(int argc,char *argv[]) {
 #endif
 
   /* Initialise random number generator */
-  start_ranlux(rlxd_level, random_seed^(nstore+1) );
+  start_ranlux(rlxd_level, random_seed^nstore );
 
   /* Set up the gauge field */
   /* continue and restart */

From bd63fce39b4f7507fa2043eca46e8fa51de730e4 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Fri, 7 Dec 2012 09:47:04 +0100
Subject: [PATCH 103/110] removed random_spinor_field for arbitrary lengths

---
 start.c | 87 ---------------------------------------------------------
 start.h |  1 -
 2 files changed, 88 deletions(-)

diff --git a/start.c b/start.c
index dc3dc8589..efbb7603a 100644
--- a/start.c
+++ b/start.c
@@ -340,93 +340,6 @@ void random_spinor_field_eo(spinor * const k, const int repro) {
   return;
 }
 
-void random_spinor_field(spinor * const k, const int V, const int repro) {
-
-  int ix, t0, t, x, X, y, Y, z, Z, id = 0;
-  int coords[4];
-#ifdef MPI
-  int rlxd_state[105];
-#endif
-  spinor *s;
-  double v[6];
-  if(repro) {
-#ifdef MPI
-    if(g_proc_id == 0) {
-      rlxd_get(rlxd_state);
-    }
-    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
-    rlxd_reset(rlxd_state);
-#endif
-    for(t0 = 0; t0 < g_nproc_t*T; t0++) {
-      t = t0 - T*g_proc_coords[0];
-      coords[0] = t0 / T;
-      for(x = 0; x < g_nproc_x*LX; x++) {
-	X = x - g_proc_coords[1]*LX;
-	coords[1] = x / LX;
-	for(y = 0; y < g_nproc_y*LY; y++) {
-	  Y = y - g_proc_coords[2]*LY;
-	  coords[2] = y / LY;
-	  for(z = 0; z < g_nproc_z*LZ; z++) {
-	    Z = z - g_proc_coords[3]*LZ;
-	    coords[3] = z / LZ;
-#ifdef MPI
-	    MPI_Cart_rank(g_cart_grid, coords, &id);
-#endif
-	    if(g_cart_id == id) ix = g_lexic2eosub[ g_ipt[t][X][Y][Z] ];
-	    gauss_vector(v, 6);
-	    if(g_cart_id == id) {
-	      s = k + ix;
-	      s->s0.c0 = v[0] + v[1] * I;
-	      s->s0.c1 = v[2] + v[3] * I;
-	      s->s0.c2 = v[4] + v[5] * I;
-	    }
-	    gauss_vector(v,6);
-	    if(g_cart_id == id) {
-	      s->s1.c0 = v[0] + v[1] * I;
-	      s->s1.c1 = v[2] + v[3] * I;
-	      s->s1.c2 = v[4] + v[5] * I;
-	    }
-	    gauss_vector(v,6);
-	    if(g_cart_id == id) {
-	      s->s2.c0 = v[0] + v[1] * I;
-	      s->s2.c1 = v[2] + v[3] * I;
-	      s->s2.c2 = v[4] + v[5] * I;
-	    }
-	    gauss_vector(v,6);
-	    if(g_cart_id == id) {
-	      s->s3.c0 = v[0] + v[1] * I;
-	      s->s3.c1 = v[2] + v[3] * I;
-	      s->s3.c2 = v[4] + v[5] * I;
-	    }
-	  }
-	}
-      }
-    }
-  }
-  else {
-    for (ix = 0; ix < V; ix++) {
-      s = k + ix;
-      gauss_vector(v, 6);
-      s->s0.c0 = v[0] + v[1] * I;
-      s->s0.c1 = v[2] + v[3] * I;
-      s->s0.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s1.c0 = v[0] + v[1] * I;
-      s->s1.c1 = v[2] + v[3] * I;
-      s->s1.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s2.c0 = v[0] + v[1] * I;
-      s->s2.c1 = v[2] + v[3] * I;
-      s->s2.c2 = v[4] + v[5] * I;
-      gauss_vector(v,6);
-      s->s3.c0 = v[0] + v[1] * I;
-      s->s3.c1 = v[2] + v[3] * I;
-      s->s3.c2 = v[4] + v[5] * I;
-    }
-  }
-  return;
-}
-
 /* Function provides a spinor field of length VOLUME
    filled with Z2 noise */
 
diff --git a/start.h b/start.h
index 92abb8d1d..b63cca093 100644
--- a/start.h
+++ b/start.h
@@ -28,7 +28,6 @@ void random_spinor_field_lexic(spinor * const k, const int repro);
 void random_spinor_field_eo(spinor * const k, const int repro);
 
 void z2_random_spinor_field_lexic(spinor * const k, const int repro);
-void random_spinor_field(spinor * const k, const int V, const int repro);
 
 void unit_g_gauge_field(void);
 

From f921a6df31d0aeb95f01b14419a73e497b46b751 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Fri, 7 Dec 2012 10:14:25 +0100
Subject: [PATCH 104/110] add protective state saving to "reproduce random
 numbers" mode to make sure that even if a function requests random numbers
 without knowing about the current mode, random numbers are never reused

correct a bug in momenta generation introduced when it was moved from hybrid_update to start.c
---
 start.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 5 deletions(-)

diff --git a/start.c b/start.c
index efbb7603a..cde0930e7 100644
--- a/start.c
+++ b/start.c
@@ -232,6 +232,7 @@ void random_spinor_field_lexic(spinor * const k, const int repro) {
   int x, y, z, t, X, Y, Z, tt, id=0;
 #ifdef MPI
   int rlxd_state[105];
+  int rlxd_state_backup[105];
 #endif
   int coords[4];
   spinor *s;
@@ -239,7 +240,9 @@ void random_spinor_field_lexic(spinor * const k, const int repro) {
 
   if(repro) {
 #ifdef MPI
-    if(g_proc_id == 0) {
+    if(g_proc_id != 0) {
+      rlxd_get(rlxd_state_backup);
+    } else if(g_proc_id == 0) {
       rlxd_get(rlxd_state);
     }
     MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
@@ -273,6 +276,11 @@ void random_spinor_field_lexic(spinor * const k, const int repro) {
 	}
       }
     }
+#ifdef MPI
+    if(g_proc_id != 0) {
+      rlxd_reset(rlxd_state_backup);
+    }
+#endif
   }
   else {
     for(x = 0; x < VOLUME; x++) {
@@ -288,6 +296,7 @@ void random_spinor_field_eo(spinor * const k, const int repro) {
   int x, X, y, Y, z, Z, t, t0, id = 0;
 #ifdef MPI
   int rlxd_state[105];
+  int rlxd_state_backup[105];
 #endif
   int coords[4];
   spinor *s;
@@ -295,7 +304,9 @@ void random_spinor_field_eo(spinor * const k, const int repro) {
 
   if(repro) {
 #ifdef MPI
-    if(g_proc_id == 0) {
+    if(g_proc_id != 0) {
+      rlxd_get(rlxd_state_backup);
+    } else if(g_proc_id == 0) {
       rlxd_get(rlxd_state);
     }
     MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
@@ -329,6 +340,11 @@ void random_spinor_field_eo(spinor * const k, const int repro) {
 	}
       }
     }
+#ifdef MPI
+    if(g_proc_id != 0) {
+      rlxd_reset(rlxd_state_backup);
+    }
+#endif
   }
   else {
     for (x = 0; x < VOLUME/2; x++) {
@@ -347,6 +363,7 @@ void z2_random_spinor_field_lexic(spinor * const k, const int repro) {
   int x, y, z, t, X, Y, Z, tt, id=0;
 #ifdef MPI
   int rlxd_state[105];
+  int rlxd_state_backup[105];
 #endif
   int coords[4];
   spinor *s;
@@ -354,7 +371,9 @@ void z2_random_spinor_field_lexic(spinor * const k, const int repro) {
 
   if(repro) {
 #ifdef MPI
-    if(g_proc_id == 0) {
+    if(g_proc_id != 0) {
+      rlxd_get(rlxd_state_backup);
+    } else if(g_proc_id == 0) {
       rlxd_get(rlxd_state);
     }
     MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
@@ -388,6 +407,11 @@ void z2_random_spinor_field_lexic(spinor * const k, const int repro) {
 	}
       }
     }
+#ifdef MPI
+  if(g_proc_id != 0) {
+    rlxd_reset(rlxd_state_backup);
+  }
+#endif
   }
   else {
     for(x = 0; x < VOLUME; x++) {
@@ -488,11 +512,14 @@ void random_gauge_field(const int repro) {
   su3 ALIGN tmp;
 #ifdef MPI
   int rlxd_state[105];
+  int rlxd_state_backup[105];
 #endif
 
   if(repro) {
 #ifdef MPI
-    if(g_proc_id == 0) {
+    if(g_proc_id != 0) {
+      rlxd_get(rlxd_state_backup);
+    } else if(g_proc_id == 0) {
       rlxd_get(rlxd_state);
     }
     MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
@@ -526,6 +553,11 @@ void random_gauge_field(const int repro) {
 	}
       }
     }
+#ifdef MPI
+    if(g_proc_id != 0) {
+      rlxd_get(rlxd_state_backup);
+    }
+#endif
   }
   else {
     for (ix = 0; ix < VOLUME; ix++) {
@@ -550,13 +582,16 @@ double random_su3adj_field(const int repro, su3adj ** const momenta) {
 #ifdef MPI
   int k;
   int rlxd_state[105];
+  int rlxd_state_backup[105];
 #endif
   double ALIGN yy[8];
   double ALIGN tt, tr, ts, kc = 0., ks = 0., sum;
   
   if(repro) {
 #ifdef MPI
-    if(g_proc_id == 0) {
+    if(g_proc_id != 0) {
+      rlxd_get(rlxd_state_backup);
+    } else if(g_proc_id == 0) {
       rlxd_get(rlxd_state);
     }
     MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
@@ -592,6 +627,10 @@ double random_su3adj_field(const int repro, su3adj ** const momenta) {
 		(*xm).d5 = 1.4142135623731*yy[4];
 		(*xm).d6 = 1.4142135623731*yy[5];
 		sum += (*xm).d5*(*xm).d5+(*xm).d6*(*xm).d6;
+	  (*xm).d7 = 1.4142135623731*yy[6];
+	  (*xm).d8 = 1.4142135623731*yy[7];
+	  sum+=(*xm).d7*(*xm).d7+(*xm).d8*(*xm).d8;
+	  tr=sum+kc;
 		tr = sum+kc;
 		ts = tr+ks;
 		tt = ts-ks;
@@ -604,6 +643,11 @@ double random_su3adj_field(const int repro, su3adj ** const momenta) {
       }
     }
     kc=0.5*(ks+kc);
+#ifdef MPI
+    if(g_proc_id != 0) {
+      rlxd_reset(rlxd_state_backup);
+    }
+#endif
   }
   else {
     for(i = 0; i < VOLUME; i++) { 

From 00aef62968e38b5e1135d2971f85bb34e403401d Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Fri, 7 Dec 2012 11:51:52 +0100
Subject: [PATCH 105/110] update "write_first_messages" to work for general
 executables and add functionality to pass the git_hash down to that function

---
 check_locallity.c              |  2 +-
 hmc_tm.c                       |  2 +-
 invert.c                       |  2 +-
 io/utils.h                     |  2 +-
 io/utils_write_first_message.c | 24 ++++++++++--------------
 5 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/check_locallity.c b/check_locallity.c
index 3711edccd..3de2dc934 100644
--- a/check_locallity.c
+++ b/check_locallity.c
@@ -192,7 +192,7 @@ int main(int argc,char *argv[]) {
     strcpy(parameterfilename,filename);  strcat(parameterfilename,".para");
     
     parameterfile=fopen(parameterfilename, "w");
-    write_first_messages(parameterfile, 0, 1);
+    write_first_messages(parameterfile, "check_locality", "NA");
   }
 
   /* define the geometry */
diff --git a/hmc_tm.c b/hmc_tm.c
index 9fb71c351..17334df0d 100644
--- a/hmc_tm.c
+++ b/hmc_tm.c
@@ -306,7 +306,7 @@ int main(int argc,char *argv[]) {
 
   if(g_proc_id == 0){
     parameterfile = fopen(parameterfilename, "a");
-    write_first_messages(parameterfile, 0);
+    write_first_messages(parameterfile, "hmc", git_hash);
   }
 
   /* define the geometry */
diff --git a/invert.c b/invert.c
index 43311ba1b..2ad678def 100644
--- a/invert.c
+++ b/invert.c
@@ -286,7 +286,7 @@ int main(int argc, char *argv[])
     strcat(parameterfilename, ".para");
 
     parameterfile = fopen(parameterfilename, "w");
-    write_first_messages(parameterfile, 1);
+    write_first_messages(parameterfile, "invert", git_hash);
     fclose(parameterfile);
   }
 
diff --git a/io/utils.h b/io/utils.h
index 7912848bc..79dd51b72 100644
--- a/io/utils.h
+++ b/io/utils.h
@@ -80,7 +80,7 @@ void single2double_cm(spinor * const R, float * const S);
 void double2single_cm(float * const S, spinor * const R);
 void zero_spinor(spinor * const R);
 
-int write_first_messages(FILE * parameterfile, const int inv);
+int write_first_messages(FILE * parameterfile, const char const *executable, const char const *git_hash);
 int parse_propagator_type(READER * reader);
 
 int parse_ildgformat_xml(char *message, paramsIldgFormat *ildgformat);
diff --git a/io/utils_write_first_message.c b/io/utils_write_first_message.c
index 9b4c7ae78..db97b15a1 100644
--- a/io/utils_write_first_message.c
+++ b/io/utils_write_first_message.c
@@ -17,21 +17,17 @@
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
  ***********************************************************************/
 
+#include <string.h>
+
 #include "utils.ih"
 #include <read_input.h>
 
-int write_first_messages(FILE * parameterfile, const int inv) {
+int write_first_messages(FILE * parameterfile, const char const *executable, const char const *git_hash) {
+  char message[1024];
+  snprintf(message, 1024, "This is the %s code for twisted mass Wilson QCD\n\nVersion %s, commit %s\n",executable,PACKAGE_VERSION,git_hash);
+  printf("%s",message);
+  fprintf(parameterfile,"%s",message);
 
-  if(inv != 1) {
-    printf("# This is the hmc code for twisted Mass Wilson QCD\n\nVersion %s\n", PACKAGE_VERSION);
-    fprintf(parameterfile, 
-	    "# This is the hmc code for twisted Mass Wilson QCD\n\nVersion %s\n", PACKAGE_VERSION);
-  }
-  else {
-    printf("# This is the invert code for twisted Mass Wilson QCD\n\nVersion %s\n", PACKAGE_VERSION);
-    fprintf(parameterfile, 
-	    "# This is the invert code for twisted Mass Wilson QCD\n\nVersion %s\n", PACKAGE_VERSION);
-  }
 #ifdef SSE
   printf("# The code is compiled with SSE instructions\n");
   fprintf(parameterfile, 
@@ -136,7 +132,7 @@ int write_first_messages(FILE * parameterfile, const int inv) {
   }
   printf("# beta = %f , kappa= %f\n", g_beta, g_kappa);
   printf("# boundary conditions for fermion fields (t,x,y,z) * pi: %f %f %f %f \n",X0,X1,X2,X3);
-  if(inv != 1) {
+  if( strcmp(executable,"hmc") == 0 ) {
     printf("# mu = %f\n", g_mu/2./g_kappa);
     printf("# g_rgi_C0 = %f, g_rgi_C1 = %f\n", g_rgi_C0, g_rgi_C1);
     printf("# Using %s precision for the inversions!\n", 
@@ -147,7 +143,7 @@ int write_first_messages(FILE * parameterfile, const int inv) {
   fprintf(parameterfile, "# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY), (int)(LZ));
   fprintf(parameterfile, "# g_beta = %f , g_kappa= %f, g_kappa*csw/8= %f \n",g_beta,g_kappa,g_ka_csw_8);
   fprintf(parameterfile, "# boundary conditions for fermion fields (t,x,y,z) * pi: %f %f %f %f \n",X0,X1,X2,X3);
-  if(inv != 1) {
+  if( strcmp(executable,"hmc") == 0 ) {
     fprintf(parameterfile, "# ITER_MAX_BCG=%d\n", ITER_MAX_BCG);
     fprintf(parameterfile, "# Nmeas=%d, Nsave=%d \n",
 	    Nmeas,Nsave);
@@ -156,7 +152,7 @@ int write_first_messages(FILE * parameterfile, const int inv) {
     fprintf(parameterfile, "# Using %s precision for the inversions!\n", 
 	    g_relative_precision_flag ? "relative" : "absolute");
   }
-  if(inv == 1) {
+  if( strcmp(executable,"invert") == 0 ) {
     printf("# beta = %f, mu = %f, kappa = %f\n", g_beta, g_mu/2./g_kappa, g_kappa);
     fprintf(parameterfile,
 	    "# beta = %f, mu = %f, kappa = %f\n", g_beta, g_mu/2./g_kappa, g_kappa);

From 44cb9f71867f00a9eeacf1473c8999ba60ba28fb Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Fri, 7 Dec 2012 18:54:06 +0100
Subject: [PATCH 106/110] merge Z2 noise and random_spinor_field_* and provide
 the ability to also get EO Z2 noise.

---
 P_M_eta.c                          |   2 +-
 Ptilde_nd.c                        |   4 +-
 benchmark.c                        |   4 +-
 chebyshev_polynomial.c             |   4 +-
 chebyshev_polynomial_nd.c          |   4 +-
 hopping_test.c                     |   2 +-
 invert.c                           |   2 +-
 monomial/cloverdet_monomial.c      |   2 +-
 monomial/cloverdetratio_monomial.c |   2 +-
 monomial/cloverndpoly_monomial.c   |   4 +-
 monomial/det_monomial.c            |   4 +-
 monomial/detratio_monomial.c       |   4 +-
 monomial/ndpoly_monomial.c         |   4 +-
 monomial/poly_monomial.c           |   2 +-
 reweighting_factor.c               |   8 +--
 reweighting_factor_nd.c            |   4 +-
 solver/dfl_projector.c             |   6 +-
 solver/generate_dfl_subspace.c     |   2 +-
 solver/mode_number.c               |   2 +-
 start.c                            | 105 +++++++++--------------------
 start.h                            |   8 +--
 21 files changed, 69 insertions(+), 110 deletions(-)

diff --git a/P_M_eta.c b/P_M_eta.c
index f980837d7..6e44bbbdc 100644
--- a/P_M_eta.c
+++ b/P_M_eta.c
@@ -391,7 +391,7 @@ void Check_Approximation(double const mstar, const int repro) {
   Sin   =calloc(VOLUMEPLUSRAND, sizeof(spinor));
 #endif
 
-  random_spinor_field_lexic(Sin, repro);
+  random_spinor_field_lexic(Sin, repro, RN_GAUSS);
 
   s_ = calloc(4*VOLUMEPLUSRAND+1, sizeof(spinor));
   s  = calloc(4, sizeof(spinor*));
diff --git a/Ptilde_nd.c b/Ptilde_nd.c
index 3a003063f..25a87bf9a 100644
--- a/Ptilde_nd.c
+++ b/Ptilde_nd.c
@@ -307,8 +307,8 @@ void degree_of_Ptilde(int * _degree, double ** coefs,
   if(g_debug_level > 0) {
     /* Ptilde P S P  Ptilde X - X */
     /* for random spinor X        */
-    random_spinor_field_eo(ss, repro);
-    random_spinor_field_eo(sc, repro);
+    random_spinor_field_eo(ss, repro, RN_GAUSS);
+    random_spinor_field_eo(sc, repro, RN_GAUSS);
 
     Ptilde_ndpsi(&auxs[0], &auxc[0], *coefs, degree, &ss[0], &sc[0], Qsq);
     Ptilde_ndpsi(&aux2s[0], &aux2c[0], phmc_dop_cheby_coef, phmc_dop_n_cheby, &auxs[0], &auxc[0], Qsq);
diff --git a/benchmark.c b/benchmark.c
index 041019cbe..b02ae3605 100644
--- a/benchmark.c
+++ b/benchmark.c
@@ -268,7 +268,7 @@ int main(int argc,char *argv[])
     j_max=2048;
     sdt=0.;
     for (k = 0; k < k_max; k++) {
-      random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag);
+      random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
     }
     
     while(sdt < 30.) {
@@ -366,7 +366,7 @@ int main(int argc,char *argv[])
     j_max=1;
     sdt=0.;
     for (k=0;k<k_max;k++) {
-      random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag);
+      random_spinor_field_lexic(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
     }
     
     while(sdt < 3.) {
diff --git a/chebyshev_polynomial.c b/chebyshev_polynomial.c
index 05893bbf1..bbf6e4218 100644
--- a/chebyshev_polynomial.c
+++ b/chebyshev_polynomial.c
@@ -292,8 +292,8 @@ void degree_of_polynomial(const int repro){
    chebyshev_polynomial(cheb_evmin, cheb_evmax, dop_cheby_coef, N_CHEBYMAX, 0.25);
 
    temp=1.0;
-   random_spinor_field_eo(ss, repro);
-   random_spinor_field_eo(sc, repro);
+   random_spinor_field_eo(ss, repro, RN_GAUSS);
+   random_spinor_field_eo(sc, repro, RN_GAUSS);
 /*   assign(&sc[0], &ss[0],VOLUME/2);
 
   Qtm_pm_psi(&auxs[0], &ss[0]);
diff --git a/chebyshev_polynomial_nd.c b/chebyshev_polynomial_nd.c
index 16eccb209..e322dd690 100644
--- a/chebyshev_polynomial_nd.c
+++ b/chebyshev_polynomial_nd.c
@@ -130,8 +130,8 @@ void degree_of_polynomial_nd(int * _degree_of_p, double ** coefs,
   
   chebyshev_coefs(EVMin, EVMax, *coefs, degree_of_p, -0.5);
 
-  random_spinor_field_eo(ss, repro);
-  random_spinor_field_eo(sc, repro);
+  random_spinor_field_eo(ss, repro, RN_GAUSS);
+  random_spinor_field_eo(sc, repro, RN_GAUSS);
 
   if((g_proc_id == g_stdio_proc) && (g_debug_level > 0)){
     printf("# NDPOLY MD Polynomial: EVmin = %e  EVmax = %e  \n", EVMin, EVMax);
diff --git a/hopping_test.c b/hopping_test.c
index c86694dc2..6d519e396 100644
--- a/hopping_test.c
+++ b/hopping_test.c
@@ -270,7 +270,7 @@ int main(int argc,char *argv[])
     /*initialize the pseudo-fermion fields*/
     j_max=1;
     for (k = 0; k < k_max; k++) {
-      random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag);
+      random_spinor_field_eo(g_spinor_field[k], reproduce_randomnumber_flag, RN_GAUSS);
     }
 
     if (read_source_flag == 2) { /* save */
diff --git a/invert.c b/invert.c
index 6c7f718fd..48b246ce4 100644
--- a/invert.c
+++ b/invert.c
@@ -403,7 +403,7 @@ int main(int argc, char *argv[])
         s[i] = s_+i*VOLUMEPLUSRAND;
 #endif
 	
-        z2_random_spinor_field_lexic(s[i], reproduce_randomnumber_flag);
+        random_spinor_field_lexic(s[i], reproduce_randomnumber_flag,RN_Z2);
 	
 /* 	what is this here needed for?? */
 /*         spinor *aux_,*aux; */
diff --git a/monomial/cloverdet_monomial.c b/monomial/cloverdet_monomial.c
index 5281aa19b..d320cf0d8 100644
--- a/monomial/cloverdet_monomial.c
+++ b/monomial/cloverdet_monomial.c
@@ -149,7 +149,7 @@ void cloverdet_heatbath(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert(EE, mnl->mu);
 
-  random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
+  random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro, RN_GAUSS);
   mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   
   mnl->Qp(mnl->pf, mnl->w_fields[0]);
diff --git a/monomial/cloverdetratio_monomial.c b/monomial/cloverdetratio_monomial.c
index 95904c9fa..66785252b 100644
--- a/monomial/cloverdetratio_monomial.c
+++ b/monomial/cloverdetratio_monomial.c
@@ -240,7 +240,7 @@ void cloverdetratio_heatbath(const int id, hamiltonian_field_t * const hf) {
   sw_term( (const su3**) hf->gaugefield, mnl->kappa, mnl->c_sw); 
   sw_invert(EE, mnl->mu);
 
-  random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
+  random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro, RN_GAUSS);
   mnl->energy0  = square_norm(mnl->w_fields[0], VOLUME/2, 1);
   
   g_mu3 = mnl->rho;
diff --git a/monomial/cloverndpoly_monomial.c b/monomial/cloverndpoly_monomial.c
index 0b7fecf45..01ebee93a 100644
--- a/monomial/cloverndpoly_monomial.c
+++ b/monomial/cloverndpoly_monomial.c
@@ -150,10 +150,10 @@ void cloverndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
   mnl->energy0 = 0.;
-  random_spinor_field_eo(g_chi_up_spinor_field[0], mnl->rngrepro);
+  random_spinor_field_eo(g_chi_up_spinor_field[0], mnl->rngrepro, RN_GAUSS);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
 
-  random_spinor_field_eo(g_chi_dn_spinor_field[0], mnl->rngrepro);
+  random_spinor_field_eo(g_chi_dn_spinor_field[0], mnl->rngrepro, RN_GAUSS);
   mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
 
   Qsw_ndpsi(g_chi_up_spinor_field[1], g_chi_dn_spinor_field[1], 
diff --git a/monomial/det_monomial.c b/monomial/det_monomial.c
index e68738ea3..c9b41fd87 100644
--- a/monomial/det_monomial.c
+++ b/monomial/det_monomial.c
@@ -156,7 +156,7 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
   mnl->iter1 = 0;
 
   if(mnl->even_odd_flag) {
-    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
+    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro, RN_GAUSS);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
     mnl->Qp(mnl->pf, mnl->w_fields[0]);
@@ -168,7 +168,7 @@ void det_heatbath(const int id, hamiltonian_field_t * const hf) {
     }
   }
   else {
-    random_spinor_field_lexic(mnl->w_fields[0], mnl->rngrepro);
+    random_spinor_field_lexic(mnl->w_fields[0], mnl->rngrepro,RN_GAUSS);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME, 1);
 
     Q_plus_psi(mnl->pf, mnl->w_fields[0]);
diff --git a/monomial/detratio_monomial.c b/monomial/detratio_monomial.c
index f95095688..841949a41 100644
--- a/monomial/detratio_monomial.c
+++ b/monomial/detratio_monomial.c
@@ -198,7 +198,7 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
   mnl->iter0 = 0;
   mnl->iter1 = 0;
   if(mnl->even_odd_flag) {
-    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
+    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro, RN_GAUSS);
     mnl->energy0  = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
     mnl->Qp(mnl->w_fields[1], mnl->w_fields[0]);
@@ -212,7 +212,7 @@ void detratio_heatbath(const int id, hamiltonian_field_t * const hf) {
 			mnl->csg_N, &mnl->csg_n, VOLUME/2);
   }
   else {
-    random_spinor_field_lexic(mnl->w_fields[0], mnl->rngrepro);
+    random_spinor_field_lexic(mnl->w_fields[0], mnl->rngrepro,RN_GAUSS);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME, 1);
 
     Q_plus_psi(mnl->w_fields[1], mnl->w_fields[0]);
diff --git a/monomial/ndpoly_monomial.c b/monomial/ndpoly_monomial.c
index b5d98f1cd..7c8491aa7 100644
--- a/monomial/ndpoly_monomial.c
+++ b/monomial/ndpoly_monomial.c
@@ -170,11 +170,11 @@ void ndpoly_heatbath(const int id, hamiltonian_field_t * const hf) {
   }
 
   mnl->energy0 = 0.;
-  random_spinor_field_eo(g_chi_up_spinor_field[0], mnl->rngrepro);
+  random_spinor_field_eo(g_chi_up_spinor_field[0], mnl->rngrepro, RN_GAUSS);
   mnl->energy0 = square_norm(g_chi_up_spinor_field[0], VOLUME/2, 1);
 
   if(g_epsbar!=0.0 || phmc_exact_poly == 0) {
-    random_spinor_field_eo(g_chi_dn_spinor_field[0], mnl->rngrepro);
+    random_spinor_field_eo(g_chi_dn_spinor_field[0], mnl->rngrepro, RN_GAUSS);
     mnl->energy0 += square_norm(g_chi_dn_spinor_field[0], VOLUME/2, 1);
   } 
   else {
diff --git a/monomial/poly_monomial.c b/monomial/poly_monomial.c
index a0b4a5c8a..9df908bb8 100644
--- a/monomial/poly_monomial.c
+++ b/monomial/poly_monomial.c
@@ -279,7 +279,7 @@ void poly_heatbath(const int id, hamiltonian_field_t * const hf){
   if(mnl->even_odd_flag) {
 
 
-    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro);
+    random_spinor_field_eo(mnl->w_fields[0], mnl->rngrepro, RN_GAUSS);
     mnl->energy0 = square_norm(mnl->w_fields[0], VOLUME/2, 1);
 
     if(g_proc_id == 0 && g_debug_level > 3) {
diff --git a/reweighting_factor.c b/reweighting_factor.c
index c1c1b777b..cfa27d990 100644
--- a/reweighting_factor.c
+++ b/reweighting_factor.c
@@ -55,15 +55,15 @@ void reweighting_factor(const int N, const int nstore) {
       mnl = &monomial_list[j];
       if(mnl->type != GAUGE) {
 	if(mnl->even_odd_flag) {
-	  random_spinor_field_eo(mnl->pf, mnl->rngrepro);
+	  random_spinor_field_eo(mnl->pf, mnl->rngrepro, RN_GAUSS);
 	}
-	else random_spinor_field_lexic(mnl->pf, mnl->rngrepro);
+	else random_spinor_field_lexic(mnl->pf, mnl->rngrepro, RN_GAUSS);
 	mnl->energy0 = square_norm(mnl->pf, n, 1);
 	if(mnl->type == NDDETRATIO) {
 	  if(mnl->even_odd_flag) {
-	    random_spinor_field_eo(mnl->pf2, mnl->rngrepro);
+	    random_spinor_field_eo(mnl->pf2, mnl->rngrepro, RN_GAUSS);
 	  }
-	  else random_spinor_field_lexic(mnl->pf, mnl->rngrepro);
+	  else random_spinor_field_lexic(mnl->pf, mnl->rngrepro, RN_GAUSS);
 	  mnl->energy0 += square_norm(mnl->pf2, n, 1);
 	}
       }
diff --git a/reweighting_factor_nd.c b/reweighting_factor_nd.c
index d852efd9a..87f8a9274 100644
--- a/reweighting_factor_nd.c
+++ b/reweighting_factor_nd.c
@@ -48,8 +48,8 @@ double reweighting_factor_nd(const int N, const int repro)
 
   for(i = 0; i < N; ++i)
   {
-    random_spinor_field_eo(g_chi_up_spinor_field[2], repro);
-    random_spinor_field_eo(g_chi_dn_spinor_field[2], repro);
+    random_spinor_field_eo(g_chi_up_spinor_field[2], repro, RN_GAUSS);
+    random_spinor_field_eo(g_chi_dn_spinor_field[2], repro, RN_GAUSS);
     zero_spinor_field(g_chi_up_spinor_field[3], VOLUME/2);
     zero_spinor_field(g_chi_dn_spinor_field[3], VOLUME/2);
 
diff --git a/solver/dfl_projector.c b/solver/dfl_projector.c
index 0a3a66ca0..eb79f2467 100644
--- a/solver/dfl_projector.c
+++ b/solver/dfl_projector.c
@@ -498,7 +498,7 @@ int check_projectors(const int repro) {
   phi = malloc(nb_blocks*sizeof(spinor *));
   wphi = malloc(nb_blocks*sizeof(spinor *));
 
-  random_spinor_field_lexic(work_fields[0], repro);
+  random_spinor_field_lexic(work_fields[0], repro, RN_GAUSS);
   nrm = square_norm(work_fields[0], VOLUME, 1);
   if(g_cart_id == 0) {
     printf("\nNow we check the DFL projection routines!\n\n");
@@ -816,7 +816,7 @@ void check_little_D_inversion(const int repro) {
   const int nr_wf = 1;
 
   init_solver_field(&work_fields, VOLUMEPLUSRAND, nr_wf);
-  random_spinor_field_lexic(work_fields[0], repro);
+  random_spinor_field_lexic(work_fields[0], repro, RN_GAUSS);
   if(init_dfl_projector == 0) {
     alloc_dfl_projector();
   }
@@ -950,7 +950,7 @@ void check_local_D(const int repro)
     }
   }
   /* check Msap and Msap_eo on a radom vector */
-  random_spinor_field_lexic(work_fields[0], repro);
+  random_spinor_field_lexic(work_fields[0], repro, RN_GAUSS);
   zero_spinor_field(work_fields[1], VOLUME);
   Msap(work_fields[1], work_fields[0], 2);
   D_psi(work_fields[2], work_fields[1]);
diff --git a/solver/generate_dfl_subspace.c b/solver/generate_dfl_subspace.c
index 9ac37c58e..305d8f4a3 100644
--- a/solver/generate_dfl_subspace.c
+++ b/solver/generate_dfl_subspace.c
@@ -117,7 +117,7 @@ int generate_dfl_subspace(const int Ns, const int N, const int repro) {
   random_fields(Ns);
   if(g_debug_level > 4) {
     for(e = 0.; e < 1.; e=e+0.05) {
-      random_spinor_field_lexic(dfl_fields[0], repro);
+      random_spinor_field_lexic(dfl_fields[0], repro, RN_GAUSS);
       nrm = sqrt(square_norm(dfl_fields[0], N, 1));
       mul_r(dfl_fields[0], 1./nrm, dfl_fields[0], N);
       d = 1.1;
diff --git a/solver/mode_number.c b/solver/mode_number.c
index d5f2a49bf..a446731f4 100644
--- a/solver/mode_number.c
+++ b/solver/mode_number.c
@@ -305,7 +305,7 @@ void Check_Approximation(double const mstar, const int repro) {
   Sin   =calloc(VOLUMEPLUSRAND, sizeof(spinor));
 #endif
 
-  random_spinor_field_lexic(Sin, repro);
+  random_spinor_field_lexic(Sin, repro, RN_GAUSS);
 
   s_ = calloc(4*VOLUMEPLUSRAND+1, sizeof(spinor));
   s  = calloc(4, sizeof(spinor*));
diff --git a/start.c b/start.c
index cde0930e7..4894860f8 100644
--- a/start.c
+++ b/start.c
@@ -228,8 +228,21 @@ void unit_spinor_field(const int k)
 
 /* Function provides a spinor field of length VOLUME with
    Gaussian distribution */
-void random_spinor_field_lexic(spinor * const k, const int repro) {
+void random_spinor_field_lexic(spinor * const k, const int repro, const int rn_type) {
   int x, y, z, t, X, Y, Z, tt, id=0;
+
+  void (*random_vector)(double*,int) = NULL;
+
+  switch( rn_type ) {
+    case RN_Z2:
+      random_vector = z2_vector;
+      break;
+    case RN_GAUSS:
+    default:
+      random_vector = gauss_vector;
+      break;
+  }
+
 #ifdef MPI
   int rlxd_state[105];
   int rlxd_state_backup[105];
@@ -266,7 +279,7 @@ void random_spinor_field_lexic(spinor * const k, const int repro) {
 	    MPI_Cart_rank(g_cart_grid, coords, &id);
 #endif
 	    if(g_cart_id == id) {
-	      gauss_vector(v, 24);
+	      random_vector(v, 24);
 	      s = k + g_ipt[tt][X][Y][Z];
 	      memcpy(s, v, 24*sizeof(double));
 	    } else {
@@ -284,7 +297,7 @@ void random_spinor_field_lexic(spinor * const k, const int repro) {
   }
   else {
     for(x = 0; x < VOLUME; x++) {
-      gauss_vector(v, 24);
+      random_vector(v, 24);
       s = k + x;
       memcpy(s, v, 24*sizeof(double));
     }
@@ -292,8 +305,21 @@ void random_spinor_field_lexic(spinor * const k, const int repro) {
   return;
 }
 
-void random_spinor_field_eo(spinor * const k, const int repro) {
+void random_spinor_field_eo(spinor * const k, const int repro, const int rn_type ) {
   int x, X, y, Y, z, Z, t, t0, id = 0;
+
+  void (*random_vector)(double*,int) = NULL;
+
+  switch( rn_type ) {
+    case RN_Z2:
+      random_vector = z2_vector;
+      break;
+    case RN_GAUSS:
+    default:
+      random_vector = gauss_vector;
+      break;
+  }
+
 #ifdef MPI
   int rlxd_state[105];
   int rlxd_state_backup[105];
@@ -330,7 +356,7 @@ void random_spinor_field_eo(spinor * const k, const int repro) {
 	    MPI_Cart_rank(g_cart_grid, coords, &id);
 #endif
 	    if((t0+x+y+z)%2 == 0) {
-	      gauss_vector(v, 24);
+	      random_vector(v, 24);
 	      if(g_cart_id == id) {
 		s = k + g_lexic2eosub[ g_ipt[t][X][Y][Z] ];
 		memcpy(s, v, 24*sizeof(double));
@@ -349,74 +375,7 @@ void random_spinor_field_eo(spinor * const k, const int repro) {
   else {
     for (x = 0; x < VOLUME/2; x++) {
       s = k + x;
-      gauss_vector(v, 24);
-      memcpy(s, v, 24*sizeof(double));
-    }
-  }
-  return;
-}
-
-/* Function provides a spinor field of length VOLUME
-   filled with Z2 noise */
-
-void z2_random_spinor_field_lexic(spinor * const k, const int repro) {
-  int x, y, z, t, X, Y, Z, tt, id=0;
-#ifdef MPI
-  int rlxd_state[105];
-  int rlxd_state_backup[105];
-#endif
-  int coords[4];
-  spinor *s;
-  double v[24];
-
-  if(repro) {
-#ifdef MPI
-    if(g_proc_id != 0) {
-      rlxd_get(rlxd_state_backup);
-    } else if(g_proc_id == 0) {
-      rlxd_get(rlxd_state);
-    }
-    MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
-    if(g_proc_id != 0) {
-      rlxd_reset(rlxd_state);
-    }
-#endif
-    for(t = 0; t < g_nproc_t*T; t++) {
-      tt = t - g_proc_coords[0]*T;
-      coords[0] = t / T;
-      for(x = 0; x < g_nproc_x*LX; x++) {
-	X = x - g_proc_coords[1]*LX; 
-	coords[1] = x / LX;
-	for(y = 0; y < g_nproc_y*LY; y++) {
-	  Y = y - g_proc_coords[2]*LY;
-	  coords[2] = y / LY;
-	  for(z = 0; z < g_nproc_z*LZ; z++) {
-	    Z = z - g_proc_coords[3]*LZ;
-	    coords[3] = z / LZ;
-#ifdef MPI
-	    MPI_Cart_rank(g_cart_grid, coords, &id);
-#endif
-	    if(g_cart_id == id) {
-	      z2_vector(v, 24);
-	      s = k + g_ipt[tt][X][Y][Z];
-	      memcpy(s, v, 24*sizeof(double));
-	    } else {
-	      ranlxd(v,24);
-	    }
-	  }
-	}
-      }
-    }
-#ifdef MPI
-  if(g_proc_id != 0) {
-    rlxd_reset(rlxd_state_backup);
-  }
-#endif
-  }
-  else {
-    for(x = 0; x < VOLUME; x++) {
-      z2_vector(v, 24);
-      s = k + x;
+      random_vector(v, 24);
       memcpy(s, v, 24*sizeof(double));
     }
   }
diff --git a/start.h b/start.h
index b63cca093..7d9043ab7 100644
--- a/start.h
+++ b/start.h
@@ -20,14 +20,14 @@
 #ifndef _START_H
 #define _START_H
 
+enum RN_TYPE { RN_GAUSS, RN_Z2 };
+
 void unit_spinor_field(const int k);
 void zero_spinor_field(spinor * const k, const int N);
 void constant_spinor_field(spinor * const k, const int p, const int N);
 
-void random_spinor_field_lexic(spinor * const k, const int repro);
-void random_spinor_field_eo(spinor * const k, const int repro);
-
-void z2_random_spinor_field_lexic(spinor * const k, const int repro);
+void random_spinor_field_lexic(spinor * const k, const int repro, const int rn_type);
+void random_spinor_field_eo(spinor * const k, const int repro, const int rn_type);
 
 void unit_g_gauge_field(void);
 

From aabb256a164b4aeffc980affb47f4c665bfd940b Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Fri, 7 Dec 2012 19:03:55 +0100
Subject: [PATCH 107/110] adjust comments for random_spinor_field to reflect
 current functionality

---
 start.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/start.c b/start.c
index 4894860f8..3c79ed272 100644
--- a/start.c
+++ b/start.c
@@ -227,7 +227,7 @@ void unit_spinor_field(const int k)
 }
 
 /* Function provides a spinor field of length VOLUME with
-   Gaussian distribution */
+   distributions given by rn_type as defined in start.h */
 void random_spinor_field_lexic(spinor * const k, const int repro, const int rn_type) {
   int x, y, z, t, X, Y, Z, tt, id=0;
 
@@ -304,6 +304,8 @@ void random_spinor_field_lexic(spinor * const k, const int repro, const int rn_t
   }
   return;
 }
+/* Function provides a spinor field of length VOLUME/2 for even odd preconditioning 
+   with distributions given by rn_type as defined in start.h */
 
 void random_spinor_field_eo(spinor * const k, const int repro, const int rn_type ) {
   int x, X, y, Y, z, Z, t, t0, id = 0;

From 34af6f3a2905b15ff190705cfa2d15c5eb6b7d8a Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 9 Dec 2012 12:56:35 +0100
Subject: [PATCH 108/110] this fixes the problem with the online CORRELATOR
 measurement. Now one needs to specify an operator in the input file for which
 the inversions will be performed.

Currently, only the first operator in the file is used, and only
TMWILSON, WILSON and CLOVER are supported. For other operators the
measurement is skipped and the hmc proceeds as if no CORRELATOR measurement
were specified in the input file.

Also, removed the ITER_MAX_ variables (ITER_MAX_BCG, ITER_MAX_CG) from global.h
and all other occurrences, as they are no longer needed. Therefore, the input parameters

BCGstabMaxIter =
CGMaxIter =

are no longer supported and have been removed from the input.
---
 default_input_values.h                        |   2 -
 doc/input.tex                                 |   6 -
 global.h                                      |   3 -
 invert.c                                      |   2 +-
 io/utils_write_first_message.c                |   1 -
 monomial/det_monomial.c                       |   3 -
 online_measurement.c                          |  32 +++++-
 operator.c                                    | 105 +++++++++---------
 operator.h                                    |   2 +-
 read_input.h                                  |   2 -
 read_input.l                                  |  14 ---
 sample-input/sample-hmc-cloverdet.input       |  11 +-
 sample-input/sample-hmc-poly.input            |  11 +-
 sample-input/sample-hmc-tmcloverdet.input     |  12 +-
 .../sample-hmc-tmcloverdetratio.input         |  13 ++-
 sample-input/sample-hmc0.input                |  11 +-
 sample-input/sample-hmc1.input                |  11 +-
 sample-input/sample-hmc2.input                |  11 +-
 sample-input/sample-hmc3.input                |  11 +-
 sample-input/sample-hmc4.input                |  11 +-
 sample-input/sample-ndclover.input            |  11 +-
 sample-input/sample-sf-quenched0.input        |   2 -
 test/test_eigenvalues.c                       |  10 +-
 23 files changed, 182 insertions(+), 115 deletions(-)

diff --git a/default_input_values.h b/default_input_values.h
index b8eaddd48..c57d4fa38 100644
--- a/default_input_values.h
+++ b/default_input_values.h
@@ -71,8 +71,6 @@
 #define _default_g_stdio_proc 0
 #define _default_index_start 0
 #define _default_index_end 12
-#define _default_ITER_MAX_BCG 5000
-#define _default_ITER_MAX_CG 5000
 #define _default_X0 0.
 #define _default_X1 0.
 #define _default_X2 0.
diff --git a/doc/input.tex b/doc/input.tex
index db891e19d..4574ccd83 100644
--- a/doc/input.tex
+++ b/doc/input.tex
@@ -315,12 +315,6 @@ \subsection{Input parameter for main program}
   does not exist (its written in the course of the HMC) then the input
   parameter described here are used instead.
 
-\item {\ttfamily BCGstabMaxIter}:\\
-  The maximal number of iterations in the BiCGstab solver.
-
-\item {\ttfamily CGMaxIter}:\\
-  The maximal number of iterations in the CG solver.
-
 \item {\ttfamily ReversibilityCheck}:\\
   If set to {\ttfamily yes} the program will perform a check of
   reversibility violation in the integrator by integrating back in
diff --git a/global.h b/global.h
index 5455a0bcc..01b95faed 100644
--- a/global.h
+++ b/global.h
@@ -242,9 +242,6 @@ EXTERN double DeltaTtot, DeltaTcd, DeltaTev;
 EXTERN int counter_Spsi;
 /* end of the something ... */
 
-EXTERN int ITER_MAX_BCG;
-EXTERN int ITER_MAX_CG;
-
 EXTERN void* g_precWS;
 
 #ifdef WITHLAPH
diff --git a/invert.c b/invert.c
index 7e137dc13..5334f00a6 100644
--- a/invert.c
+++ b/invert.c
@@ -507,7 +507,7 @@ int main(int argc, char *argv[])
           /* 0-3 in case of 1 flavour  */
           /* 0-7 in case of 2 flavours */
           prepare_source(nstore, isample, ix, op_id, read_source_flag, source_location);
-          operator_list[op_id].inverter(op_id, index_start);
+          operator_list[op_id].inverter(op_id, index_start, 1);
         }
       }
 
diff --git a/io/utils_write_first_message.c b/io/utils_write_first_message.c
index db97b15a1..48a5ef564 100644
--- a/io/utils_write_first_message.c
+++ b/io/utils_write_first_message.c
@@ -144,7 +144,6 @@ int write_first_messages(FILE * parameterfile, const char const *executable, con
   fprintf(parameterfile, "# g_beta = %f , g_kappa= %f, g_kappa*csw/8= %f \n",g_beta,g_kappa,g_ka_csw_8);
   fprintf(parameterfile, "# boundary conditions for fermion fields (t,x,y,z) * pi: %f %f %f %f \n",X0,X1,X2,X3);
   if( strcmp(executable,"hmc") == 0 ) {
-    fprintf(parameterfile, "# ITER_MAX_BCG=%d\n", ITER_MAX_BCG);
     fprintf(parameterfile, "# Nmeas=%d, Nsave=%d \n",
 	    Nmeas,Nsave);
     fprintf(parameterfile, "# mu = %f\n", g_mu/2./g_kappa);
diff --git a/monomial/det_monomial.c b/monomial/det_monomial.c
index c9b41fd87..393cefd2a 100644
--- a/monomial/det_monomial.c
+++ b/monomial/det_monomial.c
@@ -196,9 +196,6 @@ double det_acc(const int id, hamiltonian_field_t * const hf) {
   boundary(mnl->kappa);
   if(mnl->even_odd_flag) {
 
-    if(mnl->solver == CG) {
-      ITER_MAX_BCG = 0;
-    }
     chrono_guess(mnl->w_fields[0], mnl->pf, mnl->csg_field, mnl->csg_index_array,
     	 mnl->csg_N, mnl->csg_n, VOLUME/2, mnl->Qsq);
     g_sloppy_precision_flag = 0;
diff --git a/online_measurement.c b/online_measurement.c
index fcffa4c79..59e031784 100644
--- a/online_measurement.c
+++ b/online_measurement.c
@@ -30,6 +30,7 @@
 #include "ranlxs.h"
 #include "su3spinor.h"
 #include "source_generation.h"
+#include "operator.h"
 #include "invert_eo.h"
 #include "solver/solver.h"
 #include "geometry_eo.h"
@@ -58,6 +59,7 @@ void online_measurement(const int traj, const int id, const int ieo) {
   double res = 0., respa = 0., resp4 = 0.;
   double atime, etime;
   float tmp;
+  operator * optr;
 #ifdef MPI
   double mpi_res = 0., mpi_respa = 0., mpi_resp4 = 0.;
   // send buffer for MPI_Gather
@@ -70,6 +72,27 @@ void online_measurement(const int traj, const int id, const int ieo) {
   filename=buf;
   sprintf(filename,"%s%.6d", "onlinemeas." ,traj);
 
+  init_operators();
+  if(no_operators < 1 && g_proc_id == 0) {
+    if(g_proc_id == 0) {
+      fprintf(stderr, "Warning! no operators defined in input file, cannot perform online correlator mesurements!\n");
+    }
+    return;
+  }
+  if(no_operators > 1 && g_proc_id == 0) {
+    fprintf(stderr, "Warning! number of operators defined larger than 1, using only the first!\n");
+  }
+  optr = &operator_list[0];
+  // we don't want to do inversion twice for this purpose here
+  optr->DownProp = 0;
+  if(optr->type != TMWILSON && optr->type != WILSON && optr->type != CLOVER) {
+    if(g_proc_id == 0) {
+      fprintf(stderr, "Warning! correlator online measurement currently only implemented for TMWILSON, WILSON and CLOVER\n");
+      fprintf(stderr, "Cannot perform online measurement!\n");
+    }
+    return;
+  }
+
   /* generate random timeslice */
   if(ranlxs_init == 0) {
     rlxs_init(1, 123456);
@@ -101,10 +124,13 @@ void online_measurement(const int traj, const int id, const int ieo) {
 #endif
   source_generation_pion_only(g_spinor_field[0], g_spinor_field[1], 
 			      t0, 0, traj);
+  optr->sr0 = g_spinor_field[0];
+  optr->sr1 = g_spinor_field[1];
+  optr->prop0 = g_spinor_field[2];
+  optr->prop1 = g_spinor_field[3];
 
-  invert_eo(g_spinor_field[2], g_spinor_field[3], 
-	    g_spinor_field[0], g_spinor_field[1],
-	    1.e-14, measurement_list[id].max_iter, CG, 1, 0, ieo, 0, NULL, -1);
+  // op_id = 0, index_start = 0, write_prop = 0
+  optr->inverter(0, 0, 0);
 
   /* now we bring it to normal format */
   /* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
diff --git a/operator.c b/operator.c
index 7279696ec..c3de519f5 100644
--- a/operator.c
+++ b/operator.c
@@ -62,7 +62,7 @@
 
 void dummy_D(spinor * const, spinor * const);
 void dummy_DbD(spinor * const s, spinor * const r, spinor * const p, spinor * const q);
-void op_invert(const int op_id, const int index_start);
+void op_invert(const int op_id, const int index_start, const int write_prop);
 void op_write_prop(const int op_id, const int index_start, const int append_);
 
 operator operator_list[max_no_operators];
@@ -143,58 +143,61 @@ int add_operator(const int type) {
 }
 
 int init_operators() {
-  int i;
+  static int oinit = 0;
   operator * optr;
-  for(i = 0; i < no_operators; i++) {
-    optr = operator_list + i;
-    /* This is a hack, it should be set on an operator basis. */
-    optr->rel_prec = g_relative_precision_flag;
-    if(optr->type == TMWILSON || optr->type == WILSON) {
-      if(optr->c_sw > 0) {
-	init_sw_fields();
+  if(!oinit) {
+    oinit = 1;
+    for(int i = 0; i < no_operators; i++) {
+      optr = operator_list + i;
+      /* This is a hack, it should be set on an operator basis. */
+      optr->rel_prec = g_relative_precision_flag;
+      if(optr->type == TMWILSON || optr->type == WILSON) {
+	if(optr->c_sw > 0) {
+	  init_sw_fields();
+	}
+	if(optr->even_odd_flag) {
+	  optr->applyQp = &Qtm_plus_psi;
+	  optr->applyQm = &Qtm_minus_psi;
+	  optr->applyQsq = &Qtm_pm_psi;
+	  optr->applyMp = &Mtm_plus_psi;
+	  optr->applyMm = &Mtm_minus_psi;
+	}
+	else {
+	  optr->applyQp = &Q_plus_psi;
+	  optr->applyQm = &Q_minus_psi;
+	  optr->applyQsq = &Q_pm_psi;
+	  optr->applyMp = &D_psi;
+	  optr->applyMm = &D_psi;
+	}
+	if(optr->solver == 12) {
+	  if (g_cart_id == 0 && optr->even_odd_flag == 1)
+	    fprintf(stderr, "CG Multiple mass solver works only without even/odd! Forcing!\n");
+	  optr->even_odd_flag = 0;
+	  if (g_cart_id == 0 && optr->DownProp)
+	    fprintf(stderr, "CGMMS doesn't need AddDownPropagator! Switching Off!\n");
+	  optr->DownProp = 0;
+	}
       }
-      if(optr->even_odd_flag) {
-        optr->applyQp = &Qtm_plus_psi;
-        optr->applyQm = &Qtm_minus_psi;
-        optr->applyQsq = &Qtm_pm_psi;
-        optr->applyMp = &Mtm_plus_psi;
-        optr->applyMm = &Mtm_minus_psi;
+      else if(optr->type == OVERLAP) {
+	optr->even_odd_flag = 0;
+	optr->applyM = &Dov_psi;
+	optr->applyQ = &Qov_psi;
       }
-      else {
-        optr->applyQp = &Q_plus_psi;
-        optr->applyQm = &Q_minus_psi;
-        optr->applyQsq = &Q_pm_psi;
-        optr->applyMp = &D_psi;
-        optr->applyMm = &D_psi;
+      else if(optr->type == DBTMWILSON) {
+	optr->even_odd_flag = 1;
+	optr->applyDbQsq = &Qtm_pm_ndpsi;
+	/* TODO: this should be here!       */
+	/* Chi`s-spinors  memory allocation */
+	/*       if(init_chi_spinor_field(VOLUMEPLUSRAND/2, 20) != 0) { */
+	/* 	fprintf(stderr, "Not enough memory for 20 NDPHMC Chi fields! Aborting...\n"); */
+	/* 	exit(0); */
+	/*       } */
       }
-      if(optr->solver == 12) {
-        if (g_cart_id == 0 && optr->even_odd_flag == 1)
-          fprintf(stderr, "CG Multiple mass solver works only without even/odd! Forcing!\n");
-        optr->even_odd_flag = 0;
-        if (g_cart_id == 0 && optr->DownProp)
-          fprintf(stderr, "CGMMS doesn't need AddDownPropagator! Switching Off!\n");
-        optr->DownProp = 0;
+      else if(optr->type == DBCLOVER) {
+	optr->even_odd_flag = 1;
+	optr->applyDbQsq = &Qtm_pm_ndpsi;
       }
     }
-    else if(optr->type == OVERLAP) {
-      optr->even_odd_flag = 0;
-      optr->applyM = &Dov_psi;
-      optr->applyQ = &Qov_psi;
-    }
-    else if(optr->type == DBTMWILSON) {
-      optr->even_odd_flag = 1;
-      optr->applyDbQsq = &Qtm_pm_ndpsi;
-      /* TODO: this should be here!       */
-      /* Chi`s-spinors  memory allocation */
-      /*       if(init_chi_spinor_field(VOLUMEPLUSRAND/2, 20) != 0) { */
-      /* 	fprintf(stderr, "Not enough memory for 20 NDPHMC Chi fields! Aborting...\n"); */
-      /* 	exit(0); */
-      /*       } */
-    }
-    else if(optr->type == DBCLOVER) {
-      optr->even_odd_flag = 1;
-      optr->applyDbQsq = &Qtm_pm_ndpsi;
-    }
   }
   return(0);
 }
@@ -213,7 +216,7 @@ void dummy_DbD(spinor * const s, spinor * const r, spinor * const p, spinor * co
   return;
 }
 
-void op_invert(const int op_id, const int index_start) {
+void op_invert(const int op_id, const int index_start, const int write_prop) {
   operator * optr = &operator_list[op_id];
   double atime = 0., etime = 0., nrm1 = 0., nrm2 = 0.;
   int i;
@@ -279,7 +282,7 @@ void op_invert(const int op_id, const int index_start) {
         mul_r(optr->prop0, (2*optr->kappa), optr->prop0, VOLUME / 2);
         mul_r(optr->prop1, (2*optr->kappa), optr->prop1, VOLUME / 2);
       }
-      if (optr->solver != CGMMS) /* CGMMS handles its own I/O */
+      if (optr->solver != CGMMS && write_prop) /* CGMMS handles its own I/O */
         optr->write_prop(op_id, index_start, i);
       if(optr->DownProp) {
         optr->mu = -optr->mu;
@@ -361,7 +364,7 @@ void op_invert(const int op_id, const int index_start) {
       mul_one_pm_itau2(optr->prop1, optr->prop3, g_spinor_field[DUM_DERI+1], 
                        g_spinor_field[DUM_DERI+3], -1., VOLUME/2);
       /* write propagator */
-      optr->write_prop(op_id, index_start, i);
+      if(write_prop) optr->write_prop(op_id, index_start, i);
 
       mul_r(optr->prop0, 1./(2*optr->kappa), g_spinor_field[DUM_DERI], VOLUME/2);
       mul_r(optr->prop1, 1./(2*optr->kappa), g_spinor_field[DUM_DERI+1], VOLUME/2);
@@ -403,7 +406,7 @@ void op_invert(const int op_id, const int index_start) {
 
     invert_overlap(op_id, index_start); 
 
-    optr->write_prop(op_id, index_start, 0);
+    if(write_prop) optr->write_prop(op_id, index_start, 0);
   }
   etime = gettime();
   if (g_cart_id == 0 && g_debug_level > 0) {
diff --git a/operator.h b/operator.h
index 486ec920d..2984ac23d 100644
--- a/operator.h
+++ b/operator.h
@@ -99,7 +99,7 @@ typedef struct {
   void (*applyMm) (spinor * const, spinor * const);
   void (*applyDbQsq) (spinor * const, spinor * const, spinor * const, spinor * const);
   /* the generic invert function */
-  void (*inverter) (const int op_id, const int index_start);
+  void (*inverter) (const int op_id, const int index_start, const int write_prop);
   /* write the propagator */
   void (*write_prop) (const int op_id, const int index_start, const int append_);
   char * conf_input;
diff --git a/read_input.h b/read_input.h
index 13fed801c..bd456106f 100644
--- a/read_input.h
+++ b/read_input.h
@@ -62,8 +62,6 @@ extern "C"
   extern int index_end;
   extern int random_seed;
   extern int rlxd_level;
-  extern int ITER_MAX_BCG;
-  extern int ITER_MAX_CG;
   extern double X0, X1, X2, X3;
   extern int read_source_flag;
   extern int return_check_flag;
diff --git a/read_input.l b/read_input.l
index 72c352c89..f171f92cb 100644
--- a/read_input.l
+++ b/read_input.l
@@ -210,8 +210,6 @@ inline void rmQuotes(char *str){
 %x NPROCZ
 %x IOPROC
 %x IDX
-%x CGMAX
-%x BCGMAX
 %x BOUNDT
 %x BOUNDX
 %x BOUNDY
@@ -336,8 +334,6 @@ inline void rmQuotes(char *str){
 ^InitialStoreCounter{EQL}          BEGIN(NSTORE);
 ^StdIOProcessor{EQL}               BEGIN(IOPROC);
 ^Indices{EQL}                      BEGIN(IDX);
-^BCGstabMaxIter{EQL}               BEGIN(BCGMAX);
-^CGMaxIter{EQL}                    BEGIN(CGMAX);
 ^BCAngleT{EQL}                     BEGIN(BOUNDT);
 ^ThetaT{EQL}                       BEGIN(BOUNDT);
 ^ThetaX{EQL}                       BEGIN(BOUNDX);
@@ -1584,14 +1580,6 @@ inline void rmQuotes(char *str){
   if(myverbose!=0) printf("inverting up to spin-color index %d\n", index_end);
   index_end+=1;
 }
-<BCGMAX>{DIGIT}+ {
-  ITER_MAX_BCG = atoi(yytext);
-  if(myverbose != 0) printf("Maximal number of iterations for BCGstab set ro %d\n", ITER_MAX_BCG);
-}
-<CGMAX>{DIGIT}+  {
-  ITER_MAX_CG = atoi(yytext);
-  if(myverbose != 0) printf("Maximal number of iterations for CG set ro %d\n", ITER_MAX_CG);
-}
 <BOUNDT>{FLT} {
   X0 = atof(yytext);
   if(myverbose != 0) printf("X0 for boundary cond. in time set to %e\n", X0);
@@ -1949,8 +1937,6 @@ int read_input(char * conf_file){
   g_stdio_proc = _default_g_stdio_proc;
   index_start = _default_index_start;
   index_end = _default_index_end;
-  ITER_MAX_CG = _default_ITER_MAX_CG;
-  ITER_MAX_BCG = _default_ITER_MAX_BCG;
   X0 = _default_X0;
   X1 = _default_X1;
   X2 = _default_X2;
diff --git a/sample-input/sample-hmc-cloverdet.input b/sample-input/sample-hmc-cloverdet.input
index 9f2e056ce..a331e7d1c 100644
--- a/sample-input/sample-hmc-cloverdet.input
+++ b/sample-input/sample-hmc-cloverdet.input
@@ -14,8 +14,6 @@ CSW = 1.76
 kappa = 0.138
 NSave = 500000
 ThetaT = 1.0
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
@@ -52,3 +50,12 @@ BeginIntegrator
   Lambda1 = 0.20
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator CLOVER
+  CSW = 1.76
+  kappa = 0.208333
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc-poly.input b/sample-input/sample-hmc-poly.input
index 53434d883..0dae0937f 100644
--- a/sample-input/sample-hmc-poly.input
+++ b/sample-input/sample-hmc-poly.input
@@ -15,8 +15,6 @@ Measurements = 10000
 kappa = 0.177
 NSave = 10000
 ThetaT = 1
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
@@ -58,3 +56,12 @@ BeginIntegrator
   Lambda1 = 0.20
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator TMWILSON
+  2KappaMu = 0.177
+  kappa = 0.177
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc-tmcloverdet.input b/sample-input/sample-hmc-tmcloverdet.input
index eb22997bf..14bd4202b 100644
--- a/sample-input/sample-hmc-tmcloverdet.input
+++ b/sample-input/sample-hmc-tmcloverdet.input
@@ -14,8 +14,6 @@ CSW = 1.76
 kappa = 0.138
 NSave = 500000
 ThetaT = 1.0
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
@@ -54,3 +52,13 @@ BeginIntegrator
   Lambda1 = 0.20
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator TMCLOVER
+  2KappaMu = 0.01
+  CSW = 1.00
+  kappa = 0.138
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc-tmcloverdetratio.input b/sample-input/sample-hmc-tmcloverdetratio.input
index 095848352..8b6543f4d 100644
--- a/sample-input/sample-hmc-tmcloverdetratio.input
+++ b/sample-input/sample-hmc-tmcloverdetratio.input
@@ -14,8 +14,6 @@ CSW = 1.00
 kappa = 0.138
 NSave = 500000
 ThetaT = 1.0
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
@@ -60,7 +58,6 @@ BeginMonomial CLOVERDETRATIO
   solver = CG
 EndMonomial
 
-
 BeginIntegrator 
   Type0 = 2MN
   Type1 = 2MN
@@ -71,3 +68,13 @@ BeginIntegrator
   Lambda1 = 0.20
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator TMCLOVER
+  2KappaMu = 0.01
+  CSW = 1.00
+  kappa = 0.138
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc0.input b/sample-input/sample-hmc0.input
index 319bae2ea..88d9ee14a 100644
--- a/sample-input/sample-hmc0.input
+++ b/sample-input/sample-hmc0.input
@@ -15,8 +15,6 @@ StartCondition = hot
 kappa = 0.177
 NSave = 500000
 ThetaT = 1
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
@@ -53,3 +51,12 @@ BeginIntegrator
   Lambda1 = 0.20
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator TMWILSON
+  2kappaMu = 0.177
+  kappa = 0.177
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc1.input b/sample-input/sample-hmc1.input
index f20569ba0..156115fc8 100644
--- a/sample-input/sample-hmc1.input
+++ b/sample-input/sample-hmc1.input
@@ -17,8 +17,6 @@ StartCondition = hot
 kappa = 0.177
 NSave = 500000
 ThetaT = 1
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = yes
 ReversibilityCheckIntervall = 100
@@ -67,3 +65,12 @@ BeginIntegrator
   Lambda0 = 0.19
   NumberOfTimescales = 3
 EndIntegrator
+
+BeginOperator TMWILSON
+  2kappaMu = 0.177
+  kappa = 0.177
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc2.input b/sample-input/sample-hmc2.input
index ae97596e7..ad999a8bb 100644
--- a/sample-input/sample-hmc2.input
+++ b/sample-input/sample-hmc2.input
@@ -24,8 +24,6 @@ StartCondition = hot
 kappa = 0.170
 NSave = 500000
 ThetaT = 1
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 GaugeConfigInputFile = conf.save
 UseEvenOdd = yes
 ReversibilityCheck = yes
@@ -75,3 +73,12 @@ BeginIntegrator
   Lambda0 = 0.19
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator TMWILSON
+  2kappaMu = 0.01
+  kappa = 0.177
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc3.input b/sample-input/sample-hmc3.input
index cf35109ea..b16211951 100644
--- a/sample-input/sample-hmc3.input
+++ b/sample-input/sample-hmc3.input
@@ -15,8 +15,6 @@ StartCondition = hot
 kappa = 0.163260
 NSave = 100
 ThetaT = 1
-BCGstabMaxIter = 1000
-CGMaxIter = 10000
 GaugeConfigInputFile = conf.save
 UseEvenOdd = yes
 ReversibilityCheck = yes
@@ -91,3 +89,12 @@ BeginIntegrator
   Lambda2 = 0.2
   NumberOfTimescales = 3
 EndIntegrator
+
+BeginOperator TMWILSON
+  kappa = 0.163260
+  2KappaMu = 0.002740961
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-hmc4.input b/sample-input/sample-hmc4.input
index 0d4e4e182..2d4ecff11 100644
--- a/sample-input/sample-hmc4.input
+++ b/sample-input/sample-hmc4.input
@@ -17,8 +17,6 @@ ThermalisationSweeps=100
 kappa = 0.208333
 NSave = 10000
 ThetaT = 1
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 UseEvenOdd = yes
 ReversibilityCheck = no
 ReversibilityCheckIntervall = 1
@@ -57,3 +55,12 @@ BeginIntegrator
   Lambda1 = 0.21
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator TMWILSON
+  2KappaMu = 0.083333
+  kappa = 0.208333
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-ndclover.input b/sample-input/sample-ndclover.input
index 526c769db..04c77925c 100644
--- a/sample-input/sample-ndclover.input
+++ b/sample-input/sample-ndclover.input
@@ -13,8 +13,6 @@ StartCondition = hot
 kappa = 0.170
 NSave = 500000
 ThetaT = 1
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 GaugeConfigInputFile = conf.save
 UseEvenOdd = yes
 ReversibilityCheck = yes
@@ -66,3 +64,12 @@ BeginIntegrator
   Lambda0 = 0.19
   NumberOfTimescales = 2
 EndIntegrator
+
+BeginOperator TMWILSON
+  2KappaMu = 0.01
+  kappa = 0.170
+  UseEvenOdd = yes
+  Solver = CG
+  SolverPrecision = 1e-14
+  MaxSolverIterations = 1000
+EndOperator
diff --git a/sample-input/sample-sf-quenched0.input b/sample-input/sample-sf-quenched0.input
index fb014aa7b..53bde326d 100644
--- a/sample-input/sample-sf-quenched0.input
+++ b/sample-input/sample-sf-quenched0.input
@@ -24,8 +24,6 @@ DebugLevel = 1
 # hmc_tm parameters: #
 ######################
 StartCondition = hot
-BCGstabMaxIter = 0
-CGMaxIter = 1000
 ReversibilityCheck = no
 ReversibilityCheckIntervall = 100
 PerformOnlineMeasurements = no
diff --git a/test/test_eigenvalues.c b/test/test_eigenvalues.c
index 02f00b96a..b4a407d90 100644
--- a/test/test_eigenvalues.c
+++ b/test/test_eigenvalues.c
@@ -184,7 +184,7 @@ int main(int argc,char *argv[]) {
     g_csg_N[0] = g_csg_N[4];
     g_csg_N[4] = j;
     g_csg_N[6] = j;
-    if(ITER_MAX_BCG == 0 || fabs(g_mu3) > 0) {
+    if(fabs(g_mu3) > 0) {
       g_csg_N[6] = 0;
     }
 
@@ -203,7 +203,7 @@ int main(int argc,char *argv[]) {
     g_csg_N[4] = g_csg_N[0];
     g_csg_N[0] = g_csg_N[2];
     g_csg_N[2] = g_csg_N[4];
-    if(ITER_MAX_BCG == 0 || fabs(g_mu2) > 0) {
+    if(fabs(g_mu2) > 0) {
       g_csg_N[4] = 0;
     }
     g_csg_N[6] = 0;
@@ -212,7 +212,7 @@ int main(int argc,char *argv[]) {
   }
   else {
     g_csg_N[2] = g_csg_N[0];
-    if(ITER_MAX_BCG == 0 || fabs(g_mu2) > 0) {
+    if(fabs(g_mu2) > 0) {
       g_csg_N[2] = 0;
     }
     g_csg_N[4] = 0;
@@ -339,8 +339,8 @@ int main(int argc,char *argv[]) {
     fprintf(parameterfile, "The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY), (int)(LZ));
     fprintf(parameterfile, "g_beta = %f , g_kappa= %f, g_kappa*csw/8= %f \n",g_beta,g_kappa,g_ka_csw_8);
     fprintf(parameterfile, "boundary of fermion fields (t,x,y,z): %f %f %f %f \n",X0,X1,X2,X3);
-    fprintf(parameterfile, "ITER_MAX_BCG=%d, EPS_SQ0=%e, EPS_SQ1=%e EPS_SQ2=%e, EPS_SQ3=%e \n"
-	    ,ITER_MAX_BCG,EPS_SQ0,EPS_SQ1,EPS_SQ2,EPS_SQ3);
+    fprintf(parameterfile, "EPS_SQ0=%e, EPS_SQ1=%e EPS_SQ2=%e, EPS_SQ3=%e \n"
+	    ,EPS_SQ0,EPS_SQ1,EPS_SQ2,EPS_SQ3);
     fprintf(parameterfile, "g_eps_sq_force = %e, g_eps_sq_acc = %e\n", g_eps_sq_force, g_eps_sq_acc);
     fprintf(parameterfile, "dtau=%f, Nsteps=%d, Nmeas=%d, Nsave=%d, integtyp=%d, nsmall=%d \n",
 	    dtau,Nsteps,Nmeas,Nsave,integtyp,nsmall);

From 3af155748e10d57d5a652e5ff44240d5d054ebd0 Mon Sep 17 00:00:00 2001
From: Carsten Urbach <curbach@gmx.de>
Date: Sun, 9 Dec 2012 13:05:57 +0100
Subject: [PATCH 109/110] operator description corrected

---
 sample-input/sample-hmc-cloverdet.input        | 2 --
 sample-input/sample-hmc-tmcloverdet.input      | 4 +---
 sample-input/sample-hmc-tmcloverdetratio.input | 4 +---
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/sample-input/sample-hmc-cloverdet.input b/sample-input/sample-hmc-cloverdet.input
index a331e7d1c..69f08eedc 100644
--- a/sample-input/sample-hmc-cloverdet.input
+++ b/sample-input/sample-hmc-cloverdet.input
@@ -54,8 +54,6 @@ EndIntegrator
 BeginOperator CLOVER
   CSW = 1.76
   kappa = 0.208333
-  UseEvenOdd = yes
-  Solver = CG
   SolverPrecision = 1e-14
   MaxSolverIterations = 1000
 EndOperator
diff --git a/sample-input/sample-hmc-tmcloverdet.input b/sample-input/sample-hmc-tmcloverdet.input
index 14bd4202b..a5644902a 100644
--- a/sample-input/sample-hmc-tmcloverdet.input
+++ b/sample-input/sample-hmc-tmcloverdet.input
@@ -53,12 +53,10 @@ BeginIntegrator
   NumberOfTimescales = 2
 EndIntegrator
 
-BeginOperator TMCLOVER
+BeginOperator CLOVER
   2KappaMu = 0.01
   CSW = 1.00
   kappa = 0.138
-  UseEvenOdd = yes
-  Solver = CG
   SolverPrecision = 1e-14
   MaxSolverIterations = 1000
 EndOperator
diff --git a/sample-input/sample-hmc-tmcloverdetratio.input b/sample-input/sample-hmc-tmcloverdetratio.input
index 8b6543f4d..cf672d6e7 100644
--- a/sample-input/sample-hmc-tmcloverdetratio.input
+++ b/sample-input/sample-hmc-tmcloverdetratio.input
@@ -69,12 +69,10 @@ BeginIntegrator
   NumberOfTimescales = 2
 EndIntegrator
 
-BeginOperator TMCLOVER
+BeginOperator CLOVER
   2KappaMu = 0.01
   CSW = 1.00
   kappa = 0.138
-  UseEvenOdd = yes
-  Solver = CG
   SolverPrecision = 1e-14
   MaxSolverIterations = 1000
 EndOperator

From 2ffe5412b8954cc6c5e9167545fc9083cd0afc90 Mon Sep 17 00:00:00 2001
From: Bartosz Kostrzewa <bartosz.kostrzewa@desy.de>
Date: Mon, 10 Dec 2012 16:40:31 +0100
Subject: [PATCH 110/110] use RN_TYPE enum type for last argument of
 random_spinor_field_* functions

---
 start.c | 4 ++--
 start.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/start.c b/start.c
index 3c79ed272..0ebe7217c 100644
--- a/start.c
+++ b/start.c
@@ -228,7 +228,7 @@ void unit_spinor_field(const int k)
 
 /* Function provides a spinor field of length VOLUME with
    distributions given by rn_type as defined in start.h */
-void random_spinor_field_lexic(spinor * const k, const int repro, const int rn_type) {
+void random_spinor_field_lexic(spinor * const k, const int repro, const enum RN_TYPE rn_type) {
   int x, y, z, t, X, Y, Z, tt, id=0;
 
   void (*random_vector)(double*,int) = NULL;
@@ -307,7 +307,7 @@ void random_spinor_field_lexic(spinor * const k, const int repro, const int rn_t
 /* Function provides a spinor field of length VOLUME/2 for even odd preconditioning 
    with distributions given by rn_type as defined in start.h */
 
-void random_spinor_field_eo(spinor * const k, const int repro, const int rn_type ) {
+void random_spinor_field_eo(spinor * const k, const int repro, const enum RN_TYPE rn_type ) {
   int x, X, y, Y, z, Z, t, t0, id = 0;
 
   void (*random_vector)(double*,int) = NULL;
diff --git a/start.h b/start.h
index 7d9043ab7..403d9cc36 100644
--- a/start.h
+++ b/start.h
@@ -26,8 +26,8 @@ void unit_spinor_field(const int k);
 void zero_spinor_field(spinor * const k, const int N);
 void constant_spinor_field(spinor * const k, const int p, const int N);
 
-void random_spinor_field_lexic(spinor * const k, const int repro, const int rn_type);
-void random_spinor_field_eo(spinor * const k, const int repro, const int rn_type);
+void random_spinor_field_lexic(spinor * const k, const int repro, const enum RN_TYPE rn_type);
+void random_spinor_field_eo(spinor * const k, const int repro, const enum RN_TYPE rn_type);
 
 void unit_g_gauge_field(void);