From 7052d6f6b17772d65832f66229e3be9104be5c5a Mon Sep 17 00:00:00 2001
From: Luigi Scorzato <scorzato@ect.it>
Date: Sat, 10 Sep 2011 15:06:29 +0000
Subject: [PATCH] LapH: computing eigensystem for the Laplacian Heaviside
 method

---
 LapH_ev.c                      | 218 +++++++++
 Makefile.in                    |   5 +-
 config.h.in                    |   3 +
 configure.in                   |  44 +-
 fixed_volume.h.in              |   3 +-
 geometry_eo.c                  |  55 +++
 global.h                       |  13 +
 init_geometry_indices.c        |  11 +
 init_jacobi_field.c            | 107 +++++
 init_jacobi_field.h            |  34 ++
 jacobi.c                       |  77 +++
 jacobi.h                       |  32 ++
 linalg/Makefile.in             |   3 +-
 linalg/assign.c                |  21 +
 linalg/assign.h                |   1 +
 linalg/assign_add_mul_r.c      |  23 +
 linalg/assign_add_mul_r.h      |   1 +
 linalg/assign_mul_add_r.c      |  24 +-
 linalg/assign_mul_add_r.h      |   1 +
 linalg/diff.c                  |  22 +
 linalg/diff.h                  |   1 +
 linalg/scalar_prod.c           |  81 ++++
 linalg/scalar_prod.h           |   1 +
 linalg/scalar_prod_r.c         |  39 +-
 linalg/scalar_prod_r.h         |   1 +
 linalg/scalar_prod_su3spinor.c | 230 +++++++++
 linalg/scalar_prod_su3spinor.h |  28 ++
 linalg/square_norm.c           |  35 ++
 linalg/square_norm.h           |   1 +
 mpi_init.c                     |  31 ++
 mpi_init.h                     |  10 +
 solver/Makefile.in             |   3 +-
 solver/cg_her_su3vect.c        | 108 +++++
 solver/cg_her_su3vect.h        |  28 ++
 solver/eigenvalues_Jacobi.c    | 228 +++++++++
 solver/eigenvalues_Jacobi.h    |  34 ++
 solver/gram-schmidt.c          |  62 +++
 solver/gram-schmidt.h          |   3 +
 solver/jdher_su3vect.c         | 828 +++++++++++++++++++++++++++++++++
 solver/jdher_su3vect.h         |  49 ++
 solver/matrix_mult_typedef.h   |   1 +
 su3.h                          |  11 +
 xchange.h                      |   1 +
 xchange_jacobi.c               | 110 +++++
 xchange_jacobi.h               |  25 +
 45 files changed, 2626 insertions(+), 21 deletions(-)
 create mode 100644 LapH_ev.c
 create mode 100755 init_jacobi_field.c
 create mode 100755 init_jacobi_field.h
 create mode 100644 jacobi.c
 create mode 100644 jacobi.h
 create mode 100644 linalg/scalar_prod_su3spinor.c
 create mode 100644 linalg/scalar_prod_su3spinor.h
 create mode 100755 solver/cg_her_su3vect.c
 create mode 100755 solver/cg_her_su3vect.h
 create mode 100644 solver/eigenvalues_Jacobi.c
 create mode 100755 solver/eigenvalues_Jacobi.h
 create mode 100644 solver/jdher_su3vect.c
 create mode 100755 solver/jdher_su3vect.h
 create mode 100644 xchange_jacobi.c
 create mode 100644 xchange_jacobi.h

diff --git a/LapH_ev.c b/LapH_ev.c
new file mode 100644
index 000000000..6a027c9a5
--- /dev/null
+++ b/LapH_ev.c
@@ -0,0 +1,218 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* 
+ *  Program for computing the eigensystem of the Laplacian operator
+ * Authors Luigi Scorzato, Marco Cristoforetti
+ *
+ *
+ *******************************************************************************/
+
+#define MAIN_PROGRAM
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#else
+#error "no config.h"
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#if (defined BGL && !defined BGP)
+#  include <rts.h>
+#endif
+#ifdef MPI
+# include <mpi.h>
+#endif
+#include "global.h"
+#include <io/params.h>
+#include <io/gauge.h>
+#include "su3.h"
+#include "ranlxd.h"
+#include "geometry_eo.h"
+#include "read_input.h"
+#include "start.h"
+#include "xchange.h"
+#include "init_gauge_field.h"
+#include "init_geometry_indices.h"
+#include "mpi_init.h"
+#include "solver/eigenvalues_Jacobi.h"
+#include "init_jacobi_field.h"
+
+/* Entry point: reads LapH.input and the gauge configuration, then calls
+ * eigenvalues_Jacobi() once for each of the T local time slices. */
+int main(int argc,char *argv[])
+{
+  int tslice,j,k;
+  char conf_filename[50];
+
+#ifdef MPI
+  MPI_Init(&argc, &argv);
+#endif
+
+  /* Read the input file */
+  read_input("LapH.input");
+
+  tmlqcd_mpi_init(argc, argv);
+
+  if(g_proc_id==0) {
+#ifdef SSE
+    printf("# The code was compiled with SSE instructions\n");
+#endif
+#ifdef SSE2
+    printf("# The code was compiled with SSE2 instructions\n");
+#endif
+#ifdef SSE3
+    printf("# The code was compiled with SSE3 instructions\n");
+#endif
+#ifdef P4
+    printf("# The code was compiled for Pentium4\n");
+#endif
+#ifdef OPTERON
+    printf("# The code was compiled for AMD Opteron\n");
+#endif
+#ifdef _GAUGE_COPY
+    printf("# The code was compiled with -D_GAUGE_COPY\n");
+#endif
+#ifdef BGL
+    printf("# The code was compiled for Blue Gene/L\n");
+#endif
+#ifdef BGP
+    printf("# The code was compiled for Blue Gene/P\n");
+#endif
+#ifdef _USE_HALFSPINOR
+    printf("# The code was compiled with -D_USE_HALFSPINOR\n");
+#endif
+#ifdef _USE_SHMEM
+    printf("# the code was compiled with -D_USE_SHMEM\n");
+#  ifdef _PERSISTENT
+    printf("# the code was compiled for persistent MPI calls (halfspinor only)\n");
+#  endif
+#endif
+#ifdef MPI
+#  ifdef _NON_BLOCKING
+    printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n");
+#  endif
+#endif
+    printf("\n");
+    fflush(stdout);
+  }
+
+#ifndef WITHLAPH
+  printf(" Error: WITHLAPH not defined");
+  exit(0);
+  #error " Error: WITHLAPH not defined"
+#endif
+#ifdef MPI
+#ifndef _INDEX_INDEP_GEOM
+  printf(" Error: _INDEX_INDEP_GEOM not defined");
+  exit(0);
+  #error " Error: _INDEX_INDEP_GEOM not defined"
+#endif
+#ifndef _USE_TSPLITPAR
+  printf(" Error: _USE_TSPLITPAR not defined");
+  exit(0);
+  #error " Error: _USE_TSPLITPAR not defined"
+#endif
+#endif
+#ifdef FIXEDVOLUME
+  printf(" Error: FIXEDVOLUME not allowed");
+  exit(0);
+  #error " Error: FIXEDVOLUME not allowed"
+#endif
+
+  init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
+  init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);
+
+  if(g_proc_id == 0) {
+    fprintf(stdout,"The number of processes is %d \n",g_nproc);
+    printf("# The lattice size is %d x %d x %d x %d\n",
+	   (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ));
+    printf("# The local lattice size is %d x %d x %d x %d\n", 
+	   (int)(T), (int)(LX), (int)(LY),(int) LZ);
+    printf("# Computing LapH eigensystem \n");
+
+    fflush(stdout);
+  }
+
+  /* define the geometry */
+  geometry();
+
+  start_ranlux(1, 123456);
+
+  /* Read Gauge field; snprintf keeps a long file name from overrunning conf_filename */
+  snprintf(conf_filename, sizeof(conf_filename), "%s.%.4d", gauge_input_filename, nstore);
+  if (g_cart_id == 0) {
+    printf("#\n# Trying to read gauge field from file %s in %s precision.\n",
+	   conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double"));
+    fflush(stdout);
+  }
+  if( (j = read_gauge_field(conf_filename)) !=0) {
+    fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", j, conf_filename);
+    exit(-2);
+  }
+
+  if (g_cart_id == 0) {
+    printf("# Finished reading gauge field.\n");
+    fflush(stdout);
+  }
+
+#ifdef MPI
+  /*For parallelization: exchange the gaugefield */
+  xchange_gauge();
+#endif
+
+  /* Init Jacobi field; a non-zero return means an allocation failed */
+  if(init_jacobi_field(SPACEVOLUME+SPACERAND,3) != 0) {
+    fprintf(stderr, "Not enough memory for the Jacobi field! Aborting...\n");
+    exit(-1);
+  }
+
+#ifdef MPI
+  {
+     /* for debugging in parallel set i_gdb = 0 */
+    volatile int i_gdb = 8;
+    char hostname[256];
+    gethostname(hostname, sizeof(hostname));
+    printf("PID %d on %s ready for attach\n", getpid(), hostname);
+    fflush(stdout);
+    if(g_cart_id == 0){
+      while (0 == i_gdb){
+	sleep(5);
+      }
+    }
+  }
+
+  MPI_Barrier(MPI_COMM_WORLD);
+#endif
+
+  for (k=0 ; k<3 ; k++)
+    random_jacobi_field(g_jacobi_field[k],SPACEVOLUME);
+
+  /* Compute LapH Eigensystem */
+  for(tslice=0; tslice<T; tslice++){ 
+    eigenvalues_Jacobi(&no_eigenvalues,5000, eigenvalue_precision,0,tslice,nstore);
+  }
+
+#ifdef MPI
+  MPI_Finalize();
+#endif
+  return(0);
+}
diff --git a/Makefile.in b/Makefile.in
index a24bb5211..65e2d1b57 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -66,7 +66,8 @@ MODULES = read_input gamma hybrid_update observables start \
 	little_D block sf_gauge_monomial sf_utils sf_calc_action \
 	sf_get_staples sf_get_rectangle_staples sf_observables \
 	Dov_psi operator poly_monomial measurements pion_norm Dov_proj \
-	xchange_field_tslice temporalgauge spinor_fft X_psi P_M_eta
+	xchange_field_tslice temporalgauge spinor_fft X_psi P_M_eta \
+	xchange_jacobi jacobi init_jacobi_field
 
 
 ## the GPU modules (all .cu files in $GPUDIR)
@@ -81,7 +82,7 @@ NOOPTMOD = test/check_xchange test/check_geometry
 
 PROGRAMS = hmc_tm benchmark invert gwc2ildg \
 	ildg2gwc single2double double2single reducenoise gen_sources  \
-	check_locallity test_lemon hopping_test
+	check_locallity test_lemon hopping_test LapH_ev
 
 ALLOBJ = ${MODULES} ${PROGRAMS} ${SMODULES}
 SUBDIRS = ${USESUBDIRS}
diff --git a/config.h.in b/config.h.in
index ae7d09ea1..b2cbe56e9 100644
--- a/config.h.in
+++ b/config.h.in
@@ -176,5 +176,8 @@
 /* Define if we want to use CUDA GPU */
 #undef HAVE_GPU
 
+/* Define if we want to compute the LapH eigenvectors */
+#undef WITHLAPH
+
 #endif
 
diff --git a/configure.in b/configure.in
index 332a3c70f..307cf8369 100644
--- a/configure.in
+++ b/configure.in
@@ -622,6 +622,17 @@ else
  AC_MSG_RESULT(no)
 fi
 
+AC_MSG_CHECKING(whether we want to compute the LapH eigenvalues)
+AC_ARG_ENABLE(laph,
+  [  --enable-laph   enable computation of LapH eigensystem [default=no]],
+  enable_laph=$enableval, enable_laph=no)
+if test $enable_laph = yes; then
+  AC_MSG_RESULT(yes)
+  AC_DEFINE(WITHLAPH,1,LapH eigensystem)
+else
+ AC_MSG_RESULT(no)
+fi
+
 
 AC_MSG_CHECKING(whether we want to use CUDA GPU)
 AC_ARG_ENABLE(gpu,
@@ -676,20 +687,25 @@ AC_SUBST(GPUMPICOMPILER)
 
 
 AC_MSG_CHECKING(checking consistency)
-if test $enable_iig = yes && test $withpersistent = yes ; then
- AC_MSG_ERROR(ERROR! indexindepgeom is not compatible with persistent communications )
-fi
-if test $enable_iig = yes && test $enable_shmem = yes ; then
-  AC_MSG_ERROR(ERROR! indexindepgeom is not compatible with shmem API )
-fi
-if test $enable_tsp = yes && test $enable_iig = no; then
-  AC_MSG_ERROR(ERROR! tsplitpar needs indexindepgeom)
-fi
-if test $enable_tsp = yes && test $enable_sse2 != yes ; then
-  AC_MSG_ERROR(ERROR! tsplitpar needs at least SSE2 )
-fi
-if test $enable_tsp = yes && test $enable_gaugecopy != yes ; then
-  AC_MSG_ERROR(ERROR! tsplitpar needs gaugecopy)
+if test $enable_mpi = yes ; then
+ if test $enable_iig = yes && test $withpersistent = yes ; then
+  AC_MSG_ERROR(ERROR! indexindepgeom is not compatible with persistent communications )
+ fi
+ if test $enable_iig = yes && test $enable_shmem = yes ; then
+   AC_MSG_ERROR(ERROR! indexindepgeom is not compatible with shmem API )
+ fi
+ if test $enable_tsp = yes && test $enable_iig = no; then
+   AC_MSG_ERROR(ERROR! tsplitpar needs indexindepgeom)
+ fi
+ if test $enable_tsp = yes && test $enable_sse2 != yes ; then
+   AC_MSG_ERROR(ERROR! tsplitpar needs at least SSE2 )
+ fi
+ if test $enable_tsp = yes && test $enable_gaugecopy != yes ; then
+   AC_MSG_ERROR(ERROR! tsplitpar needs gaugecopy)
+ fi
+ if test $enable_laph = yes && test $enable_tsp != yes ; then
+   AC_MSG_ERROR(ERROR! laph needs tsplitpar)
+ fi
 fi
 
 if test ! -e lib; then
diff --git a/fixed_volume.h.in b/fixed_volume.h.in
index 127dfd067..d78e1af15 100644
--- a/fixed_volume.h.in
+++ b/fixed_volume.h.in
@@ -43,6 +43,7 @@
 #    define LZ (Zdef/N_PROC_Z)
 #    define L  LX
 #    define VOLUME (T*LX*LY*LZ)
+#    define SPACEVOLUME (LX*LY*LZ)
 #    define TEOSLICE ((LX*LY*LZ)/2)
 
 #    ifdef PARALLELT  
@@ -64,7 +65,7 @@
   /* Note that VOLUMEPLUSRAND is in general not equal to VOLUME+RAND */
   /* VOLUMEPLUSRAND rather includes the edges */
 #    define VOLUMEPLUSRAND (VOLUME + RAND + EDGES)
-
+#    define SPACERAND (RAND/T)
 #  endif
 
 #endif
diff --git a/geometry_eo.c b/geometry_eo.c
index 834720e00..e8347428d 100644
--- a/geometry_eo.c
+++ b/geometry_eo.c
@@ -277,7 +277,11 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
 int Index(const int x0, const int x1, const int x2, const int x3) {
   int y0, y1, y2, y3, ix;
 
+#ifdef  WITHLAPH
+  y0 = x0;
+#else
   y0 = (x0 + T ) % T; 
+#endif
   y1 = (x1 + LX) % LX; 
   y2 = (x2 + LY) % LY; 
   y3 = (x3 + LZ) % LZ;
@@ -890,6 +894,35 @@ void geometry(){
     }
   }
 
+
+#ifdef WITHLAPH
+  tempT=T;
+  T=1;
+  tempV=VOLUME;
+  VOLUME=SPACEVOLUME;
+  tempR=RAND;
+  RAND=SPACERAND;
+  x0=0;
+  for (x1 = 0; x1 < (LX); x1++){
+    for (x2 = 0; x2 < (LY); x2++){
+      for (x3 = 0; x3 < (LZ); x3++){
+	ix=Index(x0, x1, x2, x3);
+	g_iup3d[ix][0] = -1;
+	g_idn3d[ix][0] = -1;
+	g_iup3d[ix][1] = Index(x0, x1+1, x2, x3);
+	g_idn3d[ix][1] = Index(x0, x1-1, x2, x3);
+	g_iup3d[ix][2] = Index(x0, x1, x2+1, x3);
+	g_idn3d[ix][2] = Index(x0, x1, x2-1, x3);
+	g_iup3d[ix][3] = Index(x0, x1, x2, x3+1);
+	g_idn3d[ix][3] = Index(x0, x1, x2, x3-1);
+      }
+    }
+  }
+  T=tempT;
+  VOLUME=tempV;
+  RAND=tempR;
+#endif
+
   i_even=0;
   i_odd=0;
   /*For the spinor fields we need only till VOLUME+RAND */
@@ -1099,6 +1132,28 @@ void geometry(){
   gI_m1_0_0_m2=Index(-1,0,0,-2);
 #endif /* _INDEX_INDEP_GEOM */
 
+#ifdef WITHLAPH
+  tempT=T;
+  T=1;
+  tempV=VOLUME;
+  VOLUME=SPACEVOLUME;
+  tempR=RAND;
+  RAND=SPACERAND;
+  gI_0_0_0=Index(0,0,0,0);
+  gI_L_0_0=Index(0,LX,0,0);
+  gI_Lm1_0_0=Index(0,LX-1,0,0);
+  gI_m1_0_0=Index(0,-1,0,0);
+  gI_0_L_0=Index(0,0,LY,0);
+  gI_0_Lm1_0=Index(0,0,LY-1,0);
+  gI_0_m1_0=Index(0,0,-1,0);
+  gI_0_0_L=Index(0,0,0,LZ);
+  gI_0_0_Lm1=Index(0,0,0,LZ-1);
+  gI_0_0_m1=Index(0,0,0,-1);
+  T=tempT;
+  VOLUME=tempV;
+  RAND=tempR;
+#endif
+
 #if ( defined PARALLELXYZT || defined PARALLELXYZ )
   check_struct_zt=0;
   ix = 0;
diff --git a/global.h b/global.h
index 779396c1a..824fc3faf 100644
--- a/global.h
+++ b/global.h
@@ -31,6 +31,9 @@
  *
  *
  ***************************************************************/
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
 #include <stdlib.h>
 #include <stdio.h>
 #ifdef MPI
@@ -79,6 +82,7 @@ EXTERN int T, L, LX, LY, LZ, VOLUME;
 EXTERN int N_PROC_T, N_PROC_X, N_PROC_Y, N_PROC_Z;
 EXTERN int RAND, EDGES, VOLUMEPLUSRAND;
 EXTERN int TEOSLICE;
+EXTERN int SPACEVOLUME, SPACERAND;
 #endif
 
 /* translates from lexicographic order to even/odd order */
@@ -257,6 +261,15 @@ EXTERN int ITER_MAX_CG;
 
 EXTERN void* g_precWS;
 
+#ifdef WITHLAPH
+/* Jacobi operator for Laplacian Heaviside (LapH) */
+EXTERN su3_vector ** g_jacobi_field;
+EXTERN int gI_0_0_0, gI_L_0_0, gI_Lm1_0_0, gI_m1_0_0, gI_0_L_0, gI_0_Lm1_0, gI_0_m1_0, gI_0_0_L, gI_0_0_Lm1, gI_0_0_m1;
+EXTERN int tempT,tempV,tempR;
+EXTERN int ** g_iup3d;
+EXTERN int ** g_idn3d;
+#endif
+
 #undef EXTERN
 /* #undef ALIGN */
 
diff --git a/init_geometry_indices.c b/init_geometry_indices.c
index 724fe3e45..1fa512cf7 100644
--- a/init_geometry_indices.c
+++ b/init_geometry_indices.c
@@ -152,6 +152,17 @@ int init_geometry_indices(const int V) {
     g_ipt[i] = g_ipt[i-1]+(LX+4);
   }
 
+#ifdef WITHLAPH
+  g_idn3d = (int**)calloc(SPACEVOLUME, sizeof(int*));
+  if((void*)g_idn3d == NULL) return(31);  /* test the table just allocated, not g_idn */
+  g_iup3d = (int**)calloc(SPACEVOLUME, sizeof(int*));
+  if((void*)g_iup3d == NULL) return(32);  /* likewise g_iup3d, not g_iup */
+  for (i=0;i<SPACEVOLUME;i++){
+    g_idn3d[i] = (int*)calloc(4, sizeof(int));  /* NOTE(review): row allocations are not checked */
+    g_iup3d[i] = (int*)calloc(4, sizeof(int));
+  }
+#endif
+
   return(0);
 }
 
diff --git a/init_jacobi_field.c b/init_jacobi_field.c
new file mode 100755
index 000000000..3119df933
--- /dev/null
+++ b/init_jacobi_field.c
@@ -0,0 +1,107 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* 
+ *  routine for the initialization of the Jacobi field (for use in LapH_ev)
+ *  Authors Luigi Scorzato, Marco Cristoforetti
+ *
+ *
+ *******************************************************************************/
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include "global.h"
+#include "su3.h"
+#include "start.h"
+#include "xchange_jacobi.h"
+#include "init_jacobi_field.h"
+
+#ifdef WITHLAPH
+
+su3_vector  *jacobi_field = NULL;
+
+/* Allocates nr Jacobi fields of V su3_vector each as one zeroed block of
+ * nr*V+1 elements and points g_jacobi_field[i] at the start of field i.
+ * Returns 0 on success, 1 or 2 if the first or second allocation fails. */
+int init_jacobi_field(const int V, const int nr)
+{
+	int i=0;
+
+	if((void*)(jacobi_field = (su3_vector*)calloc(nr*V+1, sizeof(su3_vector))) == NULL) {
+		printf("malloc errno : %d\n",errno);
+		errno = 0;
+		return(1);
+	}
+	if((void*)(g_jacobi_field = (su3_vector**)malloc(nr*sizeof(su3_vector*))) == NULL) {
+		printf("malloc errno : %d\n",errno);
+		free(jacobi_field);  /* do not leak the field block on this error path */
+		jacobi_field = NULL;
+		errno = 0;
+		return(2);
+	}
+	g_jacobi_field[0] = jacobi_field;
+	for(i=1; i<nr; i++) {
+		g_jacobi_field[i] = g_jacobi_field[i-1]+V;
+	}
+	return(0);
+}
+
+void free_jacobi_field(){  /* releases the pointer table and the field storage */
+	free(g_jacobi_field);
+	g_jacobi_field = NULL;
+	free(jacobi_field);
+	jacobi_field = NULL;
+}
+
+void random_gauss_jacobi_field(su3_vector * const k, const int V)
+{
+  /* Fills the first V sites of k with six values per site from gauss_vector(). */
+  double gauss[6];
+  int site;
+
+  for (site = 0; site < V; site++) {
+    su3_vector * const dst = k + site;
+    gauss_vector(gauss, 6);
+    dst->c0.re = gauss[0];
+    dst->c0.im = gauss[1];
+    dst->c1.re = gauss[2];
+    dst->c1.im = gauss[3];
+    dst->c2.re = gauss[4];
+    dst->c2.im = gauss[5];
+  }
+#ifdef MPI
+  xchange_jacobi(k);
+#endif
+}
+
+/* Overwrites the first V entries of k with unif_su3_vector() draws, one per
+ * site, and under MPI passes the field to xchange_jacobi() afterwards. */
+void random_jacobi_field(su3_vector * const k, const int V)
+{
+  int ix;
+  su3_vector *s;
+
+  for (ix=0; ix<V ;ix++) {
+    s=k+ix;
+    *s=unif_su3_vector();
+  }
+#ifdef MPI
+  xchange_jacobi(k);
+#endif
+}
+#endif // WITHLAPH
diff --git a/init_jacobi_field.h b/init_jacobi_field.h
new file mode 100755
index 000000000..c5e9df652
--- /dev/null
+++ b/init_jacobi_field.h
@@ -0,0 +1,34 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* 
+ *  routine for the initialization of the Jacobi field (for use in LapH_ev)
+ *  Authors Luigi Scorzato, Marco Cristoforetti
+ *
+ *
+ *******************************************************************************/
+#ifndef _INIT_JACOBI_FIELD_H
+#define _INIT_JACOBI_FIELD_H
+#include "su3.h"  /* su3_vector, used by the prototypes below */
+# ifdef WITHLAPH
+int init_jacobi_field(const int V, const int nr);  /* 0 on success, 1 or 2 on allocation failure */
+void free_jacobi_field(void);
+void random_gauss_jacobi_field(su3_vector * const k, const int V);
+void random_jacobi_field(su3_vector * const k, const int V);
+# endif
+#endif
diff --git a/jacobi.c b/jacobi.c
new file mode 100644
index 000000000..9d6c25034
--- /dev/null
+++ b/jacobi.c
@@ -0,0 +1,77 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* 
+ *  Routine for the computation of the Jacobi operator (for use in LapH_ev)
+ *  Authors Luigi Scorzato, Marco Cristoforetti
+ *
+ *
+ *******************************************************************************/
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <errno.h>
+#include <time.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "xchange_jacobi.h"
+#ifdef WITHLAPH
+/* On time slice t: l = 6*k minus, for mu=1..3, the _su3_multiply (forward)
+ * and _su3_inverse_multiply (backward) neighbour terms.  Guarded because
+ * global.h declares g_iup3d/g_idn3d only when WITHLAPH is defined. */
+void Jacobi(su3_vector * const l, su3_vector * const k,int t)
+{
+  int ix,mu,tcoord,coord;
+  su3_vector lt;
+
+#ifdef MPI
+  xchange_jacobi(k);
+#endif
+  tcoord=t*SPACEVOLUME;
+  for(ix=0;ix<SPACEVOLUME;ix++) {
+    coord=tcoord+ix;  /* index used for g_gauge_field and g_idn */
+    _vector_mul(l[ix],6,k[ix]);
+    for(mu=1;mu<4;mu++) {
+      _su3_multiply(lt,g_gauge_field[coord][mu],k[g_iup3d[ix][mu]]);
+      l[ix].c0.re-=lt.c0.re;
+      l[ix].c0.im-=lt.c0.im;
+      l[ix].c1.re-=lt.c1.re;
+      l[ix].c1.im-=lt.c1.im;
+      l[ix].c2.re-=lt.c2.re;
+      l[ix].c2.im-=lt.c2.im;
+      _su3_inverse_multiply(lt,g_gauge_field[g_idn[coord][mu]][mu],k[g_idn3d[ix][mu]]);
+      l[ix].c0.re-=lt.c0.re;
+      l[ix].c0.im-=lt.c0.im;
+      l[ix].c1.re-=lt.c1.re;
+      l[ix].c1.im-=lt.c1.im;
+      l[ix].c2.re-=lt.c2.re;
+      l[ix].c2.im-=lt.c2.im;
+    }
+  }
+#ifdef MPI
+  xchange_jacobi(l);
+#endif
+}
+#endif /* WITHLAPH */
diff --git a/jacobi.h b/jacobi.h
new file mode 100644
index 000000000..d6b332e92
--- /dev/null
+++ b/jacobi.h
@@ -0,0 +1,32 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* 
+ *  Routine for the computation of the Jacobi operator (for use in LapH_ev)
+ *  Authors Luigi Scorzato, Marco Cristoforetti
+ *
+ *
+ *******************************************************************************/
+#ifndef _JACOBI_H
+#define _JACOBI_H
+
+#include "su3.h"
+/* Defined in jacobi.c: writes into l the operator applied to k, using the gauge links of time slice t. */
+void Jacobi(su3_vector * const l, su3_vector * const k,int t);
+
+#endif
diff --git a/linalg/Makefile.in b/linalg/Makefile.in
index d0de76956..945a38c73 100644
--- a/linalg/Makefile.in
+++ b/linalg/Makefile.in
@@ -45,7 +45,8 @@ liblinalg_TARGETS = assign_add_mul_r_add_mul \
         assign_mul_bra_add_mul_ket_add_bi mul_r_bi \
         scalar_prod_r_bi assign_add_mul_r_bi assign_mul_add_r_bi \
 	convert_eo_to_lexic assign_mul_add_mul_r mul_add_mul_r \
-	assign_mul_add_mul_add_mul_r mattimesvec
+	assign_mul_add_mul_add_mul_r mattimesvec \
+	scalar_prod_su3spinor
 
 liblinalg_STARGETS = diff assign_add_mul_r assign_mul_add_r square_norm
 
diff --git a/linalg/assign.c b/linalg/assign.c
index 177d15a00..6cb6ec896 100644
--- a/linalg/assign.c
+++ b/linalg/assign.c
@@ -177,3 +177,24 @@ void assign(spinor * const R, spinor * const S, const int N){
   }
 }
 #endif
+
+#ifdef WITHLAPH
+void assign_su3vect(su3_vector * const R, su3_vector * const S, const int N)
+{
+  /* Copies the first N elements of S into R, one real component at a
+   * time; S is only read. */
+  int site;
+
+  for (site = 0; site < N; site++) {
+    su3_vector * const dst = R + site;
+    const su3_vector * const src = S + site;
+
+    dst->c0.re = src->c0.re;
+    dst->c0.im = src->c0.im;
+    dst->c1.re = src->c1.re;
+    dst->c1.im = src->c1.im;
+    dst->c2.re = src->c2.re;
+    dst->c2.im = src->c2.im;
+  }
+}
+#endif
diff --git a/linalg/assign.h b/linalg/assign.h
index bad8b0484..5afea3f0f 100644
--- a/linalg/assign.h
+++ b/linalg/assign.h
@@ -25,5 +25,6 @@
 
 /* Assign (*R) = (*S) */
 void assign(spinor * const R, spinor * const S, const int N);
+void assign_su3vect(su3_vector * const R, su3_vector * const S, const int N);
 
 #endif
diff --git a/linalg/assign_add_mul_r.c b/linalg/assign_add_mul_r.c
index 77420f30f..f9e30ebe2 100644
--- a/linalg/assign_add_mul_r.c
+++ b/linalg/assign_add_mul_r.c
@@ -406,3 +406,26 @@ void assign_add_mul_r(spinor * const P, spinor * const Q, const double c, const
   }
 }
 #endif
+
+#ifdef WITHLAPH
+void assign_add_mul_r_su3vect(su3_vector * const P, su3_vector * const Q, const double c, const int N)
+{
+  /* P = P + c*Q on the first N elements.
+   * Every real and imaginary component is updated separately;
+   * Q is only read. */
+  int site;
+  const double scale = c;
+
+  for (site = 0; site < N; site++) {
+    su3_vector * const acc = P + site;
+    const su3_vector * const src = Q + site;
+
+    acc->c0.re += scale*src->c0.re;
+    acc->c0.im += scale*src->c0.im;
+    acc->c1.re += scale*src->c1.re;
+    acc->c1.im += scale*src->c1.im;
+    acc->c2.re += scale*src->c2.re;
+    acc->c2.im += scale*src->c2.im;
+  }
+}
+#endif
diff --git a/linalg/assign_add_mul_r.h b/linalg/assign_add_mul_r.h
index e36d8af28..57fd0e7f4 100644
--- a/linalg/assign_add_mul_r.h
+++ b/linalg/assign_add_mul_r.h
@@ -24,5 +24,6 @@
 #include "su3.h"
 
 void assign_add_mul_r(spinor * const P, spinor * const Q, const double c, const int N);
+void assign_add_mul_r_su3vect(su3_vector * const P, su3_vector * const Q, const double c, const int N);
 
 #endif
diff --git a/linalg/assign_mul_add_r.c b/linalg/assign_mul_add_r.c
index 6daf81c7e..a41b76986 100644
--- a/linalg/assign_mul_add_r.c
+++ b/linalg/assign_mul_add_r.c
@@ -430,4 +430,26 @@ void assign_mul_add_r(spinor * const R, const double c, spinor * const S, const
 }
 #endif
 
-
+#ifdef WITHLAPH
+void assign_mul_add_r_su3vect(su3_vector * const R, const double c, su3_vector * const S, const int N)
+{
+  /* R = c*R + S on the first N elements.
+   * Every real and imaginary component is updated separately:
+   * the old value of R is scaled first, then S is added.
+   * S is only read. */
+  int site;
+  const double scale = c;
+
+  for (site = 0; site < N; site++) {
+    su3_vector * const acc = R + site;
+    const su3_vector * const src = S + site;
+
+    acc->c0.re = scale*acc->c0.re + src->c0.re;
+    acc->c0.im = scale*acc->c0.im + src->c0.im;
+    acc->c1.re = scale*acc->c1.re + src->c1.re;
+    acc->c1.im = scale*acc->c1.im + src->c1.im;
+    acc->c2.re = scale*acc->c2.re + src->c2.re;
+    acc->c2.im = scale*acc->c2.im + src->c2.im;
+  }
+}
+#endif
diff --git a/linalg/assign_mul_add_r.h b/linalg/assign_mul_add_r.h
index bcc7e9b39..fc1d74138 100644
--- a/linalg/assign_mul_add_r.h
+++ b/linalg/assign_mul_add_r.h
@@ -24,5 +24,6 @@
 #include "su3.h"
 
 void assign_mul_add_r(spinor * const S, const double c, spinor * const R, const int N);
+void assign_mul_add_r_su3vect(su3_vector * const S, const double c, su3_vector * const R, const int N);
 
 #endif
diff --git a/linalg/diff.c b/linalg/diff.c
index 27a420cee..61f44b0ac 100644
--- a/linalg/diff.c
+++ b/linalg/diff.c
@@ -346,3 +346,25 @@ void diff(spinor * const Q,spinor * const R,spinor * const S, const int N){
 }
 
 #endif
+
+#ifdef WITHLAPH
+void diff_su3vect(su3_vector * const Q,su3_vector * const R,su3_vector * const S, const int N)
+{
+  /* Q = R - S on the first N elements, component by component;
+   * R and S are only read. */
+  int site;
+
+  for (site = 0; site < N; site++) {
+    su3_vector * const out = Q + site;
+    const su3_vector * const lhs = R + site;
+    const su3_vector * const rhs = S + site;
+
+    out->c0.re = lhs->c0.re - rhs->c0.re;
+    out->c0.im = lhs->c0.im - rhs->c0.im;
+    out->c1.re = lhs->c1.re - rhs->c1.re;
+    out->c1.im = lhs->c1.im - rhs->c1.im;
+    out->c2.re = lhs->c2.re - rhs->c2.re;
+    out->c2.im = lhs->c2.im - rhs->c2.im;
+  }
+}
+#endif
diff --git a/linalg/diff.h b/linalg/diff.h
index a9cd0730c..e4609a19f 100644
--- a/linalg/diff.h
+++ b/linalg/diff.h
@@ -25,6 +25,7 @@
 
 /* Makes the difference (*Q) = (*R) - (*S) */
 void diff(spinor * const Q, spinor * const R, spinor * const S, const int N);
+void diff_su3vect(su3_vector * const Q, su3_vector * const R, su3_vector * const S, const int N);
 
 
 #endif
diff --git a/linalg/scalar_prod.c b/linalg/scalar_prod.c
index 1c4782347..23dd1b2e2 100644
--- a/linalg/scalar_prod.c
+++ b/linalg/scalar_prod.c
@@ -127,3 +127,84 @@ complex scalar_prod(spinor * const S, spinor * const R, const int N, const int p
 #endif
   return(c);
 }
+
+#ifdef WITHLAPH
+complex scalar_prod_su3vect(su3_vector * const S, su3_vector * const R, const int N, const int parallel){
+  int ix;
+  static double ks,kc,ds,tr,ts,tt; /* ks: running sum, kc: correction, ds: per-site term */
+  su3_vector *s,*r;
+  complex c;
+#ifdef MPI
+  complex d;
+#endif
+  /* Returns c = sum over the first N sites of conj(S).R (all three components). */
+  /* Real Part: sum of r.re*s.re + r.im*s.im */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix = 0; ix < N; ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=(su3_vector *) R + ix;
+    
+      ds=(*r).c0.re*(*s).c0.re+(*r).c0.im*(*s).c0.im+
+	(*r).c1.re*(*s).c1.re+(*r).c1.im*(*s).c1.im+
+	(*r).c2.re*(*s).c2.re+(*r).c2.im*(*s).c2.im;
+
+      /* Kahan Summation */    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc; /* fold the remaining correction into the total */
+
+#if defined MPI0 /* NOTE(review): keyed on MPI0, not MPI; looks deliberately disabled, confirm */
+  if(parallel == 1) {
+    MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    kc = ks;
+  }
+#endif
+
+  c.re = kc;
+
+  /* Imaginary Part: sum of r.im*s.re - r.re*s.im */
+
+  ks=0.0;
+  kc=0.0;
+  
+  for (ix=0;ix<N;ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=(su3_vector *) R + ix;
+    
+      ds=-(*r).c0.re*(*s).c0.im+(*r).c0.im*(*s).c0.re-
+	(*r).c1.re*(*s).c1.im+(*r).c1.im*(*s).c1.re-
+	(*r).c2.re*(*s).c2.im+(*r).c2.im*(*s).c2.re;
+      /* Kahan Summation, same update as for the real part */
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+
+#if defined MPI0 /* NOTE(review): keyed on MPI0, not MPI; see the real part */
+  if(parallel == 1) {
+    MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    kc = ks;
+  }
+#endif
+
+  c.im = kc;
+#ifdef MPI
+  if(parallel == 1) { /* sum the local results over MPI_COMM_WORLD, re and im in one call */
+    d = c;
+    MPI_Allreduce(&d, &c, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD);
+  }
+#endif
+  return(c);
+}
+#endif
diff --git a/linalg/scalar_prod.h b/linalg/scalar_prod.h
index 352dd4d12..3c1434376 100644
--- a/linalg/scalar_prod.h
+++ b/linalg/scalar_prod.h
@@ -24,5 +24,6 @@
 #include "su3.h"
 /*  <S,R>=SxR^* */
 complex scalar_prod(spinor * const S,spinor * const R, const int N, const int parallel);
+complex scalar_prod_su3vect(su3_vector * const S,su3_vector * const R, const int N, const int parallel);
 
 #endif
diff --git a/linalg/scalar_prod_r.c b/linalg/scalar_prod_r.c
index 8d9d5d94e..3cd8f1003 100644
--- a/linalg/scalar_prod_r.c
+++ b/linalg/scalar_prod_r.c
@@ -91,7 +91,6 @@ double scalar_prod_r(spinor * const S,spinor * const R, const int N, const int p
     MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
     return ks;
   }
-
 #endif
 
   return kc;
@@ -238,4 +237,42 @@ double scalar_prod_r(spinor * const S,spinor * const R, const int N, const int p
   return kc;
 
 }
+#endif // apenext
+
+#ifdef WITHLAPH
+double scalar_prod_r_su3vect(su3_vector * const S,su3_vector * const R, const int N, const int parallel)
+{
+  /* Real part of the colour-vector scalar product of S and R over N sites,
+   * accumulated with compensated (Kahan) summation.  The accumulators are kept
+   * static; NOTE(review): presumably to force stores to memory - confirm. */
+  static double sum, comp, term, y, t, d;
+  const su3_vector *a = S, *b = R;
+  int site = 0;
+
+  sum = 0.0;
+  comp = 0.0;
+  while (site < N) {
+    /* contribution of this site, colour components added in the order c0, c1, c2 */
+    term = b->c0.re*a->c0.re + b->c0.im*a->c0.im +
+      b->c1.re*a->c1.re + b->c1.im*a->c1.im +
+      b->c2.re*a->c2.re + b->c2.im*a->c2.im;
+    y = term + comp;   /* add the compensation carried from earlier sites */
+    t = y + sum;
+    d = t - sum;
+    sum = t;
+    comp = y - d;      /* part of y that did not make it into t */
+    a++; b++; site++;
+  }
+  comp = sum + comp;
+#if defined MPI
+  if(parallel) {
+    /* sum the per-process results over MPI_COMM_WORLD */
+    MPI_Allreduce(&comp, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    return sum;
+  }
 #endif
+  return comp;
+}
+
+#endif // WITHLAPH
+
diff --git a/linalg/scalar_prod_r.h b/linalg/scalar_prod_r.h
index 1da96ae73..ce0d28152 100644
--- a/linalg/scalar_prod_r.h
+++ b/linalg/scalar_prod_r.h
@@ -25,5 +25,6 @@
 
 /* Returns the real part of the scalar product (*R,*S) */
 double scalar_prod_r(spinor * const S,spinor * const R, const int N, const int parallel);
+double scalar_prod_r_su3vect(su3_vector * const S,su3_vector * const R, const int N, const int parallel);
 
 #endif
diff --git a/linalg/scalar_prod_su3spinor.c b/linalg/scalar_prod_su3spinor.c
new file mode 100644
index 000000000..de84f1454
--- /dev/null
+++ b/linalg/scalar_prod_su3spinor.c
@@ -0,0 +1,230 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* $Id: scalar_prod.c 1173 2009-03-30 15:27:59Z urbach $ */
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#ifdef MPI
+#include <mpi.h>
+#endif
+#include "su3.h"
+#include "scalar_prod_su3spinor.h"
+
+#ifdef WITHLAPH
+complex_spinor scalar_prod_su3spinor(su3_vector * const S, spinor * const R, const int N, const int parallel){
+  int ix;
+  static double ks,kc,ds,tr,ts,tt;
+  su3_vector *s,*r;
+  complex_spinor c;
+#ifdef MPI
+  complex_spinor d;
+#endif
+  /* c.scK = sum over ix < N of conj(S[ix]) . R[ix].sK (colour contraction) for the
+   * four spin components K = 0..3; every real and imaginary part is accumulated
+   * in its own Kahan-compensated loop.  First loop: sc0.re */
+  ks=0.0;
+  kc=0.0;
+  for (ix = 0; ix < N; ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s0);
+    
+      ds=(*r).c0.re*(*s).c0.re+(*r).c0.im*(*s).c0.im+
+	(*r).c1.re*(*s).c1.re+(*r).c1.im*(*s).c1.im+
+	(*r).c2.re*(*s).c2.re+(*r).c2.im*(*s).c2.im;
+
+      /* Kahan Summation */    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc0.re = kc;
+
+  /* sc0.im */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix=0;ix<N;ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s0);
+    
+      ds=-(*r).c0.re*(*s).c0.im+(*r).c0.im*(*s).c0.re-
+	(*r).c1.re*(*s).c1.im+(*r).c1.im*(*s).c1.re-
+	(*r).c2.re*(*s).c2.im+(*r).c2.im*(*s).c2.re;
+    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc0.im = kc;
+
+  /* sc1.re: must read spin component s1 (this loop used to read s0 by mistake) */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix = 0; ix < N; ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s1);
+    
+      ds=(*r).c0.re*(*s).c0.re+(*r).c0.im*(*s).c0.im+
+	(*r).c1.re*(*s).c1.re+(*r).c1.im*(*s).c1.im+
+	(*r).c2.re*(*s).c2.re+(*r).c2.im*(*s).c2.im;
+
+      /* Kahan Summation */    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc1.re = kc;
+
+  /* sc1.im */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix=0;ix<N;ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s1);
+    
+      ds=-(*r).c0.re*(*s).c0.im+(*r).c0.im*(*s).c0.re-
+	(*r).c1.re*(*s).c1.im+(*r).c1.im*(*s).c1.re-
+	(*r).c2.re*(*s).c2.im+(*r).c2.im*(*s).c2.re;
+    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc1.im = kc;
+
+  /* sc2.re */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix = 0; ix < N; ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s2);
+    
+      ds=(*r).c0.re*(*s).c0.re+(*r).c0.im*(*s).c0.im+
+	(*r).c1.re*(*s).c1.re+(*r).c1.im*(*s).c1.im+
+	(*r).c2.re*(*s).c2.re+(*r).c2.im*(*s).c2.im;
+
+      /* Kahan Summation */    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc2.re = kc;
+
+  /* sc2.im */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix=0;ix<N;ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s2);
+    
+      ds=-(*r).c0.re*(*s).c0.im+(*r).c0.im*(*s).c0.re-
+	(*r).c1.re*(*s).c1.im+(*r).c1.im*(*s).c1.re-
+	(*r).c2.re*(*s).c2.im+(*r).c2.im*(*s).c2.re;
+    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc2.im = kc;
+
+  /* sc3.re */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix = 0; ix < N; ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s3);
+    
+      ds=(*r).c0.re*(*s).c0.re+(*r).c0.im*(*s).c0.im+
+	(*r).c1.re*(*s).c1.re+(*r).c1.im*(*s).c1.im+
+	(*r).c2.re*(*s).c2.re+(*r).c2.im*(*s).c2.im;
+
+      /* Kahan Summation */    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc3.re = kc;
+
+  /* sc3.im */
+
+  ks=0.0;
+  kc=0.0;
+  for (ix=0;ix<N;ix++)
+    {
+      s=(su3_vector *) S + ix;
+      r=&(R[ix].s3);
+    
+      ds=-(*r).c0.re*(*s).c0.im+(*r).c0.im*(*s).c0.re-
+	(*r).c1.re*(*s).c1.im+(*r).c1.im*(*s).c1.re-
+	(*r).c2.re*(*s).c2.im+(*r).c2.im*(*s).c2.re;
+    
+      tr=ds+kc;
+      ts=tr+ks;
+      tt=ts-ks;
+      ks=ts;
+      kc=tr-tt;
+    }
+  kc=ks+kc;
+  c.sc3.im = kc;
+
+#ifdef MPI
+  if(parallel == 1) {
+    d = c;
+    MPI_Allreduce(&d, &c, 4, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); // NOTE(review): assumes complex_spinor is exactly 4 packed double-complex values - confirm in su3.h
+  }
+#endif
+  /* c is the sum over all processes if built with MPI and parallel == 1, else the local sum */
+  return(c);
+}
+#endif // WITHLAPH
diff --git a/linalg/scalar_prod_su3spinor.h b/linalg/scalar_prod_su3spinor.h
new file mode 100644
index 000000000..3cffebf2d
--- /dev/null
+++ b/linalg/scalar_prod_su3spinor.h
@@ -0,0 +1,28 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* $Id: scalar_prod.h 1150 2009-02-16 16:52:09Z urbach $  */
+
+#ifndef _SCALAR_PRODSU3S_H
+#define _SCALAR_PRODSU3S_H
+
+#include "su3.h"
+/*  T_alpha=S_a x R_alpha,a^* */
+complex_spinor scalar_prod_su3spinor(su3_vector * const S,spinor * const R, const int N, const int parallel);
+
+#endif
diff --git a/linalg/square_norm.c b/linalg/square_norm.c
index 8e374fc66..54255a1ee 100644
--- a/linalg/square_norm.c
+++ b/linalg/square_norm.c
@@ -350,3 +350,38 @@ double square_norm(spinor * const P, const int N, const int parallel) {
   return kc;
 }
 #endif
+
+#ifdef WITHLAPH
+double square_norm_su3vect(su3_vector * const P, const int N, const int parallel) 
+{
+  /* Squared 2-norm of the first N colour vectors of P, Kahan-summed.
+   * The accumulators are static, as in the other reductions of this file. */
+  static double total, carry, local, y, t, d;
+  const su3_vector *v = P;
+  int site;
+
+  total = 0.0;
+  carry = 0.0;
+  for (site = 0; site < N; ++site, ++v) {
+    /* |c0|^2 + |c1|^2 + |c2|^2 of this site */
+    local = v->c0.re*v->c0.re + v->c0.im*v->c0.im +
+      v->c1.re*v->c1.re + v->c1.im*v->c1.im +
+      v->c2.re*v->c2.re + v->c2.im*v->c2.im;
+
+    y = local + carry;
+    t = y + total;
+    d = t - total;
+    total = t;
+    carry = y - d;
+  }
+  carry = total + carry;
+#ifdef MPI
+  if(parallel) {
+    /* combine the per-process partial norms */
+    MPI_Allreduce(&carry, &total, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    return total;
+  }
+#endif
+  return carry;
+}
+#endif
diff --git a/linalg/square_norm.h b/linalg/square_norm.h
index c1f863798..ad46fb6e9 100644
--- a/linalg/square_norm.h
+++ b/linalg/square_norm.h
@@ -27,6 +27,7 @@
  *     Returns the square norm of *P */
 
 double square_norm(spinor * const P, const int N, const int parallel);
+double square_norm_su3vect(su3_vector * const P, const int N, const int parallel);
 
 
 #endif
diff --git a/mpi_init.c b/mpi_init.c
index f9f566733..cedb1e97e 100644
--- a/mpi_init.c
+++ b/mpi_init.c
@@ -138,6 +138,16 @@ MPI_Datatype field_zt_slice_odd_dn_ot;
 MPI_Datatype field_zt_slice_odd_up_ot;
 # endif
 #endif
+#ifdef WITHLAPH
+MPI_Datatype su3vect_point;        /* 6 contiguous MPI_DOUBLE (presumably one su3_vector - confirm) */
+MPI_Datatype jfield_x_slice_cont;  /* LY*LZ contiguous su3vect_point */
+MPI_Datatype jfield_y_slice_cont;  /* LX*LZ contiguous su3vect_point */
+MPI_Datatype jfield_z_slice_cont;  /* LX*LY contiguous su3vect_point */
+MPI_Datatype jfield_x_slice_gath;  /* LY*LZ contiguous su3vect_point, built like jfield_x_slice_cont */
+MPI_Datatype jfield_y_slice_gath;  /* vector: LX blocks of one jfield_y_subslice, stride LY */
+MPI_Datatype jfield_z_slice_gath;  /* vector: LX*LY blocks of one su3vect_point, stride LZ */
+MPI_Datatype jfield_y_subslice;    /* LZ contiguous su3vect_point; used to build jfield_y_slice_gath */
+#endif
 
 #if ( defined PARALLELXYZT || defined PARALLELXYZ )
 MPI_Datatype field_z_slice_even_dn;
@@ -299,6 +309,7 @@ void tmlqcd_mpi_init(int argc,char *argv[]) {
   LY = LY/g_nproc_y;
   LZ = LZ/g_nproc_z;
   VOLUME = (T*LX*LY*LZ);
+  SPACEVOLUME = VOLUME/T;
 #    ifdef _USE_TSPLITPAR
   TEOSLICE = (LX*LY*LZ)/2;
 #    endif
@@ -330,6 +341,7 @@ void tmlqcd_mpi_init(int argc,char *argv[]) {
   /* Note that VOLUMEPLUSRAND is not always equal to VOLUME+RAND */
   /* VOLUMEPLUSRAND rather includes the edges */
   VOLUMEPLUSRAND = VOLUME + RAND + EDGES;
+  SPACERAND=RAND/T;
 #  endif /* ifndef FIXEDVOLUME */
   g_dbw2rand = (RAND + 2*EDGES);
 
@@ -578,7 +590,24 @@ void tmlqcd_mpi_init(int argc,char *argv[]) {
   MPI_Type_commit(&field_zt_slice_ext_L);
   MPI_Type_commit(&field_zt_slice_ext_S);
 # endif
+#endif
 
+#ifdef WITHLAPH
+  MPI_Type_contiguous(6, MPI_DOUBLE, &su3vect_point); 
+
+  MPI_Type_contiguous(LY*LZ, su3vect_point, &jfield_x_slice_cont);
+  MPI_Type_contiguous(LX*LZ, su3vect_point, &jfield_y_slice_cont);
+  MPI_Type_contiguous(LX*LY, su3vect_point, &jfield_z_slice_cont);
+  MPI_Type_contiguous(LY*LZ, su3vect_point, &jfield_x_slice_gath);
+  MPI_Type_contiguous(LZ, su3vect_point, &jfield_y_subslice);
+  MPI_Type_vector(LX, 1, LY, jfield_y_subslice, &jfield_y_slice_gath);
+  MPI_Type_vector(LX*LY, 1, LZ, su3vect_point, &jfield_z_slice_gath);
+  MPI_Type_commit(&jfield_x_slice_gath);
+  MPI_Type_commit(&jfield_x_slice_cont);
+  MPI_Type_commit(&jfield_y_slice_cont);
+  MPI_Type_commit(&jfield_y_slice_gath);
+  MPI_Type_commit(&jfield_z_slice_cont);
+  MPI_Type_commit(&jfield_z_slice_gath);
 #endif
 
   /* The internal z_ and zt_ slices are constructed in geometry() with MPI_Type_indexed() */
@@ -681,12 +710,14 @@ void tmlqcd_mpi_init(int argc,char *argv[]) {
 #  ifndef FIXEDVOLUME
   T = T_global;
   VOLUME = (T*LX*LY*LZ);
+  SPACEVOLUME = VOLUME/T;
 #    ifdef _USE_TSPLITPAR
   TEOSLICE = (LX*LY*LZ)/2;
 #    endif
   RAND = 0;
   EDGES = 0;
   VOLUMEPLUSRAND = VOLUME;
+  SPACERAND=0;
   N_PROC_T = 1;
   N_PROC_X = 1;
   N_PROC_Y = 1;
diff --git a/mpi_init.h b/mpi_init.h
index 6598eda5d..f7bc93526 100644
--- a/mpi_init.h
+++ b/mpi_init.h
@@ -106,6 +106,16 @@ extern MPI_Datatype field_zt_slice_odd_dn_ot;
 extern MPI_Datatype field_zt_slice_odd_up_ot;
 # endif
 #endif
+#ifdef WITHLAPH
+extern MPI_Datatype su3vect_point;
+extern MPI_Datatype jfield_x_slice_cont;
+extern MPI_Datatype jfield_y_slice_cont;
+extern MPI_Datatype jfield_z_slice_cont;
+extern MPI_Datatype jfield_x_slice_gath;
+extern MPI_Datatype jfield_y_slice_gath;
+extern MPI_Datatype jfield_z_slice_gath;
+extern MPI_Datatype jfield_y_subslice;
+#endif
 
 #if ( defined PARALLELXYZT || defined PARALLELXYZ )
 extern MPI_Datatype field_z_slice_even_dn;
diff --git a/solver/Makefile.in b/solver/Makefile.in
index b1aee68c8..9a45d473c 100644
--- a/solver/Makefile.in
+++ b/solver/Makefile.in
@@ -40,7 +40,8 @@ libsolver_TARGETS = bicgstab_complex gmres \
                     sub_low_ev cg_her_nd poly_precon \
                     generate_dfl_subspace dfl_projector \
                     cg_mms_tm solver_field sumr mixed_cg_her index_jd \
-                    dirac_operator_eigenvectors	spectral_proj
+                    dirac_operator_eigenvectors	spectral_proj \
+                    jdher_su3vect cg_her_su3vect eigenvalues_Jacobi
 
 libsolver_OBJECTS = $(addsuffix .o, ${libsolver_TARGETS})
 
diff --git a/solver/cg_her_su3vect.c b/solver/cg_her_su3vect.c
new file mode 100755
index 000000000..842a54ba1
--- /dev/null
+++ b/solver/cg_her_su3vect.c
@@ -0,0 +1,108 @@
+/***********************************************************************
+ *
+ * Copyright (C) 2001 Martin Hasenbusch
+ *               2003 Thomas Chiarappa
+ *               2002,2003,2004,2005,2010 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ **************************************************************************/
+
+/* ************************************************************************
+ * Conjugate Gradient for su3 vectors
+ * Authors: Luigi Scorzato, Marco Cristoforetti
+ *
+ **************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+#include "global.h"
+#include "su3.h"
+#include "linalg_eo.h"
+#include "start.h"
+#include "solver/matrix_mult_typedef.h"
+#include "cg_her_su3vect.h"
+
+#ifdef WITHLAPH
+
+int cg_her_su3vect(su3_vector * const P, su3_vector * const Q, const int max_iter, 
+		   double eps_sq, const int rel_prec, const int N,const int tslice,  matrix_mult_su3vect f) {
+  /* Conjugate-gradient iteration for f(P) = Q; g_jacobi_field[0..2] serve as work
+   * vectors.  NOTE(review): residual / search-direction roles of the work vectors
+   * are inferred from the call order - confirm against the linalg helpers.
+   * Returns the iteration count on convergence, -1 if max_iter was exhausted. */
+  static double src_sq, res_sq, new_res_sq, p_Ap, alpha_cg, beta_cg;
+  const int save_sloppy = g_sloppy_precision;
+  double atime, etime;
+  int iteration = 1, converged;
+
+#ifdef MPI
+  atime = MPI_Wtime();
+#else
+  atime = ((double)clock())/((double)(CLOCKS_PER_SEC));
+#endif
+  src_sq = square_norm_su3vect(Q, N, 1);
+
+  /* starting residual (presumably Q - f(P)) in field 1, copied to field 2 */
+  f(g_jacobi_field[0],P,tslice);
+  diff_su3vect(g_jacobi_field[1], Q, g_jacobi_field[0], N);
+  assign_su3vect(g_jacobi_field[2], g_jacobi_field[1], N);
+  res_sq = square_norm_su3vect(g_jacobi_field[1], N, 1);
+
+  while(iteration <= max_iter) {
+    f(g_jacobi_field[0], g_jacobi_field[2],tslice);
+    p_Ap = scalar_prod_r_su3vect(g_jacobi_field[2], g_jacobi_field[0], N, 1);
+    alpha_cg = res_sq / p_Ap;
+    assign_add_mul_r_su3vect(P, g_jacobi_field[2], alpha_cg, N);
+    assign_mul_add_r_su3vect(g_jacobi_field[0], -alpha_cg, g_jacobi_field[1], N);
+    new_res_sq = square_norm_su3vect(g_jacobi_field[0], N, 1);
+    if(g_debug_level > 1 && g_proc_id == g_stdio_proc) {
+      printf("CG: iterations: %d res^2 %e\n", iteration, new_res_sq);
+      fflush(stdout);
+    }
+    /* absolute (rel_prec == 0) or relative (rel_prec == 1) stopping test */
+    converged = (rel_prec == 0) ? (new_res_sq <= eps_sq)
+      : ((rel_prec == 1) ? (new_res_sq <= eps_sq*src_sq) : 0);
+    if(converged) {
+      break;
+    }
+    beta_cg = new_res_sq / res_sq;
+    assign_mul_add_r_su3vect(g_jacobi_field[2], beta_cg, g_jacobi_field[0], N);
+    assign_su3vect(g_jacobi_field[1], g_jacobi_field[0], N);
+    res_sq = new_res_sq;
+    iteration++;
+  }
+#ifdef MPI
+  etime = MPI_Wtime();
+#else
+  etime = ((double)clock())/((double)(CLOCKS_PER_SEC));
+#endif
+  g_sloppy_precision = save_sloppy;
+  if(g_debug_level > 0  && g_proc_id == 0) {
+    printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); 
+  }
+  return (iteration > max_iter) ? -1 : iteration;
+}
+
+#endif // WITHLAPH
diff --git a/solver/cg_her_su3vect.h b/solver/cg_her_su3vect.h
new file mode 100755
index 000000000..85e9541e1
--- /dev/null
+++ b/solver/cg_her_su3vect.h
@@ -0,0 +1,28 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+#ifndef _CG_HERSU3V_H
+#define _CG_HERSU3V_H
+
+#include"solver/matrix_mult_typedef.h"
+#include"su3.h"
+
+int cg_her_su3vect(su3_vector * const P, su3_vector * const Q, const int max_iter, double eps_sq, const int rel_prec, 
+		   const int N, const int tslice, matrix_mult_su3vect f);
+
+#endif
diff --git a/solver/eigenvalues_Jacobi.c b/solver/eigenvalues_Jacobi.c
new file mode 100644
index 000000000..9fefc302a
--- /dev/null
+++ b/solver/eigenvalues_Jacobi.c
@@ -0,0 +1,228 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+/* ************************************************************************
+ * Main routine for the LapH_ev program: computes eigensystem of the Laplacian operator.
+ * Authors: Luigi Scorzato, Marco Cristoforetti
+ *
+ **************************************************************************/
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "global.h"
+#include "su3.h"
+#include <io/eospinor.h>
+#include <io/params.h>
+#include <io/gauge.h>
+#include <io/spinor.h>
+#include <io/utils.h>
+#include "jacobi.h"
+#include "solver/solver.h"
+#include "solver/jdher_su3vect.h"
+#include "solver/matrix_mult_typedef.h"
+#include "linalg_eo.h"
+#include "eigenvalues_Jacobi.h"
+
+#ifdef WITHLAPH
+
+su3_vector *eigenvectors_su3v = NULL; /* eigenvector k starts at eigenvectors_su3v[k*evlength_su3v] */
+double *eigenvls_su3v = NULL; /* handed to jdher_su3vect, presumably to receive the eigenvalues */
+double max_eigenvalue_su3v; /* only referenced by the commented-out DEBUG call in eigenvalues_Jacobi */
+double * inv_eigenvls_su3v = NULL; /* allocated by eigenvalues_Jacobi but not filled there */
+
+int eigenvalues_for_cg_computed_su3v = 0; /* not modified by eigenvalues_Jacobi */
+int evlength_su3v; /* set to SPACEVOLUME + SPACERAND by eigenvalues_Jacobi */
+
+/*
+ * Computes (*nr_of_eigenvalues) eigenpairs of the operator Jacobi for time
+ * slice tslice with jdher_su3vect: the maximal ones if maxmin != 0, the
+ * minimal ones otherwise.  precision is clamped from below to 1.e-14.
+ * Process 0 writes the eigenvalues to "eigenvalues.<tslice>.<nstore>"; each
+ * eigenvector k goes to "eigenvector.<k>.<tslice>.<nstore>".
+ * Returns eigenvls_su3v[0], or 0.0 when built without LAPACK.
+ */
+double eigenvalues_Jacobi(int * nr_of_eigenvalues, const int max_iterations, 
+			  const double precision, const int maxmin,int tslice, 
+			  const int nstore) {
+  double returnvalue = 0.0; /* defined result even when built without LAPACK */
+  static int allocated = 0;
+#ifdef HAVE_LAPACK
+  int verbosity = 1, converged = 0, blocksize = 1 , blockwise=0;
+  int solver_it_max = 50, j_max, j_min;
+  double decay_min = 1.7, decay_max = 1.5, prec, threshold_min = 1.e-3, threshold_max = 5.e-2;
+volatile  int v0dim = 0;
+  matrix_mult_su3vect f;
+  int N=SPACEVOLUME, N2=(SPACEVOLUME + SPACERAND);
+  su3_vector * max_eigenvector_ = NULL, *max_eigenvector;
+  int returncode=0;
+  int returncode2=0;
+  su3_vector *s;
+  double sqnorm;
+  char filename[200];
+  char eigvl_filename[200];
+  //  int dims[]={T*g_nproc_t, LX*g_nproc_x, LY*g_nproc_y, LZ*g_nproc_z};
+  int dims[]={1, LX*g_nproc_x, LY*g_nproc_y, LZ*g_nproc_z};
+  FILE *efp;
+
+#ifdef MPI
+  double atime, etime;
+  MPI_File fp;
+  MPI_Offset siteSize=3*2*sizeof(double);
+  LemonRecordHeader *header;
+  LemonWriter *writer;
+#else
+  FILE *fp;
+  int siteSize=3*2*sizeof(double);
+#endif
+
+  f = &Jacobi;
+  evlength_su3v = N2;
+  if(g_proc_id == g_stdio_proc && g_debug_level >0) 
+    {
+      printf("Number of %s eigenvalues to compute = %d\n",
+	     maxmin ? "maximal" : "minimal",(*nr_of_eigenvalues));
+      printf("Using Jacobi-Davidson method! \n");
+    }
+  if((*nr_of_eigenvalues) < 8){
+    j_max = 15;
+    j_min = 8;
+  }
+  else{
+    j_max = 2*(*nr_of_eigenvalues);
+    j_min = (*nr_of_eigenvalues);
+  }
+  if(precision < 1.e-14){
+    prec = 1.e-14;
+  }
+  else{
+    prec = precision;
+  }
+  max_eigenvector_= calloc(N2, sizeof(su3_vector));
+  max_eigenvector = max_eigenvector_;
+  if(allocated == 0) 
+    {
+      allocated = 1;
+      eigenvectors_su3v = calloc(N2*(*nr_of_eigenvalues), sizeof(su3_vector));
+      eigenvls_su3v = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
+      inv_eigenvls_su3v = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
+    }
+  solver_it_max = 64;
+  /* compute the maximal one first */
+  /* DEBUG 
+  jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+		50., 1.e-12, 
+		1, 15, 8, max_iterations, 1, 0, 0, NULL,
+		CG, solver_it_max,
+		threshold_max, decay_max, verbosity,
+		&converged, (complex*) max_eigenvector, (double*) &max_eigenvalue_su3v,
+		&returncode2, JD_MAXIMAL, 1,tslice,f);
+  */
+#ifdef MPI
+  atime = MPI_Wtime();
+#endif
+  /* (re-) compute minimal eigenvalues */
+  converged = 0;
+  solver_it_max = 256;
+  if(maxmin)
+    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+		  50., prec, 
+		  (*nr_of_eigenvalues), j_max, j_min, 
+		  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
+		  CG, solver_it_max,
+		  threshold_max, decay_max, verbosity,
+		  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
+		  &returncode, JD_MAXIMAL, 1,tslice,
+		  f);
+  else
+    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+		  0., prec, 
+		  (*nr_of_eigenvalues), j_max, j_min, 
+		  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
+		  CG, solver_it_max,
+		  threshold_min, decay_min, verbosity,
+		  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
+		  &returncode, JD_MINIMAL, 1,tslice,
+		  f);
+#ifdef MPI
+  etime = MPI_Wtime();
+  if(g_proc_id == 0) {
+    printf("Eigenvalues computed in %e sec. (MPI_Wtime)\n", etime-atime);
+    }
+#endif
+
+  /* Printout eigenvalues.  */
+  if(g_proc_id == 0) {
+    sprintf(eigvl_filename,"eigenvalues.%.3d.%.4d", tslice, nstore);
+    efp=fopen(eigvl_filename,"w");
+    if(efp != NULL) {
+      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
+        fprintf(efp,"%e\n",eigenvls_su3v[v0dim]);
+      }
+      fclose(efp);
+    } else {
+      fprintf(stderr, "Could not open %s for writing\n", eigvl_filename);
+    }
+  }
+
+  /* Printout eigenvectors.  */
+  for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
+    sprintf(filename, "eigenvector.%.3d.%.3d.%.4d", v0dim, tslice, nstore);
+    s=(su3_vector*)&eigenvectors_su3v[v0dim*N2];
+#ifdef MPI 
+# ifdef HAVE_LIBLEMON
+    // NOTE: this should write 8*2*3*SPACEVOLUME data per file, but it writes 8*2*4n*SPACEVOLUME (n=4-1 for ev 0-3)
+    MPI_File_open(g_cart_grid, filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp);
+    writer = lemonCreateWriter(&fp, g_cart_grid);
+    header = lemonCreateHeader(1 /* MB */, 1 /* ME */, "lattice-su3_vector-data",SPACEVOLUME*3*sizeof(complex));
+    lemonWriteRecordHeader(header, writer);
+    lemonDestroyHeader(header);
+    lemonWriteLatticeParallel(writer, s, siteSize, dims);
+    lemonWriterCloseRecord(writer);
+    lemonDestroyWriter(writer);
+    MPI_File_close(&fp);
+# else
+  if(g_proc_id == 0) {
+    printf("Cannot write eigenvectors: you need LEMON for writing eigenvectors with MPI\n");
+    }
+# endif
+#else
+    fp=fopen(filename,"wb");
+    if(fp == NULL || fwrite(s,siteSize,SPACEVOLUME,fp) != (size_t)SPACEVOLUME) {
+      fprintf(stderr, "Could not write eigenvector file %s\n", filename);
+    }
+    if(fp != NULL) fclose(fp);
+#endif // MPI
+    sqnorm=square_norm_su3vect(s,SPACEVOLUME,1);
+    if(g_proc_id == 0) {
+      printf("wrote eigenvector | |^2 = %e \n",sqnorm);
+    }
+  }
+
+  returnvalue=eigenvls_su3v[0];
+  free(max_eigenvector_);
+#else
+  fprintf(stderr, "lapack not available, so JD method for EV computation not available \n");
+#endif // LAPACK
+  return(returnvalue);
+}
+
+#endif // WITHLAPH
diff --git a/solver/eigenvalues_Jacobi.h b/solver/eigenvalues_Jacobi.h
new file mode 100755
index 000000000..4cfb668f2
--- /dev/null
+++ b/solver/eigenvalues_Jacobi.h
@@ -0,0 +1,34 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+#ifndef _EIGENVALUESJ_H
+#define _EIGENVALUESJ_H
+
+#include "su3.h"
+
+extern su3_vector *eigenvectors_su3v;
+extern double *eigenvls_su3v;
+extern double * inv_eigenvls_su3v;
+extern int eigenvalues_for_cg_computed_su3v;
+extern int no_eigenvalues_su3v;
+extern int evlength_su3v;
+
+double eigenvalues_Jacobi(int * nr_of_eigenvalues, const int max_iterations, 
+			  const double precision, const int maxmin, int tslice, const int nstore);
+
+#endif // _EIGENVALUESJ_H
diff --git a/solver/gram-schmidt.c b/solver/gram-schmidt.c
index 36cc79e22..a27966f47 100644
--- a/solver/gram-schmidt.c
+++ b/solver/gram-schmidt.c
@@ -123,6 +123,51 @@ void IteratedClassicalGS(complex v[], double *vnrm, int n, int m, complex A[],
   }
 }
 
+#ifdef WITHLAPH
+
+/* Iterated classical Gram-Schmidt on colour-vector fields: subtracts from v its
+ * projections on the m columns of A (column j starts at A + j*lda) and repeats
+ * until the new norm exceeds alpha times the previous one or max_cgs_it passes
+ * are done.  n is the length of v in complex numbers; *vnrm gets the last norm. */
+void IteratedClassicalGS_su3vect(complex v[], double *vnrm, int n, int m, complex A[],
+				 complex work1[], int lda) {
+  const double alpha = 0.5;
+  double vnrm_old;
+  int i, isorth = 0;
+  int j;
+  complex CMONE, CONE;
+
+#ifdef CRAY
+  char *cupl_n = "N";
+  _fcd fupl_n;
+  fupl_n = _cptofcd(cupl_n, strlen(cupl_n));
+#else
+  char *fupl_n = "N";
+#endif
+
+  CMONE.re = -1.; CMONE.im=0.;
+  CONE.re = 1.; CONE.im=0.;
+
+  vnrm_old = sqrt(square_norm_su3vect((su3_vector*) v, n*sizeof(complex)/sizeof(su3_vector),1));
+
+  for(i = 0; !isorth && i < max_cgs_it; i ++) {
+    for(j = 0; j < m; j++){
+      work1[j] = scalar_prod_su3vect((su3_vector*) (A+j*lda), (su3_vector*) v, n*sizeof(complex)/sizeof(su3_vector),1);
+    }
+#ifdef HAVE_LAPACK
+    _FT(zgemv)(fupl_n, &n, &m, &CMONE, A, &lda, work1, &ONE, &CONE, v, &ONE, 1); /* v = v - A*work1 */
+#endif
+    (*vnrm) = sqrt(square_norm_su3vect((su3_vector*) v, n*sizeof(complex)/sizeof(su3_vector),1));
+
+    isorth=((*vnrm) > alpha*vnrm_old); /* done once the norm shrank by less than the factor alpha */
+    vnrm_old = (*vnrm);
+  }
+  if (i >= max_cgs_it) {
+    /*     errorhandler(400,""); */
+  }
+}
+
+#endif // WITHLAPH
 
 /*
  *  ModifiedGramSchmidt 
@@ -159,3 +204,20 @@ void ModifiedGS(complex v[], int n, int m, complex A[], int lda) {
   }
 }
 
+#ifdef WITHLAPH
+
+void ModifiedGS_su3vect(complex v[], int n, int m, complex A[], int lda) {
+  /* Modified Gram-Schmidt: for each column of A in turn, v += (-<column, v>) * column */
+  complex coeff, *column;
+  int k;
+  for (k = 0; k < m; ++k) {
+    column = A + k*lda;
+    coeff = scalar_prod_su3vect((su3_vector*) column, (su3_vector*) v, n*sizeof(complex)/sizeof(su3_vector),1);
+    coeff.re = -coeff.re; coeff.im = -coeff.im;
+#ifdef HAVE_LAPACK
+    _FT(zaxpy)(&n, &coeff, column, &ONE, v, &ONE);
+#endif
+  }
+}
+
+#endif // WITHLAPH
diff --git a/solver/gram-schmidt.h b/solver/gram-schmidt.h
index ff85296e7..33ed90268 100644
--- a/solver/gram-schmidt.h
+++ b/solver/gram-schmidt.h
@@ -23,8 +23,11 @@
 void IteratedClassicalGS_old(complex v[], double *vnrm, int n, int m, complex A[], complex work1[]);
 void IteratedClassicalGS(complex v[], double *vnrm, int n, int m, complex A[], 
 			 complex work1[], int lda) ;
+void IteratedClassicalGS_su3vect(complex v[], double *vnrm, int n, int m, complex A[],
+				 complex work1[], int lda);
 
 void ModifiedGS_old(complex v[], int n, int m, complex A[]);
 void ModifiedGS(complex v[], int n, int m, complex A[], int lda);
+void ModifiedGS_su3vect(complex v[], int n, int m, complex A[], int lda);
 
 #endif
diff --git a/solver/jdher_su3vect.c b/solver/jdher_su3vect.c
new file mode 100644
index 000000000..ad5038fe8
--- /dev/null
+++ b/solver/jdher_su3vect.c
@@ -0,0 +1,828 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/* 
+ * Routines for computing eigensystems of the Laplacian operator with the Jacobi-Davidson algorithm.
+ * Authors Luigi Scorzato, Marco Cristoforetti
+ *
+ *
+ *******************************************************************************/
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <limits.h>
+#include <stdio.h>
+#include <float.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <errno.h>
+#include "global.h"
+#include "su3.h"
+#include "linalg/fortran.h"
+#include "linalg/blas.h"
+#include "linalg/lapack.h"
+#include "linalg_eo.h"
+#include "solver/solver.h"
+#include "solver/gram-schmidt.h"
+#include "solver/quicksort.h"
+#include "cg_her_su3vect.h"
+#include "jdher_su3vect.h"
+#ifdef CRAY
+#include <fortran.h>
+#endif
+
+#ifdef WITHLAPH
+
+
+#define min(a,b) ((a)<(b) ? (a) : (b)) /* smaller of a and b; an argument may be evaluated twice, so pass no side effects */
+#define max(a,b) ((a)<(b) ? (b) : (a)) /* larger of a and b; an argument may be evaluated twice, so pass no side effects */
+
+/****************************************************************************
+ *                                                                          *
+ * Prototypes of static functions                                           *
+ *                                                                          *
+ ****************************************************************************/
+static void print_status_su3vect(int clvl, int it, int k, int j, int kmax, 
+				 int blksize, int actblksize,
+				 double *s, double *resnrm, int *actcorrits);
+static void sorteig_su3vect(int j, double S[], complex U[], int ldu, double tau,
+			    double dtemp[], int idx1[], int idx2[], int strategy);
+
+/* Projection routines */
+void Proj_A_psi_su3vect(su3_vector * const y, su3_vector * const x, int tslice);
+
+void jderrorhandler_su3vect(const int i, char * message) {
+  /* Fatal-error exit: report on stderr, shut MPI down when compiled in, leave with status i. */
+  (void) fprintf(stderr, "jdher %s \n", message);
+#if defined(MPI)
+  (void) MPI_Finalize();
+#endif
+  exit(i);
+}
+
+/****************************************************************************
+ *                                                                          *
+ * Static variables                                                         *
+ *                                                                          *
+ ****************************************************************************/
+/* static double DMONE = -1.0, DZER = 0.0, DONE = 1.0; */
+static int MONE = -1, ONE = 1; /* integer constants handed by address to the Fortran BLAS/LAPACK routines */
+static complex CONE, CZERO, CMONE; /* 1, 0 and -1 as complex numbers; the values are assigned at the start of jdher_su3vect */
+
+/* Projector variables */
+/* They are written by jdher_su3vect and read by Proj_A_psi_su3vect. */
+static int p_n, p_n2, p_k, p_lda; /* vector length n, 2*n, number of columns of p_Q projected out, column stride of p_Q */
+static double p_theta; /* shift subtracted from the operator in Proj_A_psi_su3vect */
+complex * p_Q; /* alias of the Q argument of jdher_su3vect (external linkage) */
+complex * p_work; /* scratch for the projection coefficients; allocated and freed by jdher_su3vect (external linkage) */
+matrix_mult_su3vect p_A_psi_s3; /* the operator A_psi given to jdher_su3vect (external linkage) */
+
+static char * fupl_u = "U", * fupl_c = "C", *fupl_n = "N", * fupl_a = "A", *fupl_v = "V", *filaenv = "zhetrd", *fvu = "VU"; /* character arguments for the LAPACK/BLAS calls */
+
+/* Block Jacobi-Davidson iteration for the hermitian operator A_psi (tslice is handed through to A_psi).
+ *   n, lda       length of one vector in complex numbers / stride between the columns of V0, V and Q
+ *   tau, tol     shift for the correction equation / residual norm below which a Ritz pair is accepted
+ *   kmax         number of eigenpairs wanted; jmax, jmin: largest search space / its size after a restart
+ *   itmax        limit on outer iterations; blksize, blkwise: block size / 1 = accept only whole blocks
+ *   V0dim, V0    initial search vectors (random vectors are added when V0dim < blksize)
+ *   solver_flag  inner solver (only CG is handled); linitmax, eps_tr, toldecay steer the inner solve
+ *   k_conv       in: eigenpairs already held in Q; out: eigenpairs found.  Q, lambda: vectors / values
+ *   it           out: outer iterations done; maxmin: 1 = maximal values; shift_mode: 1 = tau follows lambda */
+void jdher_su3vect(int n, int lda, double tau, double tol, int kmax, int jmax, int jmin, int itmax,
+	   int blksize, int blkwise, int V0dim, complex *V0, int solver_flag,
+	   int linitmax, double eps_tr, double toldecay, int verbosity,
+	   int *k_conv, complex *Q, double *lambda, int *it,
+	   int maxmin, int shift_mode, int tslice, matrix_mult_su3vect A_psi)
+{
+/* Local variables; verbosity > 0 prints header and statistics, >= 2 also one line per iteration */
+/* allocatables:                                              *
+ * initialize with NULL, so we can free even unallocated ptrs */
+double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;
+volatile complex *V_ = NULL;
+volatile complex  *V; 
+complex *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res, *eigwork = NULL, *temp1_ = NULL, *temp1;
+int *idx1 = NULL, *idx2 = NULL, *convind = NULL, *keepind = NULL, *solvestep = NULL, *actcorrits = NULL;
+
+/* non-allocated ptrs */
+complex *q, *v, *u, *r = NULL;  
+/* scalar vars */
+double theta, alpha, it_tol;
+int i, k, j, actblksize, eigworklen, found, conv, keep, n2;
+int act, cnt, idummy, info, CntCorrIts=0, endflag=0;
+int N=n*sizeof(complex)/sizeof(su3_vector); /* vector length counted in su3_vector units */
+int IDIST = 1;
+int ISEED[4] = {2, 3, 5, 7};
+ ISEED[0] = 2;
+  
+	/* print info header */
+ if ((verbosity > 0) && (g_proc_id == 0)){
+   printf("Jacobi-Davidson method for hermitian Matrices\n");
+   printf("Solving  A*x = lambda*x \n\n");
+   printf("  N=      %10d  ITMAX=%4d\n", n, itmax);
+   printf("  KMAX=%3d  JMIN=%3d  JMAX=%3d  V0DIM=%3d\n", 
+	  kmax, jmin, jmax, V0dim);
+   printf("  BLKSIZE=        %2d  BLKWISE=      %5s\n", 
+	  blksize, blkwise ? "TRUE" : "FALSE");
+   printf("  TOL=  %11.4e TAU=  %11.4e\n", 
+	  tol, tau);
+   printf("  LINITMAX=    %5d  EPS_TR=  %10.3e  TOLDECAY=%9.2e\n", 
+	  linitmax, eps_tr, toldecay);
+   printf("\n Computing %s eigenvalues\n",
+	  maxmin ? "maximal" : "minimal");
+   printf("\n");
+   fflush( stdout );
+ }
+   /* validate input parameters */
+ if(tol <= 0) jderrorhandler(401,"");
+ if(kmax <= 0 || kmax > n) jderrorhandler(402,"");
+ if(jmax <= 0 || jmax > n) jderrorhandler(403,"");
+ if(jmin <= 0 || jmin > jmax) jderrorhandler(404,"");
+ if(itmax < 0) jderrorhandler(405,"");
+ if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,"");
+ if(blksize <= 0 || blksize > kmax) jderrorhandler(406,"");
+ if(blkwise < 0 || blkwise > 1) jderrorhandler(407,"");
+ if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,"");
+ if(linitmax < 0) jderrorhandler(409,"");
+ if(eps_tr < 0.) jderrorhandler(500,"");
+ if(toldecay <= 1.0) jderrorhandler(501,"");
+ 
+ CONE.re=1.; CONE.im=0.;
+ CZERO.re=0.; CZERO.im=0.;
+ CMONE.re=-1.; CMONE.im=0.;
+ 
+ /* Get hardware-dependent values:
+   * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt.
+   * block size... */
+ eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax;
+
+ if((void*)(V_ = (complex *)malloc((lda * jmax + 4) * sizeof(complex))) == NULL) 
+   {
+     errno = 0;
+     jderrorhandler(300,"V in jdher");
+  }
+ V = V_;
+ if((void*)(U = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) 
+   {
+     jderrorhandler(300,"U in jdher");
+   }
+ if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL) 
+   {
+     jderrorhandler(300,"s in jdher");
+   }
+ if((void*)(Res_ = (complex *)malloc((lda * blksize+4) * sizeof(complex))) == NULL) 
+   {
+     jderrorhandler(300,"Res in jdher");
+   }
+ Res = Res_;
+ /* resnrm must start zeroed: it is copied into resnrm_old before its first assignment */
+ if((void*)(resnrm = (double *)calloc(blksize, sizeof(double))) == NULL) 
+   {
+     jderrorhandler(300,"resnrm in jdher");
+   }
+ if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL) 
+   {
+     jderrorhandler(300,"resnrm_old in jdher");
+   }
+ if((void*)(M = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) 
+   {
+     jderrorhandler(300,"M in jdher");
+   }
+ if((void*)(Vtmp = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL) 
+   {
+     jderrorhandler(300,"Vtmp in jdher");
+   }
+ if((void*)(p_work = (complex *)malloc(lda * sizeof(complex))) == NULL) 
+   {
+     jderrorhandler(300,"p_work in jdher");
+   }
+ 
+ /* ... */
+ if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL) 
+   {
+     jderrorhandler(300,"idx1 in jdher");
+   }
+ if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL) 
+   {
+     jderrorhandler(300,"idx2 in jdher");
+   }
+ 
+ /* Indices for (non-)converged approximations */
+ if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL) 
+   {
+     jderrorhandler(300,"convind in jdher");
+   }
+ if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL) 
+   {
+     jderrorhandler(300,"keepind in jdher");
+   }
+ if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL) 
+   {
+     jderrorhandler(300,"solvestep in jdher");
+   }
+ if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL) 
+   {
+     jderrorhandler(300,"actcorrits in jdher");
+   }
+ 
+ if((void*)(eigwork = (complex *)malloc(eigworklen * sizeof(complex))) == NULL) 
+   {
+     jderrorhandler(300,"eigwork in jdher");
+   }
+ if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL) 
+   {
+     jderrorhandler(300,"rwork in jdher");
+   }
+ if((void*)(temp1_ = (complex *)malloc((lda+4) * sizeof(complex))) == NULL) 
+   {
+     jderrorhandler(300,"temp1 in jdher");
+   }
+ temp1 = temp1_;
+ if((void*)(dtemp = (double *)malloc(lda * sizeof(complex))) == NULL) /* sized in complex: sorteig also buffers a column of U in it */
+   {
+     jderrorhandler(300,"dtemp in jdher");
+   }
+ 
+ /* Set variables for Projection routines */
+ n2 = 2*n;
+ p_n = n;
+ p_n2 = n2;
+ p_Q = Q;
+ p_A_psi_s3 = A_psi;
+ p_lda = lda;
+
+  /**************************************************************************
+   *                                                                        *
+   * Generate initial search subspace V. Vectors are taken from V0 and if   *
+   * necessary randomly generated.                                          *
+   *                                                                        *
+   **************************************************************************/
+
+  /* copy V0 to V */
+  _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1);
+  j = V0dim;
+  /* if V0dim < blksize: generate additional random vectors */
+  if (V0dim < blksize) 
+	{
+    idummy = (blksize - V0dim)*n; /* nof random numbers */
+    _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda);
+    j = blksize;
+  }
+  for (cnt = 0; cnt < j; cnt ++) 
+	{
+    ModifiedGS_su3vect(V + cnt*lda, n, cnt, V, lda);
+    alpha = sqrt(square_norm_su3vect((su3_vector*)(V+cnt*lda), N, 1));
+    alpha = 1.0 / alpha;
+    _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE);
+  }
+  /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle
+     is computed. */
+  for (cnt = 0; cnt < j; cnt++)	{
+    /* WARNING: this assumes that A_psi updates the boundaries of the input vector */
+    A_psi((su3_vector*) temp1, (su3_vector*) (V+cnt*lda), tslice);
+    idummy = cnt+1;
+    for(i = 0; i < idummy; i++) {
+		  M[cnt*jmax+i] = scalar_prod_su3vect((su3_vector*)(V+i*lda), (su3_vector*) temp1, N, 1);
+    }
+  }
+
+  /* Other initializations */
+  k = 0; (*it) = 0; 
+  if((*k_conv) > 0) 
+	{
+    k = (*k_conv);
+  }
+
+  actblksize = blksize; 
+  for(act = 0; act < blksize; act ++)
+	{
+    solvestep[act] = 1;
+  }
+
+
+  /****************************************************************************
+   *                                                                          *
+   * Main JD-iteration loop                                                   *
+   *                                                                          *
+   ****************************************************************************/
+
+  while((*it) < itmax) 
+    {
+      /****************************************************************************
+       *                                                                          *
+       * Solving the projected eigenproblem                                       *
+       *                                                                          *
+       * M*u = V^dagger*A*V*u = s*u                                               *
+       * M is hermitian, only the upper triangle is stored                        *
+       *                                                                          *
+       ****************************************************************************/
+      _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1);
+      _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1); 
+
+      if (info != 0) 
+	{
+	  printf("error solving the projected eigenproblem.");
+	  printf(" zheev: info = %d\n", info);
+	}
+      if(info != 0) jderrorhandler(502,"proble in zheev");
+  
+
+      /* Reverse order of eigenvalues if maximal value is needed */
+      if(maxmin == 1)
+	{
+	  sorteig_su3vect(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0); 
+	}
+      else
+	{
+	  sorteig_su3vect(j, s, U, jmax, 0., dtemp, idx1, idx2, 0); 
+	}
+      /****************************************************************************
+       *                                                                          *
+       * Convergence/Restart Check                                                *
+       *                                                                          *
+       * In case of convergence, strip off a whole block or just the converged    *
+       * ones and put 'em into Q.  Update the matrices Q, V, U, s                 *
+       *                                                                          *
+       * In case of a restart update the V, U and M matrices and recompute the    *
+       * Eigenvectors                                                             *
+       *                                                                          *
+       ****************************************************************************/
+      
+      found = 1;
+      while(found) 
+	{
+	  /* conv/keep = Number of converged/non-converged Approximations */
+	  conv = 0; keep = 0;
+	  for(act=0; act < actblksize; act++)
+	    {
+	      /* Setting pointers for single vectors */
+	      q = Q + (act+k)*lda; 
+	      u = U + act*jmax; 
+	      r = Res + act*lda; 	
+	      /* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */
+	      theta = s[act];
+	      _FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1);
+	      /* Compute the residual */
+	      A_psi((su3_vector*) r, (su3_vector*) q,tslice); 
+	      theta = -theta;
+	      _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
+	      
+	      /* Compute norm of the residual and update arrays convind/keepind*/
+	      resnrm_old[act] = resnrm[act];
+	      resnrm[act] = sqrt(square_norm_su3vect((su3_vector*) r, N, 1));
+	      if (resnrm[act] < tol)
+		{
+		  convind[conv] = act; 
+		  conv = conv + 1; 
+		}
+	      else
+		{
+		  keepind[keep] = act; 
+		  keep = keep + 1; 
+		}
+	    }  /* for(act = 0; act < actblksize; act ++) */
+	  /* Check whether the blkwise-mode is chosen and ALL the
+	     approximations converged, or whether the strip-off mode is
+	     active and SOME of the approximations converged */
+	  found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0)) 
+	    && (j > actblksize || k == kmax - actblksize);
+	  /***************************************************************************
+	   *                                                                        *
+	   * Convergence Case                                                       *
+	   *                                                                        *
+	   * In case of convergence, strip off a whole block or just the converged  *
+	   * ones and put 'em into Q.  Update the matrices Q, V, U, s               *
+	   *                                                                        *
+	   **************************************************************************/
+	  if (found) 
+	    {
+	      /* Store Eigenvalues */
+	      for(act = 0; act < conv; act++)
+		lambda[k+act] = s[convind[act]];
+	      /* Re-use non approximated Ritz-Values */
+	      for(act = 0; act < keep; act++)
+		s[act] = s[keepind[act]];
+	      /* Shift the others in the right position */
+	      for(act = 0; act < (j-actblksize); act ++)
+		s[act+keep] = s[act+actblksize];
+	      /* Update V. Re-use the V-Vectors not looked at yet. */
+	      idummy = j - actblksize;
+	      for (act = 0; act < n; act = act + jmax) 
+		{
+		  cnt = act + jmax > n ? n-act : jmax;
+		  _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1);
+		  _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp, 
+			     &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1);
+		}
+	      /* Insert the not converged approximations as first columns in V */
+	      for(act = 0; act < keep; act++)
+		{
+		  _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1);
+		}
+	      /* Store Eigenvectors */
+	      for(act = 0; act < conv; act++)
+		{
+		  _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1);
+		}
+	      /* Update SearchSpaceSize j */
+	      j = j - conv;
+	      /* Let M become a diagonalmatrix with the Ritzvalues as entries ... */ 
+	      _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
+	      for (act = 0; act < j; act++)
+		{
+		  M[act*jmax + act].re = s[act];
+		}
+	      /* ... and U the Identity(jnew,jnew) */
+	      _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
+	      if(shift_mode == 1)
+		{
+		  if(maxmin == 0)
+		    {
+		      for(act = 0; act < conv; act ++)
+			{
+			  if (lambda[k+act] > tau)
+			    {
+			      tau = lambda[k+act];
+			    }
+			}
+		    }
+		  else
+		    {
+		      for(act = 0; act < conv; act ++)
+			{
+			  if (lambda[k+act] < tau)
+			    {
+			      tau = lambda[k+act];
+			    }
+			} 
+		    }
+		}
+	      /* Update Converged-Eigenpair-counter and Pro_k */
+	      k = k + conv;
+	      /* Update the new blocksize */
+	      actblksize=min(blksize, kmax-k);
+	      /* Exit main iteration loop when kmax eigenpairs have been  approximated */
+	      if (k == kmax)
+		{
+		  endflag = 1;
+		  break;
+		}
+	      /* Counter for the linear-solver-accuracy */
+	      for(act = 0; act < keep; act++)
+		solvestep[act] = solvestep[keepind[act]];
+	      /* Now we expect to have the next eigenvalues */
+	      /* allready with some accuracy                */
+	      /* So we do not need to start from scratch... */
+	      for(act = keep; act < blksize; act++)
+		solvestep[act] = 1;
+	    } /* if(found) */
+	  if(endflag == 1)
+	    {
+	      break;
+	    }
+	  /**************************************************************************
+	   *                                                                        *
+	   * Restart                                                                *
+	   *                                                                        *
+	   * The Eigenvector-Aproximations corresponding to the first jmin          *
+	   * Petrov-Vectors are kept.  if (j+actblksize > jmax)                     *
+	   *                                                                        *
+	   **************************************************************************/
+	  if (j+actblksize > jmax) 
+	    {
+	      idummy = j; j = jmin;
+	      
+	      for (act = 0; act < n; act = act + jmax) 
+		{ /* V = V * U(:,1:j) */
+		  cnt = act+jmax > n ? n-act : jmax;
+		  _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1);
+		  _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp, 
+			     &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1);
+		}
+	      _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
+	      _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
+	      for (act = 0; act < j; act++)
+		M[act*jmax + act].re = s[act];
+	    }
+	} /* while(found) */    
+      
+      if(endflag == 1)
+	{
+	  break;
+	}
+      
+      /****************************************************************************
+       *                                                                          *
+       * Solving the correction equations                                         *
+       *                                                                          *
+       *                                                                          *
+       ****************************************************************************/
+      
+      /* Solve actblksize times the correction equation ... */
+      for (act = 0; act < actblksize; act ++) 
+	{      
+	  /* Setting start-value for vector v as zeros(n,1). Guarantees orthogonality */
+	  v = V + j*lda;
+	  for (cnt = 0; cnt < n; cnt ++)
+	    { 
+	      v[cnt].re = 0.;
+	      v[cnt].im = 0.;
+	    }
+	  /* Adaptive accuracy and shift for the lin.solver. In case the
+	     residual is big, we don't need a too precise solution for the
+	     correction equation, since even in exact arithmetic the
+	     solution wouldn't be too usefull for the Eigenproblem. */
+	  r = Res + act*lda;
+	  if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act])
+	    {
+	      p_theta = s[act];
+	    }
+	  else
+	    {
+	      p_theta = tau;
+	    }
+	  p_k = k + actblksize;
+	  
+	  /* if we are in blockwise mode, we do not want to */
+	  /* iterate solutions much more, if they have      */
+	  /* allready the desired precision                 */
+	  if(blkwise == 1 && resnrm[act] < tol) 
+	    {
+	      it_tol = pow(toldecay, (double)(-5));
+	    }
+	  else 
+	    {
+	      it_tol = pow(toldecay, (double)(-solvestep[act]));
+	    }
+	  solvestep[act] = solvestep[act] + 1;
+	  
+	  /* equation and project if necessary */
+	  ModifiedGS_su3vect(r, n, k + actblksize, Q, lda);
+	  
+	  /* Solve the correction equation ...  */
+	  g_sloppy_precision = 1;
+	  if(solver_flag == CG)
+	    {
+	      info = cg_her_su3vect((su3_vector*) v, (su3_vector*) r, linitmax, it_tol*it_tol, 0, 
+				    n*sizeof(complex)/sizeof(su3_vector),tslice, &Proj_A_psi_su3vect);
+	    }
+	  g_sloppy_precision = 0;
+	  
+	  /* Actualizing profiling data */
+	  if (info == -1)
+	    {
+	      CntCorrIts += linitmax;
+	    }
+	  else
+	    {
+	      CntCorrIts += info;
+	    }
+	  actcorrits[act] = info;
+	  
+	  /* orthonormalize v to Q, cause the implicit
+	     orthogonalization in the solvers may be too inaccurate. Then
+	     apply "IteratedCGS" to prevent numerical breakdown 
+	     in order to orthogonalize v to V */
+	  
+	  ModifiedGS_su3vect(v, n, k+actblksize, Q, lda);
+	  IteratedClassicalGS_su3vect(v, &alpha, n, j, V, temp1, lda);
+	  
+	  alpha = 1.0 / alpha;
+	  _FT(dscal)(&n2, &alpha, (double*) v, &ONE);
+	  
+	  /* update interaction matrix M */
+	  A_psi((su3_vector*) temp1, (su3_vector*) v, tslice);
+	  idummy = j+1;
+	  for(i = 0; i < idummy; i++) {
+	    M[j*jmax+i] = scalar_prod_su3vect((su3_vector*) (V+i*lda), (su3_vector*) temp1, N, 1);
+	  }
+	  
+	  /* Increasing SearchSpaceSize j */
+	  j ++;
+	}   /* for (act = 0;act < actblksize; act ++) */    
+      
+      /* Print information line */
+      if(g_proc_id == 0) {
+	print_status_su3vect(verbosity, *it, k, j - blksize, kmax, blksize, actblksize, 
+			     s, resnrm, actcorrits);
+      }
+      /* Increase iteration-counter for outer loop  */
+      (*it) = (*it) + 1;
+    } /* Main iteration loop */
+  
+  /******************************************************************
+   *                                                                *
+   * Eigensolutions converged or iteration limit reached            *
+   *                                                                *
+   * Print statistics. Free memory. Return.                         *
+   *                                                                *
+   ******************************************************************/
+
+  (*k_conv) = k;
+  if (g_proc_id == 0 && verbosity > 0) {
+    printf("\nJDHER execution statistics\n\n");
+    printf("IT_OUTER=%d   IT_INNER_TOT=%d   IT_INNER_AVG=%8.2f\n",
+	   (*it), CntCorrIts, (*it) > 0 ? (double)CntCorrIts/(*it) : 0.0);
+    printf("\nConverged eigensolutions in order of convergence:\n");
+    printf("#  I              LAMBDA(I)      RES(I)\n");
+    printf("#---------------------------------------\n");
+  }    
+  for (act = 0; act < *k_conv; act ++) 
+	{
+    /* Compute the residual for solution act; r is pointed at Res because it is still NULL if the main loop never ran */
+    q = Q + act*lda; r = Res;
+    theta = -lambda[act];
+    A_psi((su3_vector*) r, (su3_vector*) q,tslice);
+    _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
+    alpha = sqrt(square_norm_su3vect((su3_vector*) r, N, 1));
+    if(g_proc_id == 0 && verbosity > 0) {
+      printf("%3d %22.15e %12.5e\n", act+1, lambda[act], alpha);
+    }
+  }
+  if(g_proc_id == 0 && verbosity > 0) 
+	{
+    printf("\n");
+    fflush( stdout );
+  }
+  free(V_); free(Vtmp); free(U); 
+  free(s); free(Res_); 
+  free(resnrm); free(resnrm_old); 
+  free(M); free(Z);
+  free(eigwork); free(temp1_);
+  free(dtemp); free(rwork);
+  free(p_work); p_work = NULL; /* p_work is a global: do not leave it dangling */
+  free(idx1); free(idx2); 
+  free(convind); free(keepind); free(solvestep); free(actcorrits);
+  
+} /* jdher_su3vect(.....) */
+
+/****************************************************************************
+ *                                                                          *
+ * Supporting functions                                                     *
+ *                                                                          *
+ ****************************************************************************/
+
+/* PRINT_STATUS - print status line (called for each outer iteration)
+ */
+static void print_status_su3vect(int verbosity, int it, int k, int j, int kmax, 
+				 int blksize, int actblksize,
+				 double *s, double *resnrm, int *actcorrits) {
+  /* Status line for one outer iteration; prints only when verbosity >= 2.  A header with a dashed
+   * rule is emitted when it == 0.  kmax is accepted but not used. */
+  const int max_vals = 5;   /* at most this many Ritz values are listed when blksize == 1 */
+  int col, width, shown;
+  if (verbosity < 2)
+    return;
+  if (blksize == 1) {
+    if (it == 0) {
+      printf("  IT   K   J       RES LINIT RITZ-VALUES(1:5)\n");
+      width = max_vals*10;
+      if (13 > width)
+        width = 13;
+      width += 28;
+      for (col = 0; col < width; col++)
+        putchar('-');
+      printf("\n");
+    }
+    printf("%4d %3d %3d %9.2e %5d", it + 1, k, j, resnrm[0], actcorrits[0]);
+    shown = (j < max_vals) ? j : max_vals;
+    for (col = 0; col < shown; col++)
+      printf(" %9.2e", s[col]);
+    printf("\n");
+    fflush( stdout );
+    return;
+  }
+  /* blksize != 1: one column group per block member, blank-padded beyond actblksize */
+  if (it == 0) {
+    printf("  IT   K   J  RITZVALS ");
+    for (col = 1; col < actblksize; col++)
+      printf("          ");
+    printf("   RES      ");
+    for (col = 1; col < actblksize; col++)
+      printf("          ");
+    printf("      LINIT\n");
+    width = 12 + 4 + blksize*(10 + 10 + 5);
+    for (col = 0; col < width; col++)
+      putchar('-');
+    printf("\n");
+  }
+  printf("%4d %3d %3d", it + 1, k, j);
+  for (col = 0; col < blksize; col++) {
+    if (col < actblksize) printf(" %9.2e", s[col]);
+    else                  printf("          ");
+  }
+  printf("  ");
+  for (col = 0; col < blksize; col++) {
+    if (col < actblksize) printf(" %9.2e", resnrm[col]);
+    else                  printf("          ");
+  }
+  printf("  ");
+  for (col = 0; col < blksize; col++) {
+    if (col < actblksize) printf(" %5d", actcorrits[col]);
+    else                  printf("     ");
+  }
+  printf("\n");
+  fflush( stdout );
+}
+
+/*
+ * SORTEIG
+ *
+ * Default behaviour (strategy == 0):
+ *
+ *   Sort eigenpairs (S(i),U(:,i)), such that 
+ *
+ *       |S(i) - tau| <= |S(i+1) -tau| for i=1..j-1.
+ *
+ *     j  : dimension of S
+ *     ldu: leading dimension of U
+ *   dtemp: scratch array; holds j doubles (sort keys, then a copy of S) and later one
+ *          column of U (j complex numbers), so it needs room for j complex values
+ *    idx1: int array of length j (sort permutation); idx2: int array of length j (its inverse)
+ * Alternate behaviour (strategy == 1):
+ *
+ *   Same as above but put all S(i) < tau to the end. This is used to
+ *   avoid computation of zero eigenvalues.
+ */
+
+static void sorteig_su3vect(int j, double S[], complex U[], int ldu, double tau,
+			    double dtemp[], int idx1[], int idx2[], int strategy){
+  int i;
+
+  /* setup vector to be sorted and index vector */
+  switch (strategy) {
+  case 0:
+    for (i = 0; i < j; i ++)
+      dtemp[i] = fabs(S[i] - tau); /* sort key: distance from tau */
+    break;
+  case 1:
+    for (i = 0; i < j; i ++)
+      if (S[i] < tau)
+	dtemp[i] = DBL_MAX; /* largest possible key, so values below tau sort last */
+      else
+	dtemp[i] = fabs(S[i] - tau);
+    break;
+  default:
+    jderrorhandler(503,"");; /* unknown strategy */
+  }
+  for (i = 0; i < j; i ++)
+    idx1[i] = i; /* identity permutation, reordered together with the keys below */
+
+  /* sort dtemp in ascending order carrying itemp along */
+  quicksort(j, dtemp, idx1); /* presumably leaves in idx1[i] the old position of the i-th smallest key -- confirm in quicksort */
+
+  /* compute 'inverse' index vector */
+  for (i = 0; i < j; i ++)
+    idx2[idx1[i]] = i;
+
+  /* sort eigenvalues */
+  memcpy(dtemp, S, j * sizeof(double)); /* the keys are no longer needed; dtemp now holds the unsorted S */
+  for (i = 0; i < j; i ++)
+    S[i] = dtemp[idx1[i]];
+
+  /* sort eigenvectors (in place) */
+  for (i = 0; i < j; i ++) {
+    if (i != idx1[i]) {
+      memcpy(dtemp, U+i*ldu, j*sizeof(complex)); /* swap columns i and idx1[i], using dtemp as a j-complex buffer */
+      memcpy(U+i*ldu, U+idx1[i]*ldu, j*sizeof(complex));
+      memcpy(U+idx1[i]*ldu, dtemp, j*sizeof(complex));
+      idx1[idx2[i]] = idx1[i]; /* the column that used to sit in slot i now sits in slot idx1[i]: redirect its future pick-up */
+      idx2[idx1[i]] = idx2[i]; /* keep idx2 the inverse of idx1 for the slots still to be processed */
+    }
+  }
+}
+
+
+
+
+void Proj_A_psi_su3vect(su3_vector * const y, su3_vector * const x, int tslice){
+  /* y = (A - p_theta)*x, followed by removal of the components along the first p_k columns of p_Q */
+  double neg_shift = -p_theta;
+  int col = 0;
+  p_A_psi_s3(y, x, tslice);                                             /* y = A*x */
+  _FT(daxpy)(&p_n2, &neg_shift, (double*) x, &ONE, (double*) y, &ONE);  /* y = y - theta*x */
+  while (col < p_k) {
+    /* p_work[col] = scalar product of column col of Q with y */
+    p_work[col] = scalar_prod_su3vect((su3_vector*) (p_Q+col*p_lda), (su3_vector*) y, p_n*sizeof(complex)/sizeof(su3_vector), 1);
+    col++;
+  }
+  /* y = y - Q*p_work */
+  _FT(zgemv)(fupl_n, &p_n, &p_k, &CMONE, p_Q, &p_lda, (complex*) p_work, &ONE, &CONE, (complex*) y, &ONE, 1);
+}
+
+#endif // WITHLAPH
diff --git a/solver/jdher_su3vect.h b/solver/jdher_su3vect.h
new file mode 100755
index 000000000..3ccb25682
--- /dev/null
+++ b/solver/jdher_su3vect.h
@@ -0,0 +1,49 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifndef _JDHERSU3VJACOBI_H
+#define _JDHERSU3VJACOBI_H
+
+#ifndef JD_MAXIMAL
+#define JD_MAXIMAL 1
+#endif
+#ifndef JD_MINIMAL
+#define JD_MINIMAL 0
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include "su3.h"
+#include "solver/solver.h"
+
+void jderrorhandler(const int i, char * message);
+
+extern void jdher_su3vect(int n, int lda, double tau, double tol, 
+		  int kmax, int jmax, int jmin, int itmax,
+		  int blksize, int blkwise, 
+		  int V0dim, complex *V0, 
+		  int solver_flag, 
+		  int linitmax, double eps_tr, double toldecay,
+		  int verbosity,
+		  int *k_conv, complex *Q, double *lambda, int *it,
+		  int maxmin, int shift_mode,int tslice,
+		  matrix_mult_su3vect A_psi);
+
+#endif
+
diff --git a/solver/matrix_mult_typedef.h b/solver/matrix_mult_typedef.h
index 5d9a8b3c2..4535959c4 100644
--- a/solver/matrix_mult_typedef.h
+++ b/solver/matrix_mult_typedef.h
@@ -32,5 +32,6 @@ typedef void (*matrix_mult) (spinor * const, spinor * const);
 typedef void (*matrix_mult_blk) (spinor * const, spinor * const, const int);
 typedef void (*matrix_mult_clover) (spinor * const, spinor * const, const double);
 typedef void (*c_matrix_mult) (complex * const, complex * const);
+typedef void (*matrix_mult_su3vect) (su3_vector * const, su3_vector * const, const int);
 
 #endif
diff --git a/su3.h b/su3.h
index 411b5102d..77ed19d87 100644
--- a/su3.h
+++ b/su3.h
@@ -79,6 +79,17 @@ typedef struct
    spinor sp_up,sp_dn;
 } bispinor;
 
+typedef struct
+{
+  complex s00,s01,s02,s03,s10,s11,s12,s13,s20,s21,s22,s23,s30,s31,s32,s33;
+} spinor_matrix; /* 16 complex entries named s<i><j>, i,j in 0..3; NOTE(review): presumably a 4x4 matrix in spin space -- confirm */
+
+typedef struct
+{
+  complex sc0,sc1,sc2,sc3;
+} complex_spinor; /* 4 complex entries sc0..sc3; NOTE(review): presumably one complex number per spin component -- confirm */
+
+
 /*******************************************************************************
 *
 * Macros for SU(3) vectors
diff --git a/xchange.h b/xchange.h
index 17a6c4edb..b2af30c60 100644
--- a/xchange.h
+++ b/xchange.h
@@ -23,6 +23,7 @@
 #include "xchange_gauge.h"
 #include "xchange_deri.h"
 #include "xchange_halffield.h"
+#include "xchange_jacobi.h"
 #  ifdef _USE_TSPLITPAR
 #    include "xchange_field_tslice.h"
 #  endif
diff --git a/xchange_jacobi.c b/xchange_jacobi.c
new file mode 100644
index 000000000..de1985af2
--- /dev/null
+++ b/xchange_jacobi.c
@@ -0,0 +1,110 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+/**********************************************************
+ * 
+ * exchange routines for su3_vector fields
+ *
+ * Author: Luigi Scorzato
+ *
+ **********************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include<config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#ifdef MPI
+# include <mpi.h>
+#endif
+
+#include "global.h"
+#if (defined XLC && defined BGL)
+#  include "bgl.h"
+#endif
+#include "mpi_init.h"
+#include "su3.h"
+#include "xchange_jacobi.h"
+
+#ifdef WITHLAPH
+/* Note that LAPH also implies _INDEX_INDEP_GEOM, NO PARALLELT* */
+
+/* Exchanges boundary slices of the su3_vector field l with the neighbouring MPI processes: for each direction enabled by PARALLELX / PARALLELXY / PARALLELXYZ (x, then y, then z) two MPI_Sendrecv calls are issued on g_cart_grid. Without MPI the function does nothing. */
+void xchange_jacobi(su3_vector * const l) {
+  
+#ifdef _KOJAK_INST
+#pragma pomp inst begin(xchange_jacobi)
+#endif
+
+#  ifdef MPI
+
+  MPI_Status status; /* overwritten by every MPI_Sendrecv below and never inspected */
+#    if (defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ )
+  /* send the slice starting at gI_0_0_0 (gather datatype) to the neighbour on the left in x direction */
+  /* receive the data from the neighbour on the right in x direction into the contiguous slice at gI_L_0_0 */
+  MPI_Sendrecv((void*)(l+gI_0_0_0), 1, jfield_x_slice_gath, g_nb_x_dn, 5091, 
+	       (void*)(l+gI_L_0_0), 1, jfield_x_slice_cont, g_nb_x_up, 5091,
+	       g_cart_grid, &status);
+    
+  /* send the slice starting at gI_Lm1_0_0 (gather datatype) to the neighbour on the right in x direction */
+  /* receive the data from the neighbour on the left in x direction into the contiguous slice at gI_m1_0_0 */  
+  MPI_Sendrecv((void*)(l+gI_Lm1_0_0), 1, jfield_x_slice_gath, g_nb_x_up, 5092, 
+	       (void*)(l+gI_m1_0_0), 1, jfield_x_slice_cont, g_nb_x_dn, 5092,
+	       g_cart_grid, &status);
+    
+#    endif
+    
+#    if (defined PARALLELXY || defined PARALLELXYZ )
+  /* send the slice starting at gI_0_0_0 (gather datatype) to the neighbour on the left in y direction */
+  /* receive the data from the neighbour on the right in y direction into the contiguous slice at gI_0_L_0 */
+  MPI_Sendrecv((void*)(l+gI_0_0_0), 1, jfield_y_slice_gath, g_nb_y_dn, 5101, 
+	       (void*)(l+gI_0_L_0), 1, jfield_y_slice_cont, g_nb_y_up, 5101,
+	       g_cart_grid, &status);
+    
+  /* send the slice starting at gI_0_Lm1_0 (gather datatype) to the neighbour on the right in y direction */
+  /* receive the data from the neighbour on the left in y direction into the contiguous slice at gI_0_m1_0 */  
+  MPI_Sendrecv((void*)(l+gI_0_Lm1_0), 1, jfield_y_slice_gath, g_nb_y_up, 5102, 
+	       (void*)(l+gI_0_m1_0), 1, jfield_y_slice_cont, g_nb_y_dn, 5102,
+	       g_cart_grid, &status);
+    
+#    endif
+    
+#    if (defined PARALLELXYZ )  
+  /* send the slice starting at gI_0_0_0 (gather datatype) to the neighbour on the left in z direction */
+  /* receive the data from the neighbour on the right in z direction into the contiguous slice at gI_0_0_L */
+  MPI_Sendrecv((void*)(l+gI_0_0_0), 1, jfield_z_slice_gath, g_nb_z_dn, 5503,  
+	       (void*)(l+gI_0_0_L), 1, jfield_z_slice_cont, g_nb_z_up, 5503, 
+	       g_cart_grid, &status); 
+    
+  /* send the slice starting at gI_0_0_Lm1 (gather datatype) to the neighbour on the right in z direction */
+  /* receive the data from the neighbour on the left in z direction into the contiguous slice at gI_0_0_m1 */  
+  MPI_Sendrecv((void*)(l+gI_0_0_Lm1), 1, jfield_z_slice_gath, g_nb_z_up, 5504, 
+	       (void*)(l+gI_0_0_m1), 1, jfield_z_slice_cont, g_nb_z_dn, 5504, 
+	       g_cart_grid, &status); 
+    
+#    endif
+#  endif // MPI
+  return; /* NOTE(review): this return precedes the pomp "inst end" pragma below */
+#ifdef _KOJAK_INST
+#pragma pomp inst end(xchange_jacobi)
+#endif
+}
+
+#endif // WITHLAPH
diff --git a/xchange_jacobi.h b/xchange_jacobi.h
new file mode 100644
index 000000000..3c8916a8f
--- /dev/null
+++ b/xchange_jacobi.h
@@ -0,0 +1,25 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ ***********************************************************************/
+
+#ifndef _XCHANGE_JACOBI_H
+#define _XCHANGE_JACOBI_H
+#include "su3.h" /* provides su3_vector, so this header no longer depends on the includer's include order */
+/* Exchanges the boundary slices of the su3_vector field s between neighbouring MPI processes; the definition in xchange_jacobi.c is compiled only when WITHLAPH is defined. */
+void xchange_jacobi(su3_vector * const s);
+#endif