From 7052d6f6b17772d65832f66229e3be9104be5c5a Mon Sep 17 00:00:00 2001 From: Luigi Scorzato Date: Sat, 10 Sep 2011 15:06:29 +0000 Subject: [PATCH] LapH: computing eigensystem for the Laplacial Heaviside method --- LapH_ev.c | 218 +++++++++ Makefile.in | 5 +- config.h.in | 3 + configure.in | 44 +- fixed_volume.h.in | 3 +- geometry_eo.c | 55 +++ global.h | 13 + init_geometry_indices.c | 11 + init_jacobi_field.c | 107 +++++ init_jacobi_field.h | 34 ++ jacobi.c | 77 +++ jacobi.h | 32 ++ linalg/Makefile.in | 3 +- linalg/assign.c | 21 + linalg/assign.h | 1 + linalg/assign_add_mul_r.c | 23 + linalg/assign_add_mul_r.h | 1 + linalg/assign_mul_add_r.c | 24 +- linalg/assign_mul_add_r.h | 1 + linalg/diff.c | 22 + linalg/diff.h | 1 + linalg/scalar_prod.c | 81 ++++ linalg/scalar_prod.h | 1 + linalg/scalar_prod_r.c | 39 +- linalg/scalar_prod_r.h | 1 + linalg/scalar_prod_su3spinor.c | 230 +++++++++ linalg/scalar_prod_su3spinor.h | 28 ++ linalg/square_norm.c | 35 ++ linalg/square_norm.h | 1 + mpi_init.c | 31 ++ mpi_init.h | 10 + solver/Makefile.in | 3 +- solver/cg_her_su3vect.c | 108 +++++ solver/cg_her_su3vect.h | 28 ++ solver/eigenvalues_Jacobi.c | 228 +++++++++ solver/eigenvalues_Jacobi.h | 34 ++ solver/gram-schmidt.c | 62 +++ solver/gram-schmidt.h | 3 + solver/jdher_su3vect.c | 828 +++++++++++++++++++++++++++++++++ solver/jdher_su3vect.h | 49 ++ solver/matrix_mult_typedef.h | 1 + su3.h | 11 + xchange.h | 1 + xchange_jacobi.c | 110 +++++ xchange_jacobi.h | 25 + 45 files changed, 2626 insertions(+), 21 deletions(-) create mode 100644 LapH_ev.c create mode 100755 init_jacobi_field.c create mode 100755 init_jacobi_field.h create mode 100644 jacobi.c create mode 100644 jacobi.h create mode 100644 linalg/scalar_prod_su3spinor.c create mode 100644 linalg/scalar_prod_su3spinor.h create mode 100755 solver/cg_her_su3vect.c create mode 100755 solver/cg_her_su3vect.h create mode 100644 solver/eigenvalues_Jacobi.c create mode 100755 solver/eigenvalues_Jacobi.h create mode 100644 
solver/jdher_su3vect.c create mode 100755 solver/jdher_su3vect.h create mode 100644 xchange_jacobi.c create mode 100644 xchange_jacobi.h diff --git a/LapH_ev.c b/LapH_ev.c new file mode 100644 index 000000000..6a027c9a5 --- /dev/null +++ b/LapH_ev.c @@ -0,0 +1,218 @@ +/*********************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. 
+ ***********************************************************************/ +/* + * Program for computing the eigensystem of the Laplacian operator + * Authors Luigi Scorzato, Marco Cristoforetti + * + * + *******************************************************************************/ + +#define MAIN_PROGRAM + +#ifdef HAVE_CONFIG_H +# include "config.h" +#else +#error "no config.h" +#endif +#include +#include +#include +#include +#include +#if (defined BGL && !defined BGP) +# include +#endif +#ifdef MPI +# include +#endif +#include "global.h" +#include +#include +#include "su3.h" +#include "ranlxd.h" +#include "geometry_eo.h" +#include "read_input.h" +#include "start.h" +#include "xchange.h" +#include "init_gauge_field.h" +#include "init_geometry_indices.h" +#include "mpi_init.h" +#include "solver/eigenvalues_Jacobi.h" +#include "init_jacobi_field.h" + +int main(int argc,char *argv[]) +{ + int tslice,j,k; + char conf_filename[50]; + +#ifdef MPI + MPI_Init(&argc, &argv); +#endif + + /* Read the input file */ + read_input("LapH.input"); + + tmlqcd_mpi_init(argc, argv); + + if(g_proc_id==0) { +#ifdef SSE + printf("# The code was compiled with SSE instructions\n"); +#endif +#ifdef SSE2 + printf("# The code was compiled with SSE2 instructions\n"); +#endif +#ifdef SSE3 + printf("# The code was compiled with SSE3 instructions\n"); +#endif +#ifdef P4 + printf("# The code was compiled for Pentium4\n"); +#endif +#ifdef OPTERON + printf("# The code was compiled for AMD Opteron\n"); +#endif +#ifdef _GAUGE_COPY + printf("# The code was compiled with -D_GAUGE_COPY\n"); +#endif +#ifdef BGL + printf("# The code was compiled for Blue Gene/L\n"); +#endif +#ifdef BGP + printf("# The code was compiled for Blue Gene/P\n"); +#endif +#ifdef _USE_HALFSPINOR + printf("# The code was compiled with -D_USE_HALFSPINOR\n"); +#endif +#ifdef _USE_SHMEM + printf("# the code was compiled with -D_USE_SHMEM\n"); +# ifdef _PERSISTENT + printf("# the code was compiled for persistent MPI calls 
(halfspinor only)\n"); +# endif +#endif +#ifdef MPI +# ifdef _NON_BLOCKING + printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n"); +# endif +#endif + printf("\n"); + fflush(stdout); + } + + +#ifndef WITHLAPH + printf(" Error: WITHLAPH not defined"); + exit(0); + #error " Error: WITHLAPH not defined" +#endif +#ifdef MPI +#ifndef _INDEX_INDEP_GEOM + printf(" Error: _INDEX_INDEP_GEOM not defined"); + exit(0); + #error " Error: _INDEX_INDEP_GEOM not defined" +#endif +#ifndef _USE_TSPLITPAR + printf(" Error: _USE_TSPLITPAR not defined"); + exit(0); + #error " Error: _USE_TSPLITPAR not defined" +#endif +#endif +#ifdef FIXEDVOLUME + printf(" Error: FIXEDVOLUME not allowed"); + exit(0); + #error " Error: FIXEDVOLUME not defined" +#endif + + + init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); + init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); + + if(g_proc_id == 0) { + fprintf(stdout,"The number of processes is %d \n",g_nproc); + printf("# The lattice size is %d x %d x %d x %d\n", + (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); + printf("# The local lattice size is %d x %d x %d x %d\n", + (int)(T), (int)(LX), (int)(LY),(int) LZ); + printf("# Computing LapH eigensystem \n"); + + fflush(stdout); + } + + /* define the geometry */ + geometry(); + + start_ranlux(1, 123456); + + /* Read Gauge field */ + sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); + if (g_cart_id == 0) { + printf("#\n# Trying to read gauge field from file %s in %s precision.\n", + conf_filename, (gauge_precision_read_flag == 32 ? 
"single" : "double")); + fflush(stdout); + } + if( (j = read_gauge_field(conf_filename)) !=0) { + fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", j, conf_filename); + exit(-2); + } + + + if (g_cart_id == 0) { + printf("# Finished reading gauge field.\n"); + fflush(stdout); + } + +#ifdef MPI + /*For parallelization: exchange the gaugefield */ + xchange_gauge(); +#endif + + /* Init Jacobi field */ + init_jacobi_field(SPACEVOLUME+SPACERAND,3); + +#ifdef MPI + { + /* for debugging in parallel set i_gdb = 0 */ + volatile int i_gdb = 8; + char hostname[256]; + gethostname(hostname, sizeof(hostname)); + printf("PID %d on %s ready for attach\n", getpid(), hostname); + fflush(stdout); + if(g_cart_id == 0){ + while (0 == i_gdb){ + sleep(5); + } + } + } + + MPI_Barrier(MPI_COMM_WORLD); +#endif + + for (k=0 ; k<3 ; k++) + random_jacobi_field(g_jacobi_field[k],SPACEVOLUME); + + + /* Compute LapH Eigensystem */ + + for(tslice=0; tslice +#endif #include #include #ifdef MPI @@ -79,6 +82,7 @@ EXTERN int T, L, LX, LY, LZ, VOLUME; EXTERN int N_PROC_T, N_PROC_X, N_PROC_Y, N_PROC_Z; EXTERN int RAND, EDGES, VOLUMEPLUSRAND; EXTERN int TEOSLICE; +EXTERN int SPACEVOLUME, SPACERAND; #endif /* translates from lexicographic order to even/odd order */ @@ -257,6 +261,15 @@ EXTERN int ITER_MAX_CG; EXTERN void* g_precWS; +#ifdef WITHLAPH +/* Jacobi operator per Laplacian Heaviside (LapH) */ +EXTERN su3_vector ** g_jacobi_field; +EXTERN int gI_0_0_0, gI_L_0_0, gI_Lm1_0_0, gI_m1_0_0, gI_0_L_0, gI_0_Lm1_0, gI_0_m1_0, gI_0_0_L, gI_0_0_Lm1, gI_0_0_m1; +EXTERN int tempT,tempV,tempR; +EXTERN int ** g_iup3d; +EXTERN int ** g_idn3d; +#endif + #undef EXTERN /* #undef ALIGN */ diff --git a/init_geometry_indices.c b/init_geometry_indices.c index 724fe3e45..1fa512cf7 100644 --- a/init_geometry_indices.c +++ b/init_geometry_indices.c @@ -152,6 +152,17 @@ int init_geometry_indices(const int V) { g_ipt[i] = g_ipt[i-1]+(LX+4); } +#ifdef WITHLAPH + g_idn3d = (int**)calloc(SPACEVOLUME, 
sizeof(int*)); + if((void*)g_idn == NULL) return(31); + g_iup3d = (int**)calloc(SPACEVOLUME, sizeof(int*)); + if((void*)g_iup == NULL) return(32); + for (i=0;i. + ***********************************************************************/ +/* + * routine for the initialization of the jocobi field (for use in LapH_ev) + * Authors Luigi Scorzato, Marco Cristoforetti + * + * + *******************************************************************************/ +#include +#include +#include +#include "global.h" +#include "su3.h" +#include "start.h" +#include "xchange_jacobi.h" +#include "init_jacobi_field.h" + +#ifdef WITHLAPH + +su3_vector *jacobi_field = NULL; + +int init_jacobi_field(const int V, const int nr) +{ +int i=0; + + if((void*)(jacobi_field = (su3_vector*)calloc(nr*V+1, sizeof(su3_vector))) == NULL) + { + printf("malloc errno : %d\n",errno); + errno = 0; + return(1); + } + if((void*)(g_jacobi_field = (su3_vector**)malloc(nr*sizeof(su3_vector*))) == NULL) + { + printf("malloc errno : %d\n",errno); + errno = 0; + return(2); + } + + g_jacobi_field[0] = jacobi_field; + for(i=1; i. 
+ ***********************************************************************/ +/* + * routine for the initialization of the Jacobi field (for use in LapH_ev) + * Authors Luigi Scorzato, Marco Cristoforetti + * + * + *******************************************************************************/ +#ifndef _INIT_JACOBI_FIELD_H +#define _INIT_JACOBI_FIELD_H + +# ifdef WITHLAPH +int init_jacobi_field(const int V, const int nr); +void free_jacobi_field(); +void random_gauss_jacobi_field(su3_vector * const k, const int V); +void random_jacobi_field(su3_vector * const k, const int V); +# endif +#endif diff --git a/jacobi.c b/jacobi.c new file mode 100644 index 000000000..9d6c25034 --- /dev/null +++ b/jacobi.c @@ -0,0 +1,77 @@ +/*********************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. 
+ ***********************************************************************/ +/* + * Routine for the computation of the Jacobi operator (for use into LapH_ev) + * Authors Luigi Scorzato, Marco Cristoforetti + * + * + *******************************************************************************/ +#ifdef HAVE_CONFIG_H +# include +#endif +#include +#include +#include +#include +#include +#include +#ifdef MPI +# include +#endif +#include "global.h" +#include "su3.h" +#include "xchange_jacobi.h" + +void Jacobi(su3_vector * const l, su3_vector * const k,int t) +{ + int ix,mu,tcoord,coord; + su3_vector lt; + +#ifdef MPI + xchange_jacobi(k); +#endif + + tcoord=t*SPACEVOLUME; + for(ix=0;ix. + ***********************************************************************/ +/* + * Routine for the computation of the Jacobi operator (for use into LapH_ev) + * Authors Luigi Scorzato, Marco Cristoforetti + * + * + *******************************************************************************/ +#ifndef _JACOBI_H +#define _JACOBI_H + +#include "su3.h" + +void Jacobi(su3_vector * const l, su3_vector * const k,int t); + +#endif diff --git a/linalg/Makefile.in b/linalg/Makefile.in index d0de76956..945a38c73 100644 --- a/linalg/Makefile.in +++ b/linalg/Makefile.in @@ -45,7 +45,8 @@ liblinalg_TARGETS = assign_add_mul_r_add_mul \ assign_mul_bra_add_mul_ket_add_bi mul_r_bi \ scalar_prod_r_bi assign_add_mul_r_bi assign_mul_add_r_bi \ convert_eo_to_lexic assign_mul_add_mul_r mul_add_mul_r \ - assign_mul_add_mul_add_mul_r mattimesvec + assign_mul_add_mul_add_mul_r mattimesvec \ + scalar_prod_su3spinor liblinalg_STARGETS = diff assign_add_mul_r assign_mul_add_r square_norm diff --git a/linalg/assign.c b/linalg/assign.c index 177d15a00..6cb6ec896 100644 --- a/linalg/assign.c +++ b/linalg/assign.c @@ -177,3 +177,24 @@ void assign(spinor * const R, spinor * const S, const int N){ } } #endif + +#ifdef WITHLAPH +void assign_su3vect(su3_vector * const R, su3_vector * const S, const int N) +{ +int ix; 
+su3_vector *r,*s; + + for (ix = 0; ix < N; ix++) + { + r=(su3_vector *) R + ix; + s=(su3_vector *) S + ix; + + (*r).c0.re = (*s).c0.re; + (*r).c0.im = (*s).c0.im; + (*r).c1.re = (*s).c1.re; + (*r).c1.im = (*s).c1.im; + (*r).c2.re = (*s).c2.re; + (*r).c2.im = (*s).c2.im; + } +} +#endif diff --git a/linalg/assign.h b/linalg/assign.h index bad8b0484..5afea3f0f 100644 --- a/linalg/assign.h +++ b/linalg/assign.h @@ -25,5 +25,6 @@ /* Assign (*R) = (*S) */ void assign(spinor * const R, spinor * const S, const int N); +void assign_su3vect(su3_vector * const R, su3_vector * const S, const int N); #endif diff --git a/linalg/assign_add_mul_r.c b/linalg/assign_add_mul_r.c index 77420f30f..f9e30ebe2 100644 --- a/linalg/assign_add_mul_r.c +++ b/linalg/assign_add_mul_r.c @@ -406,3 +406,26 @@ void assign_add_mul_r(spinor * const P, spinor * const Q, const double c, const } } #endif + +#ifdef WITHLAPH +void assign_add_mul_r_su3vect(su3_vector * const P, su3_vector * const Q, const double c, const int N) { + int ix; + static double fact; + su3_vector *r,*s; + + fact=c; + + for (ix = 0; ix < N; ix++) + { + r=P+ix; + s=Q+ix; + + (*r).c0.re+=fact*(*s).c0.re; + (*r).c0.im+=fact*(*s).c0.im; + (*r).c1.re+=fact*(*s).c1.re; + (*r).c1.im+=fact*(*s).c1.im; + (*r).c2.re+=fact*(*s).c2.re; + (*r).c2.im+=fact*(*s).c2.im; + } +} +#endif diff --git a/linalg/assign_add_mul_r.h b/linalg/assign_add_mul_r.h index e36d8af28..57fd0e7f4 100644 --- a/linalg/assign_add_mul_r.h +++ b/linalg/assign_add_mul_r.h @@ -24,5 +24,6 @@ #include "su3.h" void assign_add_mul_r(spinor * const P, spinor * const Q, const double c, const int N); +void assign_add_mul_r_su3vect(su3_vector * const P, su3_vector * const Q, const double c, const int N); #endif diff --git a/linalg/assign_mul_add_r.c b/linalg/assign_mul_add_r.c index 6daf81c7e..a41b76986 100644 --- a/linalg/assign_mul_add_r.c +++ b/linalg/assign_mul_add_r.c @@ -430,4 +430,26 @@ void assign_mul_add_r(spinor * const R, const double c, spinor * const S, const } 
#endif - +#ifdef WITHLAPH +void assign_mul_add_r_su3vect(su3_vector * const R, const double c, su3_vector * const S, const int N) +{ + int ix; + static double fact; + su3_vector *r,*s; + + fact=c; + + for (ix = 0; ix < N; ix++) + { + r = R + ix; + s = S + ix; + + (*r).c0.re = fact*(*r).c0.re + (*s).c0.re; + (*r).c0.im = fact*(*r).c0.im + (*s).c0.im; + (*r).c1.re = fact*(*r).c1.re + (*s).c1.re; + (*r).c1.im = fact*(*r).c1.im + (*s).c1.im; + (*r).c2.re = fact*(*r).c2.re + (*s).c2.re; + (*r).c2.im = fact*(*r).c2.im + (*s).c2.im; + } +} +#endif diff --git a/linalg/assign_mul_add_r.h b/linalg/assign_mul_add_r.h index bcc7e9b39..fc1d74138 100644 --- a/linalg/assign_mul_add_r.h +++ b/linalg/assign_mul_add_r.h @@ -24,5 +24,6 @@ #include "su3.h" void assign_mul_add_r(spinor * const S, const double c, spinor * const R, const int N); +void assign_mul_add_r_su3vect(su3_vector * const S, const double c, su3_vector * const R, const int N); #endif diff --git a/linalg/diff.c b/linalg/diff.c index 27a420cee..61f44b0ac 100644 --- a/linalg/diff.c +++ b/linalg/diff.c @@ -346,3 +346,25 @@ void diff(spinor * const Q,spinor * const R,spinor * const S, const int N){ } #endif + +#ifdef WITHLAPH +void diff_su3vect(su3_vector * const Q,su3_vector * const R,su3_vector * const S, const int N) +{ +int ix; +su3_vector *q,*r,*s; + + for (ix = 0; ix < N; ix++) + { + q=(su3_vector *) Q + ix; + r=(su3_vector *) R + ix; + s=(su3_vector *) S + ix; + + (*q).c0.re=(*r).c0.re-(*s).c0.re; + (*q).c0.im=(*r).c0.im-(*s).c0.im; + (*q).c1.re=(*r).c1.re-(*s).c1.re; + (*q).c1.im=(*r).c1.im-(*s).c1.im; + (*q).c2.re=(*r).c2.re-(*s).c2.re; + (*q).c2.im=(*r).c2.im-(*s).c2.im; + } +} +#endif diff --git a/linalg/diff.h b/linalg/diff.h index a9cd0730c..e4609a19f 100644 --- a/linalg/diff.h +++ b/linalg/diff.h @@ -25,6 +25,7 @@ /* Makes the difference (*Q) = (*R) - (*S) */ void diff(spinor * const Q, spinor * const R, spinor * const S, const int N); +void diff_su3vect(su3_vector * const Q, su3_vector * const R, 
su3_vector * const S, const int N); #endif diff --git a/linalg/scalar_prod.c b/linalg/scalar_prod.c index 1c4782347..23dd1b2e2 100644 --- a/linalg/scalar_prod.c +++ b/linalg/scalar_prod.c @@ -127,3 +127,84 @@ complex scalar_prod(spinor * const S, spinor * const R, const int N, const int p #endif return(c); } + +#ifdef WITHLAPH +complex scalar_prod_su3vect(su3_vector * const S, su3_vector * const R, const int N, const int parallel){ + int ix; + static double ks,kc,ds,tr,ts,tt; + su3_vector *s,*r; + complex c; +#ifdef MPI + complex d; +#endif + + /* Real Part */ + + ks=0.0; + kc=0.0; + for (ix = 0; ix < N; ix++) + { + s=(su3_vector *) S + ix; + r=(su3_vector *) R + ix; + + ds=(*r).c0.re*(*s).c0.re+(*r).c0.im*(*s).c0.im+ + (*r).c1.re*(*s).c1.re+(*r).c1.im*(*s).c1.im+ + (*r).c2.re*(*s).c2.re+(*r).c2.im*(*s).c2.im; + + /* Kahan Summation */ + tr=ds+kc; + ts=tr+ks; + tt=ts-ks; + ks=ts; + kc=tr-tt; + } + kc=ks+kc; + +#if defined MPI0 + if(parallel == 1) { + MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + kc = ks; + } +#endif + + c.re = kc; + + /* Imaginary Part */ + + ks=0.0; + kc=0.0; + + for (ix=0;ix=SxR^* */ complex scalar_prod(spinor * const S,spinor * const R, const int N, const int parallel); +complex scalar_prod_su3vect(su3_vector * const S,su3_vector * const R, const int N, const int parallel); #endif diff --git a/linalg/scalar_prod_r.c b/linalg/scalar_prod_r.c index 8d9d5d94e..3cd8f1003 100644 --- a/linalg/scalar_prod_r.c +++ b/linalg/scalar_prod_r.c @@ -91,7 +91,6 @@ double scalar_prod_r(spinor * const S,spinor * const R, const int N, const int p MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); return ks; } - #endif return kc; @@ -238,4 +237,42 @@ double scalar_prod_r(spinor * const S,spinor * const R, const int N, const int p return kc; } +#endif // apenext + +#ifdef WITHLAPH +double scalar_prod_r_su3vect(su3_vector * const S,su3_vector * const R, const int N, const int parallel) +{ + int ix; + static double ks,kc,ds,tr,ts,tt; 
+ su3_vector *s,*r; + + ks=0.0; + kc=0.0; + for (ix=0;ix. + ***********************************************************************/ +/* $Id: scalar_prod.c 1173 2009-03-30 15:27:59Z urbach $ */ + +#ifdef HAVE_CONFIG_H +# include +#endif +#include +#ifdef MPI +#include +#endif +#include "su3.h" +#include "scalar_prod_su3spinor.h" + +#ifdef WITHLAPH +complex_spinor scalar_prod_su3spinor(su3_vector * const S, spinor * const R, const int N, const int parallel){ + int ix; + static double ks,kc,ds,tr,ts,tt; + su3_vector *s,*r; + complex_spinor c; +#ifdef MPI + complex_spinor d; +#endif + + /* sc0.re */ + + ks=0.0; + kc=0.0; + for (ix = 0; ix < N; ix++) + { + s=(su3_vector *) S + ix; + r=&(R[ix].s0); + + ds=(*r).c0.re*(*s).c0.re+(*r).c0.im*(*s).c0.im+ + (*r).c1.re*(*s).c1.re+(*r).c1.im*(*s).c1.im+ + (*r).c2.re*(*s).c2.re+(*r).c2.im*(*s).c2.im; + + /* Kahan Summation */ + tr=ds+kc; + ts=tr+ks; + tt=ts-ks; + ks=ts; + kc=tr-tt; + } + kc=ks+kc; + c.sc0.re = kc; + + /* sc0.im */ + + ks=0.0; + kc=0.0; + for (ix=0;ix. 
+ ***********************************************************************/ +/* $Id: scalar_prod.h 1150 2009-02-16 16:52:09Z urbach $ */ + +#ifndef _SCALAR_PRODSU3S_H +#define _SCALAR_PRODSU3S_H + +#include "su3.h" +/* T_alpha=S_a x R_alpha,a^* */ +complex_spinor scalar_prod_su3spinor(su3_vector * const S,spinor * const R, const int N, const int parallel); + +#endif diff --git a/linalg/square_norm.c b/linalg/square_norm.c index 8e374fc66..54255a1ee 100644 --- a/linalg/square_norm.c +++ b/linalg/square_norm.c @@ -350,3 +350,38 @@ double square_norm(spinor * const P, const int N, const int parallel) { return kc; } #endif + +#ifdef WITHLAPH +double square_norm_su3vect(su3_vector * const P, const int N, const int parallel) +{ + int ix; + static double ks,kc,ds,tr,ts,tt; + su3_vector *s; + + ks = 0.0; + kc = 0.0; + + for (ix = 0; ix < N; ix++) + { + s = P + ix; + + ds = (*s).c0.re*(*s).c0.re + (*s).c0.im*(*s).c0.im + + (*s).c1.re*(*s).c1.re + (*s).c1.im*(*s).c1.im + + (*s).c2.re*(*s).c2.re + (*s).c2.im*(*s).c2.im; + + tr = ds + kc; + ts = tr + ks; + tt = ts-ks; + ks = ts; + kc = tr-tt; + } + kc = ks + kc; +# ifdef MPI + if(parallel) { + MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return ks; + } +#endif + return kc; +} +#endif diff --git a/linalg/square_norm.h b/linalg/square_norm.h index c1f863798..ad46fb6e9 100644 --- a/linalg/square_norm.h +++ b/linalg/square_norm.h @@ -27,6 +27,7 @@ * Returns the square norm of *P */ double square_norm(spinor * const P, const int N, const int parallel); +double square_norm_su3vect(su3_vector * const P, const int N, const int parallel); #endif diff --git a/mpi_init.c b/mpi_init.c index f9f566733..cedb1e97e 100644 --- a/mpi_init.c +++ b/mpi_init.c @@ -138,6 +138,16 @@ MPI_Datatype field_zt_slice_odd_dn_ot; MPI_Datatype field_zt_slice_odd_up_ot; # endif #endif +#ifdef WITHLAPH +MPI_Datatype su3vect_point; +MPI_Datatype jfield_x_slice_cont; +MPI_Datatype jfield_y_slice_cont; +MPI_Datatype jfield_z_slice_cont; 
+MPI_Datatype jfield_x_slice_gath; +MPI_Datatype jfield_y_slice_gath; +MPI_Datatype jfield_z_slice_gath; +MPI_Datatype jfield_y_subslice; +#endif #if ( defined PARALLELXYZT || defined PARALLELXYZ ) MPI_Datatype field_z_slice_even_dn; @@ -299,6 +309,7 @@ void tmlqcd_mpi_init(int argc,char *argv[]) { LY = LY/g_nproc_y; LZ = LZ/g_nproc_z; VOLUME = (T*LX*LY*LZ); + SPACEVOLUME = VOLUME/T; # ifdef _USE_TSPLITPAR TEOSLICE = (LX*LY*LZ)/2; # endif @@ -330,6 +341,7 @@ void tmlqcd_mpi_init(int argc,char *argv[]) { /* Note that VOLUMEPLUSRAND is not always equal to VOLUME+RAND */ /* VOLUMEPLUSRAND rather includes the edges */ VOLUMEPLUSRAND = VOLUME + RAND + EDGES; + SPACERAND=RAND/T; # endif /* ifndef FIXEDVOLUME */ g_dbw2rand = (RAND + 2*EDGES); @@ -578,7 +590,24 @@ void tmlqcd_mpi_init(int argc,char *argv[]) { MPI_Type_commit(&field_zt_slice_ext_L); MPI_Type_commit(&field_zt_slice_ext_S); # endif +#endif +#ifdef WITHLAPH + MPI_Type_contiguous(6, MPI_DOUBLE, &su3vect_point); + + MPI_Type_contiguous(LY*LZ, su3vect_point, &jfield_x_slice_cont); + MPI_Type_contiguous(LX*LZ, su3vect_point, &jfield_y_slice_cont); + MPI_Type_contiguous(LX*LY, su3vect_point, &jfield_z_slice_cont); + MPI_Type_contiguous(LY*LZ, su3vect_point, &jfield_x_slice_gath); + MPI_Type_contiguous(LZ, su3vect_point, &jfield_y_subslice); + MPI_Type_vector(LX, 1, LY, jfield_y_subslice, &jfield_y_slice_gath); + MPI_Type_vector(LX*LY, 1, LZ, su3vect_point, &jfield_z_slice_gath); + MPI_Type_commit(&jfield_x_slice_gath); + MPI_Type_commit(&jfield_x_slice_cont); + MPI_Type_commit(&jfield_y_slice_cont); + MPI_Type_commit(&jfield_y_slice_gath); + MPI_Type_commit(&jfield_z_slice_cont); + MPI_Type_commit(&jfield_z_slice_gath); #endif /* The internal z_ and zt_ slices are constructed in geometry() with MPI_Type_indexed() */ @@ -681,12 +710,14 @@ void tmlqcd_mpi_init(int argc,char *argv[]) { # ifndef FIXEDVOLUME T = T_global; VOLUME = (T*LX*LY*LZ); + SPACEVOLUME = VOLUME/T; # ifdef _USE_TSPLITPAR TEOSLICE = (LX*LY*LZ)/2; # 
endif RAND = 0; EDGES = 0; VOLUMEPLUSRAND = VOLUME; + SPACERAND=0; N_PROC_T = 1; N_PROC_X = 1; N_PROC_Y = 1; diff --git a/mpi_init.h b/mpi_init.h index 6598eda5d..f7bc93526 100644 --- a/mpi_init.h +++ b/mpi_init.h @@ -106,6 +106,16 @@ extern MPI_Datatype field_zt_slice_odd_dn_ot; extern MPI_Datatype field_zt_slice_odd_up_ot; # endif #endif +#ifdef WITHLAPH +extern MPI_Datatype su3vect_point; +extern MPI_Datatype jfield_x_slice_cont; +extern MPI_Datatype jfield_y_slice_cont; +extern MPI_Datatype jfield_z_slice_cont; +extern MPI_Datatype jfield_x_slice_gath; +extern MPI_Datatype jfield_y_slice_gath; +extern MPI_Datatype jfield_z_slice_gath; +extern MPI_Datatype jfield_y_subslice; +#endif #if ( defined PARALLELXYZT || defined PARALLELXYZ ) extern MPI_Datatype field_z_slice_even_dn; diff --git a/solver/Makefile.in b/solver/Makefile.in index b1aee68c8..9a45d473c 100644 --- a/solver/Makefile.in +++ b/solver/Makefile.in @@ -40,7 +40,8 @@ libsolver_TARGETS = bicgstab_complex gmres \ sub_low_ev cg_her_nd poly_precon \ generate_dfl_subspace dfl_projector \ cg_mms_tm solver_field sumr mixed_cg_her index_jd \ - dirac_operator_eigenvectors spectral_proj + dirac_operator_eigenvectors spectral_proj \ + jdher_su3vect cg_her_su3vect eigenvalues_Jacobi libsolver_OBJECTS = $(addsuffix .o, ${libsolver_TARGETS}) diff --git a/solver/cg_her_su3vect.c b/solver/cg_her_su3vect.c new file mode 100755 index 000000000..842a54ba1 --- /dev/null +++ b/solver/cg_her_su3vect.c @@ -0,0 +1,108 @@ +/*********************************************************************** + * + * Copyright (C) 2001 Martin Hasenbusch + * 2003 Thomas Chiarappa + * 2002,2003,2004,2005,2010 Carsten Urbach + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + **************************************************************************/ + +/* ************************************************************************ + * Conjugate Gradient for su3 vectors + * Authors: Luigi Scorzato, Marco Cristoforetti + * + **************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include +#endif +#include +#include +#include +#include +#ifdef MPI +# include +#endif +#include "global.h" +#include "su3.h" +#include "linalg_eo.h" +#include "start.h" +#include "solver/matrix_mult_typedef.h" +#include "cg_her_su3vect.h" + +#ifdef WITHLAPH + +int cg_her_su3vect(su3_vector * const P, su3_vector * const Q, const int max_iter, + double eps_sq, const int rel_prec, const int N,const int tslice, matrix_mult_su3vect f) { + + static double normsq,pro,err,alpha_cg,beta_cg,squarenorm; + int iteration; + int save_sloppy = g_sloppy_precision; + double atime, etime; + + +#ifdef MPI + atime = MPI_Wtime(); +#else + atime = ((double)clock())/((double)(CLOCKS_PER_SEC)); +#endif + squarenorm = square_norm_su3vect(Q, N, 1); + + f(g_jacobi_field[0],P,tslice); + + diff_su3vect(g_jacobi_field[1], Q, g_jacobi_field[0], N); + assign_su3vect(g_jacobi_field[2], g_jacobi_field[1], N); + normsq=square_norm_su3vect(g_jacobi_field[1], N, 1); + + /* main loop */ + for(iteration = 1; iteration <= max_iter; iteration++) { + f(g_jacobi_field[0], g_jacobi_field[2],tslice); + pro = scalar_prod_r_su3vect(g_jacobi_field[2], g_jacobi_field[0], N, 1); + alpha_cg = normsq / pro; + assign_add_mul_r_su3vect(P, g_jacobi_field[2], alpha_cg, N); + + 
assign_mul_add_r_su3vect(g_jacobi_field[0], -alpha_cg, g_jacobi_field[1], N); + err=square_norm_su3vect(g_jacobi_field[0], N, 1); + + if(g_proc_id == g_stdio_proc && g_debug_level > 1) { + printf("CG: iterations: %d res^2 %e\n", iteration, err); + fflush(stdout); + } + + if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq*squarenorm) && (rel_prec == 1))) { + break; + } + beta_cg = err / normsq; + assign_mul_add_r_su3vect(g_jacobi_field[2], beta_cg, g_jacobi_field[0], N); + assign_su3vect(g_jacobi_field[1], g_jacobi_field[0], N); + normsq = err; + } +#ifdef MPI + etime = MPI_Wtime(); +#else + etime = ((double)clock())/((double)(CLOCKS_PER_SEC)); +#endif + g_sloppy_precision = save_sloppy; + /* FLOPS= 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns ) */ + if(g_debug_level > 0 && g_proc_id == 0) { + printf("CG: iter: %d eps_sq: %1.4e t/s: %1.4e\n", iteration, eps_sq, etime-atime); + } + if(iteration > max_iter) return(-1); + return(iteration); +} + +#endif // WITHLAPH diff --git a/solver/cg_her_su3vect.h b/solver/cg_her_su3vect.h new file mode 100755 index 000000000..85e9541e1 --- /dev/null +++ b/solver/cg_her_su3vect.h @@ -0,0 +1,28 @@ +/*********************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . 
+ ***********************************************************************/ +#ifndef _CG_HERSU3V_H +#define _CG_HERSU3V_H + +#include"solver/matrix_mult_typedef.h" +#include"su3.h" + +int cg_her_su3vect(su3_vector * const P, su3_vector * const Q, const int max_iter, double eps_sq, const int rel_prec, + const int N, const int tslice, matrix_mult_su3vect f); + +#endif diff --git a/solver/eigenvalues_Jacobi.c b/solver/eigenvalues_Jacobi.c new file mode 100644 index 000000000..9fefc302a --- /dev/null +++ b/solver/eigenvalues_Jacobi.c @@ -0,0 +1,228 @@ +/*********************************************************************** + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. + ***********************************************************************/ + +/* ************************************************************************ + * Main routine for the LapH_ev program: computes eigensystem of the Laplacian operator. 
+ * Authors: Luigi Scorzato, Marco Cristoforetti
+ *
+ **************************************************************************/
+#ifdef HAVE_CONFIG_H
+# include
+#endif
+#include
+#include
+#include
+#include "global.h"
+#include "su3.h"
+#include
+#include
+#include
+#include
+#include
+#include "jacobi.h"
+#include "solver/solver.h"
+#include "solver/jdher_su3vect.h"
+#include "solver/matrix_mult_typedef.h"
+#include "linalg_eo.h"
+#include "eigenvalues_Jacobi.h"
+
+#ifdef WITHLAPH
+
+/* Results of the most recent eigenvalues_Jacobi() call: eigenvector i starts
+ * at eigenvectors_su3v[i*evlength_su3v] and belongs to eigenvls_su3v[i]. */
+su3_vector *eigenvectors_su3v = NULL;
+double *eigenvls_su3v = NULL;
+double max_eigenvalue_su3v;
+double * inv_eigenvls_su3v = NULL;
+
+int eigenvalues_for_cg_computed_su3v = 0;
+int evlength_su3v;
+
+/*
+ * Compute (*nr_of_eigenvalues) eigenpairs of the Jacobi operator on one
+ * time slice with the Jacobi-Davidson solver jdher_su3vect() and write
+ * them to disk.
+ *
+ * nr_of_eigenvalues  number of eigenpairs to compute (only read here)
+ * max_iterations     maximal number of outer Jacobi-Davidson iterations
+ * precision          requested tolerance; values below 1.e-14 are raised
+ *                    to 1.e-14
+ * maxmin             non-zero: maximal eigenvalues (JD_MAXIMAL, tau = 50),
+ *                    zero: minimal eigenvalues (JD_MINIMAL, tau = 0)
+ * tslice             time slice, forwarded to the operator and used in
+ *                    the output file names
+ * nstore             label used only in the output file names
+ *
+ * Process 0 writes the eigenvalues to "eigenvalues.<tslice>.<nstore>";
+ * eigenvector i goes to "eigenvector.<i>.<tslice>.<nstore>". A file that
+ * cannot be opened is reported on stderr and skipped. The results stay
+ * available in eigenvls_su3v / eigenvectors_su3v, which are re-allocated
+ * whenever more eigenpairs are requested than in any previous call.
+ *
+ * Returns eigenvls_su3v[0], or 0 when compiled without LAPACK.
+ */
+double eigenvalues_Jacobi(int * nr_of_eigenvalues, const int max_iterations,
+                          const double precision, const int maxmin,int tslice,
+                          const int nstore) {
+  /* initialised so that the no-LAPACK path returns a defined value */
+  double returnvalue = 0.;
+  /* number of eigenpairs the global result buffers currently hold */
+  static int allocated = 0;
+
+#ifdef HAVE_LAPACK
+
+
+  int verbosity = 1, converged = 0, blocksize = 1 , blockwise=0;
+  int solver_it_max = 50, j_max, j_min;
+  double decay_min = 1.7, decay_max = 1.5, prec, threshold_min = 1.e-3, threshold_max = 5.e-2;
+  volatile int v0dim = 0;
+  matrix_mult_su3vect f;
+  int N=SPACEVOLUME, N2=(SPACEVOLUME + SPACERAND);
+  su3_vector * max_eigenvector_ = NULL, *max_eigenvector;
+
+  int returncode=0;
+  int returncode2=0;
+  su3_vector *s;
+  double sqnorm;
+
+  char filename[200];
+  char eigvl_filename[200];
+  // int dims[]={T*g_nproc_t, LX*g_nproc_x, LY*g_nproc_y, LZ*g_nproc_z};
+  int dims[]={1, LX*g_nproc_x, LY*g_nproc_y, LZ*g_nproc_z};
+  FILE *efp;
+
+#ifdef MPI
+  double atime, etime;
+  MPI_File fp;
+  MPI_Offset siteSize=3*2*sizeof(double);
+  LemonRecordHeader *header;
+  LemonWriter *writer;
+#else
+  FILE *fp;
+  int siteSize=3*2*sizeof(double);
+#endif
+
+  f = &Jacobi;
+  evlength_su3v = N2;
+
+  if(g_proc_id == g_stdio_proc && g_debug_level >0)
+  {
+    printf("Number of %s eigenvalues to compute = %d\n",
+           maxmin ? "maximal" : "minimal",(*nr_of_eigenvalues));
+    printf("Using Jacobi-Davidson method! \n");
+  }
+  if((*nr_of_eigenvalues) < 8){
+    j_max = 15;
+    j_min = 8;
+  }
+  else{
+    j_max = 2*(*nr_of_eigenvalues);
+    j_min = (*nr_of_eigenvalues);
+  }
+  if(precision < 1.e-14){
+    prec = 1.e-14;
+  }
+  else{
+    prec = precision;
+  }
+  /* only referenced by the disabled DEBUG call below */
+  max_eigenvector_= calloc(N2, sizeof(su3_vector));
+  max_eigenvector = max_eigenvector_;
+
+  /* (re-)allocate the result buffers whenever more eigenpairs are requested
+     than they currently hold; sizing them once from the first call would
+     overflow on a later, larger request */
+  if(allocated < (*nr_of_eigenvalues))
+  {
+    free(eigenvectors_su3v);
+    free(eigenvls_su3v);
+    free(inv_eigenvls_su3v);
+    eigenvectors_su3v = calloc((size_t)N2*(*nr_of_eigenvalues), sizeof(su3_vector));
+    eigenvls_su3v = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
+    inv_eigenvls_su3v = (double*)malloc((*nr_of_eigenvalues)*sizeof(double));
+    if(eigenvectors_su3v == NULL || eigenvls_su3v == NULL || inv_eigenvls_su3v == NULL)
+    {
+      allocated = 0;
+      jderrorhandler(300, "eigensystem buffers in eigenvalues_Jacobi");
+      /* not reached if the error handler terminates the program */
+      free(max_eigenvector_);
+      return(returnvalue);
+    }
+    allocated = (*nr_of_eigenvalues);
+  }
+
+  solver_it_max = 64;
+  /* compute the maximal one first */
+  /* DEBUG
+  jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+                50., 1.e-12,
+                1, 15, 8, max_iterations, 1, 0, 0, NULL,
+                CG, solver_it_max,
+                threshold_max, decay_max, verbosity,
+                &converged, (complex*) max_eigenvector, (double*) &max_eigenvalue_su3v,
+                &returncode2, JD_MAXIMAL, 1,tslice,f);
+  */
+
+#ifdef MPI
+  atime = MPI_Wtime();
+#endif
+
+  /* compute the requested (maximal or minimal) eigenvalues */
+  converged = 0;
+  solver_it_max = 256;
+
+  if(maxmin)
+    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+                  50., prec,
+                  (*nr_of_eigenvalues), j_max, j_min,
+                  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
+                  CG, solver_it_max,
+                  threshold_max, decay_max, verbosity,
+                  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
+                  &returncode, JD_MAXIMAL, 1,tslice,
+                  f);
+  else
+    jdher_su3vect(N*sizeof(su3_vector)/sizeof(complex), N2*sizeof(su3_vector)/sizeof(complex),
+                  0., prec,
+                  (*nr_of_eigenvalues), j_max, j_min,
+                  max_iterations, blocksize, blockwise, v0dim, (complex*) eigenvectors_su3v,
+                  CG, solver_it_max,
+                  threshold_min, decay_min, verbosity,
+                  &converged, (complex*) eigenvectors_su3v, eigenvls_su3v,
+                  &returncode, JD_MINIMAL, 1,tslice,
+                  f);
+
+#ifdef MPI
+  etime = MPI_Wtime();
+  if(g_proc_id == 0) {
+    printf("Eigenvalues computed in %e sec. (MPI_Wtime)\n", etime-atime);
+  }
+#endif
+
+
+  /* Printout eigenvalues. */
+  if(g_proc_id == 0) {
+    snprintf(eigvl_filename, sizeof(eigvl_filename), "eigenvalues.%.3d.%.4d", tslice, nstore);
+    efp=fopen(eigvl_filename,"w");
+    if(efp == NULL) {
+      /* best effort: the eigenvalues remain available in eigenvls_su3v */
+      fprintf(stderr, "Cannot open %s for writing eigenvalues\n", eigvl_filename);
+    }
+    else {
+      for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
+        fprintf(efp,"%e\n",eigenvls_su3v[v0dim]);
+      }
+      fclose(efp);
+    }
+  }
+
+  /* Printout eigenvectors. */
+  for(v0dim = 0; v0dim < (*nr_of_eigenvalues); v0dim++) {
+    snprintf(filename, sizeof(filename), "eigenvector.%.3d.%.3d.%.4d", v0dim, tslice, nstore);
+    s=(su3_vector*)&eigenvectors_su3v[v0dim*N2];
+#ifdef MPI
+# ifdef HAVE_LIBLEMON
+    // MARK: this should write 8*2*3*SPACEVOLUME data per file, but it writes 8*2*4n*SPACEVOLUME (n=4-1 for ev 0-3)
+
+    MPI_File_open(g_cart_grid, filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fp);
+    writer = lemonCreateWriter(&fp, g_cart_grid);
+    header = lemonCreateHeader(1 /* MB */, 1 /* ME */, "lattice-su3_vector-data",SPACEVOLUME*3*sizeof(complex));
+    lemonWriteRecordHeader(header, writer);
+    lemonDestroyHeader(header);
+    lemonWriteLatticeParallel(writer, s, siteSize, dims);
+    lemonWriterCloseRecord(writer);
+    lemonDestroyWriter(writer);
+    MPI_File_close(&fp);
+# else
+    if(g_proc_id == 0) {
+      printf("Cannot write eigenvectors: you need LEMON for writing eigenvectors with MPI\n");
+    }
+# endif
+#else
+    fp=fopen(filename,"wb");
+    if(fp == NULL) {
+      fprintf(stderr, "Cannot open %s for writing eigenvector\n", filename);
+    }
+    else {
+      if(fwrite(s,siteSize,SPACEVOLUME,fp) != (size_t)SPACEVOLUME) {
+        fprintf(stderr, "Short write on %s\n", filename);
+      }
+      fclose(fp);
+    }
+#endif // MPI
+    sqnorm=square_norm_su3vect(s,SPACEVOLUME,1);
+    if(g_proc_id == 0) {
+      printf("wrote eigenvector | |^2 = %e \n",sqnorm);
+    }
+  }
+
+  returnvalue=eigenvls_su3v[0];
+  free(max_eigenvector_);
+#else
+  fprintf(stderr, "lapack not available, so JD method for EV computation not available \n");
+#endif // LAPACK
+  return(returnvalue);
+}
+
+#endif // WITHLAPH
diff --git a/solver/eigenvalues_Jacobi.h b/solver/eigenvalues_Jacobi.h
new file mode 100755
index 000000000..4cfb668f2
--- /dev/null
+++
b/solver/eigenvalues_Jacobi.h
@@ -0,0 +1,34 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD. If not, see .
+ ***********************************************************************/
+#ifndef _EIGENVALUESJ_H
+#define _EIGENVALUESJ_H
+
+#include "su3.h"
+
+extern su3_vector *eigenvectors_su3v;
+extern double *eigenvls_su3v;
+extern double * inv_eigenvls_su3v;
+extern int eigenvalues_for_cg_computed_su3v;
+extern int no_eigenvalues_su3v;
+extern int evlength_su3v;
+
+double eigenvalues_Jacobi(int * nr_of_eigenvalues, const int max_iterations,
+                          const double precision, const int maxmin, int tslice, const int nstore);
+
+#endif // _EIGENVALUESJ_H
diff --git a/solver/gram-schmidt.c b/solver/gram-schmidt.c
index 36cc79e22..a27966f47 100644
--- a/solver/gram-schmidt.c
+++ b/solver/gram-schmidt.c
@@ -123,6 +123,51 @@ void IteratedClassicalGS(complex v[], double *vnrm, int n, int m, complex A[],
   }
 }
 
+#ifdef WITHLAPH
+
+/*
+ * Iterated classical Gram-Schmidt for su3_vector fields: orthogonalise v
+ * (n complex entries) against the m columns of A (column stride lda).
+ * A pass computes all overlaps into work1 and subtracts the projection in
+ * one zgemv call. Passes repeat until the norm of v drops by less than a
+ * factor alpha = 0.5 or max_cgs_it passes were made. *vnrm returns the
+ * norm of v after the last pass. work1 needs room for m entries.
+ */
+void IteratedClassicalGS_su3vect(complex v[], double *vnrm, int n, int m, complex A[],
+                                 complex work1[], int lda) {
+  const double alpha = 0.5;
+  /* number of su3_vector sites covered by n complex entries */
+  const int nsites = n*sizeof(complex)/sizeof(su3_vector);
+
+  double prev_norm;
+  int pass = 0, col, orthogonal = 0;
+  complex minus_one, plus_one;
+
+#ifdef CRAY
+  char *cupl_n = "N";
+  _fcd fupl_n;
+  fupl_n = _cptofcd(cupl_n, strlen(cupl_n));
+#else
+  char *fupl_n = "N";
+#endif
+
+  minus_one.re = -1.;
+  minus_one.im = 0.;
+  plus_one.re = 1.;
+  plus_one.im = 0.;
+
+  prev_norm = sqrt(square_norm_su3vect((su3_vector*) v, nsites, 1));
+
+  while(!orthogonal && pass < max_cgs_it) {
+    /* work1 = A^dagger * v, one column at a time */
+    for(col = 0; col < m; col++) {
+      work1[col] = scalar_prod_su3vect((su3_vector*) (A + col*lda), (su3_vector*) v, nsites, 1);
+    }
+#ifdef HAVE_LAPACK
+    /* v = v - A * work1 */
+    _FT(zgemv)(fupl_n, &n, &m, &minus_one, A, &lda, work1, &ONE, &plus_one, v, &ONE, 1);
+#endif
+    *vnrm = sqrt(square_norm_su3vect((su3_vector*) v, nsites, 1));
+
+    orthogonal = (*vnrm > alpha*prev_norm);
+    prev_norm = *vnrm;
+    pass++;
+  }
+  /* running out of passes is deliberately not reported: the call to
+     errorhandler(400,"") is disabled */
+}
+
+#endif // WITHLAPH
 
 /*
  * ModifiedGramSchmidt
@@ -159,3 +204,20 @@ void ModifiedGS(complex v[], int n, int m, complex A[], int lda) {
   }
 }
 
+#ifdef WITHLAPH
+
+/*
+ * Modified Gram-Schmidt for su3_vector fields: for each of the m columns
+ * of A (column stride lda) in turn, subtract from v (n complex entries)
+ * its component along that column.
+ */
+void ModifiedGS_su3vect(complex v[], int n, int m, complex A[], int lda) {
+
+  const int nsites = n*sizeof(complex)/sizeof(su3_vector);
+  int col;
+  complex coeff;
+
+  for (col = 0; col < m; col++) {
+    complex * const a_col = A + col*lda;
+
+    coeff = scalar_prod_su3vect((su3_vector*) a_col, (su3_vector*) v, nsites, 1);
+    coeff.re = -coeff.re;
+    coeff.im = -coeff.im;
+#ifdef HAVE_LAPACK
+    /* v = v - <a_col, v> * a_col */
+    _FT(zaxpy)(&n, &coeff, a_col, &ONE, v, &ONE);
+#endif
+  }
+}
+
+#endif // WITHLAPH
diff --git a/solver/gram-schmidt.h b/solver/gram-schmidt.h
index ff85296e7..33ed90268 100644
--- a/solver/gram-schmidt.h
+++ b/solver/gram-schmidt.h
@@ -23,8 +23,11 @@
 void IteratedClassicalGS_old(complex v[], double *vnrm, int n, int m, complex A[],
                              complex work1[]);
 void IteratedClassicalGS(complex v[], double *vnrm, int n, int m, complex A[],
                          complex work1[], int lda) ;
+void IteratedClassicalGS_su3vect(complex v[], double *vnrm, int n, int m, complex A[],
+                                 complex work1[], int lda);
 void ModifiedGS_old(complex v[], int n, int m, complex A[]);
 void ModifiedGS(complex v[], int n, int m, complex A[], int lda);
+void ModifiedGS_su3vect(complex v[], int n, int m, complex A[], int lda);
 
 #endif
diff --git
a/solver/jdher_su3vect.c b/solver/jdher_su3vect.c
new file mode 100644
index 000000000..ad5038fe8
--- /dev/null
+++ b/solver/jdher_su3vect.c
@@ -0,0 +1,828 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD. If not, see .
+ ***********************************************************************/
+/*
+ * Routines for the computation of eigensystems of the Laplacian operator, with Jacobi-Davidson algo.
+ * Authors Luigi Scorzato, Marco Cristoforetti
+ *
+ *
+ *******************************************************************************/
+#ifdef HAVE_CONFIG_H
+# include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "global.h"
+#include "su3.h"
+#include "linalg/fortran.h"
+#include "linalg/blas.h"
+#include "linalg/lapack.h"
+#include "linalg_eo.h"
+#include "solver/solver.h"
+#include "solver/gram-schmidt.h"
+#include "solver/quicksort.h"
+#include "cg_her_su3vect.h"
+#include "jdher_su3vect.h"
+#ifdef CRAY
+#include
+#endif
+
+#ifdef WITHLAPH
+
+
+#define min(a,b) ((a)<(b) ? (a) : (b))
+#define max(a,b) ((a)<(b) ? (b) : (a))
+
+/****************************************************************************
+ *                                                                          *
+ * Prototypes of static functions                                           *
+ *                                                                          *
+ ****************************************************************************/
+static void print_status_su3vect(int clvl, int it, int k, int j, int kmax,
+                                 int blksize, int actblksize,
+                                 double *s, double *resnrm, int *actcorrits);
+static void sorteig_su3vect(int j, double S[], complex U[], int ldu, double tau,
+                            double dtemp[], int idx1[], int idx2[], int strategy);
+
+/* Projection routines */
+void Proj_A_psi_su3vect(su3_vector * const y, su3_vector * const x, int tslice);
+
+/*
+ * Print "jdher <message>" on stderr, shut down MPI (if compiled in) and
+ * terminate the program with exit code i.
+ * NOTE(review): the code below calls jderrorhandler() (declared in
+ * jdher_su3vect.h), not this function.
+ */
+void jderrorhandler_su3vect(const int i, char * message)
+{
+  fprintf(stderr, "jdher %s \n", message);
+#ifdef MPI
+  MPI_Finalize();
+#endif
+  exit(i);
+}
+
+/****************************************************************************
+ *                                                                          *
+ * Static variables                                                         *
+ *                                                                          *
+ ****************************************************************************/
+/* static double DMONE = -1.0, DZER = 0.0, DONE = 1.0; */
+static int MONE = -1, ONE = 1;
+static complex CONE, CZERO, CMONE;
+
+/* Projector variables: state shared between jdher_su3vect() and
+   Proj_A_psi_su3vect(), which has no other way to receive it */
+
+static int p_n, p_n2, p_k, p_lda;
+static double p_theta;
+complex * p_Q;
+complex * p_work;
+matrix_mult_su3vect p_A_psi_s3;
+
+static char * fupl_u = "U", * fupl_c = "C", *fupl_n = "N", * fupl_a = "A", *fupl_v = "V", *filaenv = "zhetrd", *fvu = "VU";
+
+/*
+ * Jacobi-Davidson eigensolver for a hermitian operator acting on
+ * su3_vector fields.
+ *
+ * n           number of complex entries of one vector
+ * lda         stride, in complex entries, between consecutive vectors in
+ *             V0, Q and the internal search space
+ * tau         shift used in the correction equation while a residual is
+ *             still large; with shift_mode == 1 it follows the converged
+ *             eigenvalues
+ * tol         residual norm below which an eigenpair counts as converged
+ * kmax        number of eigenpairs to compute
+ * jmax, jmin  maximal search space dimension and dimension after a restart
+ * itmax       maximal number of outer iterations
+ * blksize     number of Ritz pairs treated per outer iteration
+ * blkwise     1: a block is accepted only when all its members converged,
+ *             0: converged members are stripped off individually
+ * V0dim, V0   number of start vectors and the vectors themselves; random
+ *             vectors are added when V0dim < blksize
+ * solver_flag inner solver; only CG triggers a solve.
+ *             NOTE(review): for any other value the correction vector
+ *             stays zero and is then normalised -- confirm no caller
+ *             relies on this.
+ * linitmax    iteration cap of the inner solver
+ * eps_tr      residual threshold below which the Ritz value may replace
+ *             tau as shift
+ * toldecay    the inner tolerance is toldecay^(-solvestep)
+ * verbosity   > 0 prints header and statistics, >= 2 one line per iteration
+ * k_conv      in: number of eigenpairs already stored in Q (if > 0),
+ *             out: number of converged eigenpairs
+ * Q, lambda   out: converged eigenvectors and eigenvalues
+ * it          out: number of outer iterations performed
+ * maxmin      1: Ritz values closest to the largest one come first,
+ *             otherwise those closest to zero
+ * shift_mode  see tau
+ * tslice      forwarded unchanged to A_psi
+ * A_psi       the operator, called as A_psi(out, in, tslice)
+ *
+ * Invalid parameters and allocation failures are reported through
+ * jderrorhandler().
+ */
+void jdher_su3vect(int n, int lda, double tau, double tol,
+                   int kmax, int jmax, int jmin, int itmax,
+                   int blksize, int blkwise,
+                   int V0dim, complex *V0,
+                   int solver_flag,
+                   int linitmax, double eps_tr, double toldecay,
+                   int verbosity,
+                   int *k_conv, complex *Q, double *lambda, int *it,
+                   int maxmin, int shift_mode, int tslice,
+                   matrix_mult_su3vect A_psi)
+{
+/*******************
+ * Local variables *
+ *******************/
+
+/* constants */
+/* allocatables: *
+ * initialize with NULL, so we can free even unallocated ptrs */
+double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;
+volatile complex *V_ = NULL;
+volatile complex *V;
+complex *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res, *eigwork = NULL, *temp1_ = NULL, *temp1;
+int *idx1 = NULL, *idx2 = NULL, *convind = NULL, *keepind = NULL, *solvestep = NULL, *actcorrits = NULL;
+
+/* non-allocated ptrs */
+complex *q, *v, *u, *r = NULL;
+/* scalar vars */
+double theta, alpha, it_tol;
+int i, k, j, actblksize, eigworklen, found, conv, keep, n2;
+int act, cnt, idummy, info, CntCorrIts=0, endflag=0;
+int N=n*sizeof(complex)/sizeof(su3_vector);
+int IDIST = 1;
+int ISEED[4] = {2, 3, 5, 7};
+  ISEED[0] = 2;
+
+  /* print info header */
+  if ((verbosity > 0) && (g_proc_id == 0)){
+    printf("Jacobi-Davidson method for hermitian Matrices\n");
+    printf("Solving A*x = lambda*x \n\n");
+    printf(" N= %10d ITMAX=%4d\n", n, itmax);
+    printf(" KMAX=%3d JMIN=%3d JMAX=%3d V0DIM=%3d\n",
+           kmax, jmin, jmax, V0dim);
+    printf(" BLKSIZE= %2d BLKWISE= %5s\n",
+           blksize, blkwise ? "TRUE" : "FALSE");
+    printf(" TOL= %11.4e TAU= %11.4e\n",
+           tol, tau);
+    printf(" LINITMAX= %5d EPS_TR= %10.3e TOLDECAY=%9.2e\n",
+           linitmax, eps_tr, toldecay);
+    printf("\n Computing %s eigenvalues\n",
+           maxmin ? "maximal" : "minimal");
+    printf("\n");
+    fflush( stdout );
+  }
+  /* validate input parameters */
+  if(tol <= 0) jderrorhandler(401,"");
+  if(kmax <= 0 || kmax > n) jderrorhandler(402,"");
+  if(jmax <= 0 || jmax > n) jderrorhandler(403,"");
+  if(jmin <= 0 || jmin > jmax) jderrorhandler(404,"");
+  if(itmax < 0) jderrorhandler(405,"");
+  if(blksize > jmin || blksize > (jmax - jmin)) jderrorhandler(406,"");
+  if(blksize <= 0 || blksize > kmax) jderrorhandler(406,"");
+  if(blkwise < 0 || blkwise > 1) jderrorhandler(407,"");
+  if(V0dim < 0 || V0dim >= jmax) jderrorhandler(408,"");
+  if(linitmax < 0) jderrorhandler(409,"");
+  if(eps_tr < 0.) jderrorhandler(500,"");
+  if(toldecay <= 1.0) jderrorhandler(501,"");
+
+  CONE.re=1.; CONE.im=0.;
+  CZERO.re=0.; CZERO.im=0.;
+  CMONE.re=-1.; CMONE.im=0.;
+
+  /* Get hardware-dependent values:
+   * Opt size of workspace for ZHEEV is (NB+1)*j, where NB is the opt.
+   * block size... */
+  eigworklen = (2 + _FT(ilaenv)(&ONE, filaenv, fvu, &jmax, &MONE, &MONE, &MONE, 6, 2)) * jmax;
+
+  if((void*)(V_ = (complex *)malloc((lda * jmax + 4) * sizeof(complex))) == NULL)
+  {
+    errno = 0;
+    jderrorhandler(300,"V in jdher");
+  }
+  V = V_;
+  if((void*)(U = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"U in jdher");
+  }
+  if((void*)(s = (double *)malloc(jmax * sizeof(double))) == NULL)
+  {
+    jderrorhandler(300,"s in jdher");
+  }
+  if((void*)(Res_ = (complex *)malloc((lda * blksize+4) * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"Res in jdher");
+  }
+  Res = Res_;
+
+  /* zero-initialised: the first convergence check copies resnrm into
+     resnrm_old before resnrm has been computed */
+  if((void*)(resnrm = (double *)calloc(blksize, sizeof(double))) == NULL)
+  {
+    jderrorhandler(300,"resnrm in jdher");
+  }
+  if((void*)(resnrm_old = (double *)calloc(blksize,sizeof(double))) == NULL)
+  {
+    jderrorhandler(300,"resnrm_old in jdher");
+  }
+  if((void*)(M = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"M in jdher");
+  }
+  if((void*)(Vtmp = (complex *)malloc(jmax * jmax * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"Vtmp in jdher");
+  }
+  if((void*)(p_work = (complex *)malloc(lda * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"p_work in jdher");
+  }
+
+  /* ... */
+  if((void*)(idx1 = (int *)malloc(jmax * sizeof(int))) == NULL)
+  {
+    jderrorhandler(300,"idx1 in jdher");
+  }
+  if((void*)(idx2 = (int *)malloc(jmax * sizeof(int))) == NULL)
+  {
+    jderrorhandler(300,"idx2 in jdher");
+  }
+
+  /* Indices for (non-)converged approximations */
+  if((void*)(convind = (int *)malloc(blksize * sizeof(int))) == NULL)
+  {
+    jderrorhandler(300,"convind in jdher");
+  }
+  if((void*)(keepind = (int *)malloc(blksize * sizeof(int))) == NULL)
+  {
+    jderrorhandler(300,"keepind in jdher");
+  }
+  if((void*)(solvestep = (int *)malloc(blksize * sizeof(int))) == NULL)
+  {
+    jderrorhandler(300,"solvestep in jdher");
+  }
+  if((void*)(actcorrits = (int *)malloc(blksize * sizeof(int))) == NULL)
+  {
+    jderrorhandler(300,"actcorrits in jdher");
+  }
+
+  if((void*)(eigwork = (complex *)malloc(eigworklen * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"eigwork in jdher");
+  }
+  if((void*)(rwork = (double *)malloc(3*jmax * sizeof(double))) == NULL)
+  {
+    jderrorhandler(300,"rwork in jdher");
+  }
+  if((void*)(temp1_ = (complex *)malloc((lda+4) * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"temp1 in jdher");
+  }
+  temp1 = temp1_;
+  /* sized in complex entries: sorteig_su3vect() also uses dtemp to buffer
+     a column of U */
+  if((void*)(dtemp = (double *)malloc(lda * sizeof(complex))) == NULL)
+  {
+    jderrorhandler(300,"dtemp in jdher");
+  }
+
+  /* Set variables for Projection routines */
+  n2 = 2*n;
+  p_n = n;
+  p_n2 = n2;
+  p_Q = Q;
+  p_A_psi_s3 = A_psi;
+  p_lda = lda;
+
+  /**************************************************************************
+   *                                                                        *
+   * Generate initial search subspace V. Vectors are taken from V0 and if   *
+   * necessary randomly generated.                                          *
+   *                                                                        *
+   **************************************************************************/
+
+  /* copy V0 to V */
+  _FT(zlacpy)(fupl_a, &n, &V0dim, V0, &lda, V, &lda, 1);
+  j = V0dim;
+  /* if V0dim < blksize: generate additional random vectors */
+  if (V0dim < blksize)
+  {
+    idummy = (blksize - V0dim)*n; /* nof random numbers */
+    _FT(zlarnv)(&IDIST, ISEED, &idummy, V + V0dim*lda);
+    j = blksize;
+  }
+  for (cnt = 0; cnt < j; cnt ++)
+  {
+    ModifiedGS_su3vect(V + cnt*lda, n, cnt, V, lda);
+    alpha = sqrt(square_norm_su3vect((su3_vector*)(V+cnt*lda), N, 1));
+    alpha = 1.0 / alpha;
+    _FT(dscal)(&n2, &alpha, (double *)(V + cnt*lda), &ONE);
+  }
+  /* Generate interaction matrix M = V^dagger*A*V. Only the upper triangle
+     is computed. */
+  for (cnt = 0; cnt < j; cnt++) {
+    /* WARNING: this assumes that A_psi updates the boundaries of the input vector */
+    A_psi((su3_vector*) temp1, (su3_vector*) (V+cnt*lda), tslice);
+    idummy = cnt+1;
+    for(i = 0; i < idummy; i++) {
+      M[cnt*jmax+i] = scalar_prod_su3vect((su3_vector*)(V+i*lda), (su3_vector*) temp1, N, 1);
+    }
+  }
+
+  /* Other initializations */
+  k = 0; (*it) = 0;
+  if((*k_conv) > 0)
+  {
+    k = (*k_conv);
+  }
+
+  actblksize = blksize;
+  for(act = 0; act < blksize; act ++)
+  {
+    solvestep[act] = 1;
+  }
+
+
+  /****************************************************************************
+   *                                                                          *
+   * Main JD-iteration loop                                                   *
+   *                                                                          *
+   ****************************************************************************/
+
+  while((*it) < itmax)
+  {
+    /****************************************************************************
+     *                                                                          *
+     * Solving the projected eigenproblem                                       *
+     *                                                                          *
+     * M*u = V^dagger*A*V*u = s*u                                               *
+     * M is hermitian, only the upper triangle is stored                        *
+     *                                                                          *
+     ****************************************************************************/
+    _FT(zlacpy)(fupl_u, &j, &j, M, &jmax, U, &jmax, 1);
+    _FT(zheev)(fupl_v, fupl_u, &j, U, &jmax, s, eigwork, &eigworklen, rwork, &info, 1, 1);
+
+    if (info != 0)
+    {
+      printf("error solving the projected eigenproblem.");
+      printf(" zheev: info = %d\n", info);
+    }
+    if(info != 0) jderrorhandler(502,"proble in zheev");
+
+
+    /* Reverse order of eigenvalues if maximal value is needed */
+    if(maxmin == 1)
+    {
+      sorteig_su3vect(j, s, U, jmax, s[j-1], dtemp, idx1, idx2, 0);
+    }
+    else
+    {
+      sorteig_su3vect(j, s, U, jmax, 0., dtemp, idx1, idx2, 0);
+    }
+    /****************************************************************************
+     *                                                                          *
+     * Convergence/Restart Check                                                *
+     *                                                                          *
+     * In case of convergence, strip off a whole block or just the converged    *
+     * ones and put 'em into Q. Update the matrices Q, V, U, s                  *
+     *                                                                          *
+     * In case of a restart update the V, U and M matrices and recompute the    *
+     * Eigenvectors                                                             *
+     *                                                                          *
+     ****************************************************************************/
+
+    found = 1;
+    while(found)
+    {
+      /* conv/keep = Number of converged/non-converged Approximations */
+      conv = 0; keep = 0;
+      for(act=0; act < actblksize; act++)
+      {
+        /* Setting pointers for single vectors */
+        q = Q + (act+k)*lda;
+        u = U + act*jmax;
+        r = Res + act*lda;
+        /* Compute Ritz-Vector Q[:,k+cnt1]=V*U[:,cnt1] */
+        theta = s[act];
+        _FT(zgemv)(fupl_n, &n, &j, &CONE, V, &lda, u, &ONE, &CZERO, q, &ONE, 1);
+        /* Compute the residual */
+        A_psi((su3_vector*) r, (su3_vector*) q,tslice);
+        theta = -theta;
+        _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
+
+        /* Compute norm of the residual and update arrays convind/keepind*/
+        resnrm_old[act] = resnrm[act];
+        resnrm[act] = sqrt(square_norm_su3vect((su3_vector*) r, N, 1));
+        if (resnrm[act] < tol)
+        {
+          convind[conv] = act;
+          conv = conv + 1;
+        }
+        else
+        {
+          keepind[keep] = act;
+          keep = keep + 1;
+        }
+      } /* for(act = 0; act < actblksize; act ++) */
+      /* Check whether the blkwise-mode is chosen and ALL the
+         approximations converged, or whether the strip-off mode is
+         active and SOME of the approximations converged */
+      found = ((blkwise==1 && conv==actblksize) || (blkwise==0 && conv!=0))
+        && (j > actblksize || k == kmax - actblksize);
+      /***************************************************************************
+       *                                                                         *
+       * Convergence Case                                                        *
+       *                                                                         *
+       * In case of convergence, strip off a whole block or just the converged   *
+       * ones and put 'em into Q. Update the matrices Q, V, U, s                 *
+       *                                                                         *
+       **************************************************************************/
+      if (found)
+      {
+        /* Store Eigenvalues */
+        for(act = 0; act < conv; act++)
+          lambda[k+act] = s[convind[act]];
+        /* Re-use non approximated Ritz-Values */
+        for(act = 0; act < keep; act++)
+          s[act] = s[keepind[act]];
+        /* Shift the others in the right position */
+        for(act = 0; act < (j-actblksize); act ++)
+          s[act+keep] = s[act+actblksize];
+        /* Update V. Re-use the V-Vectors not looked at yet. */
+        idummy = j - actblksize;
+        for (act = 0; act < n; act = act + jmax)
+        {
+          cnt = act + jmax > n ? n-act : jmax;
+          _FT(zlacpy)(fupl_a, &cnt, &j, V+act, &lda, Vtmp, &jmax, 1);
+          _FT(zgemm)(fupl_n, fupl_n, &cnt, &idummy, &j, &CONE, Vtmp,
+                     &jmax, U+actblksize*jmax, &jmax, &CZERO, V+act+keep*lda, &lda, 1, 1);
+        }
+        /* Insert the not converged approximations as first columns in V */
+        for(act = 0; act < keep; act++)
+        {
+          _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+keepind[act])*lda,&lda,V+act*lda,&lda,1);
+        }
+        /* Store Eigenvectors */
+        for(act = 0; act < conv; act++)
+        {
+          _FT(zlacpy)(fupl_a,&n,&ONE,Q+(k+convind[act])*lda,&lda,Q+(k+act)*lda,&lda,1);
+        }
+        /* Update SearchSpaceSize j */
+        j = j - conv;
+        /* Let M become a diagonalmatrix with the Ritzvalues as entries ... */
+        _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
+        for (act = 0; act < j; act++)
+        {
+          M[act*jmax + act].re = s[act];
+        }
+        /* ... and U the Identity(jnew,jnew) */
+        _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
+        if(shift_mode == 1)
+        {
+          if(maxmin == 0)
+          {
+            for(act = 0; act < conv; act ++)
+            {
+              if (lambda[k+act] > tau)
+              {
+                tau = lambda[k+act];
+              }
+            }
+          }
+          else
+          {
+            for(act = 0; act < conv; act ++)
+            {
+              if (lambda[k+act] < tau)
+              {
+                tau = lambda[k+act];
+              }
+            }
+          }
+        }
+        /* Update Converged-Eigenpair-counter and Pro_k */
+        k = k + conv;
+        /* Update the new blocksize */
+        actblksize=min(blksize, kmax-k);
+        /* Exit main iteration loop when kmax eigenpairs have been approximated */
+        if (k == kmax)
+        {
+          endflag = 1;
+          break;
+        }
+        /* Counter for the linear-solver-accuracy */
+        for(act = 0; act < keep; act++)
+          solvestep[act] = solvestep[keepind[act]];
+        /* Now we expect to have the next eigenvalues */
+        /* already with some accuracy                 */
+        /* So we do not need to start from scratch... */
+        for(act = keep; act < blksize; act++)
+          solvestep[act] = 1;
+      } /* if(found) */
+      if(endflag == 1)
+      {
+        break;
+      }
+      /**************************************************************************
+       *                                                                        *
+       * Restart                                                                *
+       *                                                                        *
+       * The Eigenvector-Approximations corresponding to the first jmin         *
+       * Petrov-Vectors are kept. if (j+actblksize > jmax)                      *
+       *                                                                        *
+       **************************************************************************/
+      if (j+actblksize > jmax)
+      {
+        idummy = j; j = jmin;
+
+        for (act = 0; act < n; act = act + jmax)
+        { /* V = V * U(:,1:j) */
+          cnt = act+jmax > n ? n-act : jmax;
+          _FT(zlacpy)(fupl_a, &cnt, &idummy, V+act, &lda, Vtmp, &jmax, 1);
+          _FT(zgemm)(fupl_n, fupl_n, &cnt, &j, &idummy, &CONE, Vtmp,
+                     &jmax, U, &jmax, &CZERO, V+act, &lda, 1, 1);
+        }
+        _FT(zlaset)(fupl_a, &j, &j, &CZERO, &CONE, U, &jmax, 1);
+        _FT(zlaset)(fupl_u, &j, &j, &CZERO, &CZERO, M, &jmax, 1);
+        for (act = 0; act < j; act++)
+          M[act*jmax + act].re = s[act];
+      }
+    } /* while(found) */
+
+    if(endflag == 1)
+    {
+      break;
+    }
+
+    /****************************************************************************
+     *                                                                          *
+     * Solving the correction equations                                         *
+     *                                                                          *
+     *                                                                          *
+     ****************************************************************************/
+
+    /* Solve actblksize times the correction equation ... */
+    for (act = 0; act < actblksize; act ++)
+    {
+      /* Setting start-value for vector v as zeros(n,1). Guarantees orthogonality */
+      v = V + j*lda;
+      for (cnt = 0; cnt < n; cnt ++)
+      {
+        v[cnt].re = 0.;
+        v[cnt].im = 0.;
+      }
+      /* Adaptive accuracy and shift for the lin.solver. In case the
+         residual is big, we don't need a too precise solution for the
+         correction equation, since even in exact arithmetic the
+         solution wouldn't be too useful for the Eigenproblem. */
+      r = Res + act*lda;
+      if (resnrm[act] < eps_tr && resnrm[act] < s[act] && resnrm_old[act] > resnrm[act])
+      {
+        p_theta = s[act];
+      }
+      else
+      {
+        p_theta = tau;
+      }
+      p_k = k + actblksize;
+
+      /* if we are in blockwise mode, we do not want to */
+      /* iterate solutions much more, if they have      */
+      /* already the desired precision                  */
+      if(blkwise == 1 && resnrm[act] < tol)
+      {
+        it_tol = pow(toldecay, (double)(-5));
+      }
+      else
+      {
+        it_tol = pow(toldecay, (double)(-solvestep[act]));
+      }
+      solvestep[act] = solvestep[act] + 1;
+
+      /* equation and project if necessary */
+      ModifiedGS_su3vect(r, n, k + actblksize, Q, lda);
+
+      /* Solve the correction equation ... */
+      g_sloppy_precision = 1;
+      if(solver_flag == CG)
+      {
+        info = cg_her_su3vect((su3_vector*) v, (su3_vector*) r, linitmax, it_tol*it_tol, 0,
+                              n*sizeof(complex)/sizeof(su3_vector),tslice, &Proj_A_psi_su3vect);
+      }
+      g_sloppy_precision = 0;
+
+      /* Actualizing profiling data */
+      if (info == -1)
+      {
+        CntCorrIts += linitmax;
+      }
+      else
+      {
+        CntCorrIts += info;
+      }
+      actcorrits[act] = info;
+
+      /* orthonormalize v to Q, cause the implicit
+         orthogonalization in the solvers may be too inaccurate. Then
+         apply "IteratedCGS" to prevent numerical breakdown
+         in order to orthogonalize v to V */
+
+      ModifiedGS_su3vect(v, n, k+actblksize, Q, lda);
+      IteratedClassicalGS_su3vect(v, &alpha, n, j, V, temp1, lda);
+
+      alpha = 1.0 / alpha;
+      _FT(dscal)(&n2, &alpha, (double*) v, &ONE);
+
+      /* update interaction matrix M */
+      A_psi((su3_vector*) temp1, (su3_vector*) v, tslice);
+      idummy = j+1;
+      for(i = 0; i < idummy; i++) {
+        M[j*jmax+i] = scalar_prod_su3vect((su3_vector*) (V+i*lda), (su3_vector*) temp1, N, 1);
+      }
+
+      /* Increasing SearchSpaceSize j */
+      j ++;
+    } /* for (act = 0;act < actblksize; act ++) */
+
+    /* Print information line */
+    if(g_proc_id == 0) {
+      print_status_su3vect(verbosity, *it, k, j - blksize, kmax, blksize, actblksize,
+                           s, resnrm, actcorrits);
+    }
+    /* Increase iteration-counter for outer loop */
+    (*it) = (*it) + 1;
+  } /* Main iteration loop */
+
+  /******************************************************************
+   *                                                                *
+   * Eigensolutions converged or iteration limit reached            *
+   *                                                                *
+   * Print statistics. Free memory. Return.                         *
+   *                                                                *
+   ******************************************************************/
+
+  (*k_conv) = k;
+  if (g_proc_id == 0 && verbosity > 0) {
+    printf("\nJDHER execution statistics\n\n");
+    /* no outer iteration means no average to report */
+    printf("IT_OUTER=%d IT_INNER_TOT=%d IT_INNER_AVG=%8.2f\n",
+           (*it), CntCorrIts, (*it) > 0 ? (double)CntCorrIts/(*it) : 0.);
+    printf("\nConverged eigensolutions in order of convergence:\n");
+    printf("# I LAMBDA(I) RES(I)\n");
+    printf("#---------------------------------------\n");
+  }
+  /* Res is free to be reused as scratch here; r itself is still NULL when
+     the main loop was never entered (itmax == 0 with *k_conv > 0 on entry) */
+  r = Res;
+  for (act = 0; act < *k_conv; act ++)
+  {
+    /* Compute the residual for solution act */
+    q = Q + act*lda;
+    theta = -lambda[act];
+    A_psi((su3_vector*) r, (su3_vector*) q,tslice);
+    _FT(daxpy)(&n2, &theta, (double*) q, &ONE, (double*) r, &ONE);
+    alpha = sqrt(square_norm_su3vect((su3_vector*) r, N, 1));
+    if(g_proc_id == 0 && verbosity > 0) {
+      printf("%3d %22.15e %12.5e\n", act+1, lambda[act], alpha);
+    }
+  }
+  if(g_proc_id == 0 && verbosity > 0)
+  {
+    printf("\n");
+    fflush( stdout );
+  }
+  free(V_); free(Vtmp); free(U);
+  free(s); free(Res_);
+  free(resnrm); free(resnrm_old);
+  free(M); free(Z);
+  free(eigwork); free(temp1_);
+  free(dtemp); free(rwork);
+  free(p_work);
+  /* p_work is a global: do not leave it pointing at freed memory */
+  p_work = NULL;
+  free(idx1); free(idx2);
+  free(convind); free(keepind); free(solvestep); free(actcorrits);
+
+} /* jdher(.....) */
+
+/****************************************************************************
+ *                                                                          *
+ * Supporting functions                                                     *
+ *                                                                          *
+ ****************************************************************************/
+
+/* PRINT_STATUS - print status line (called for each outer iteration).
+ * Prints nothing unless verbosity >= 2; a column header is printed on the
+ * first iteration (it == 0).
+ */
+static void print_status_su3vect(int verbosity, int it, int k, int j, int kmax,
+                                 int blksize, int actblksize,
+                                 double *s, double *resnrm, int *actcorrits) {
+  const int max_vals = 5;
+
+  int i, idummy;
+
+  if (verbosity >= 2) {
+    if (blksize == 1) {
+      if (it == 0) {
+        printf(" IT K J RES LINIT RITZ-VALUES(1:5)\n");
+        idummy = 28 + ( 13 > max_vals*10 ? 13 : max_vals*10);
+        for (i = 0; i < idummy; i ++)
+          putchar('-');
+        printf("\n");
+      }
+      printf("%4d %3d %3d %9.2e %5d", it + 1, k, j, resnrm[0], actcorrits[0]);
+      for (i = 0; i < (j < max_vals ? j : max_vals); i ++){
+        printf(" %9.2e", s[i]);
+      }
+      printf("\n");
+      fflush( stdout );
+    }
+    else { /* blksize > 1 */
+      if (it == 0) {
+        printf(" IT K J RITZVALS ");
+        for (i = 1; i < actblksize; i ++)
+          printf(" ");
+        printf(" RES ");
+        for (i = 1; i < actblksize; i ++)
+          printf(" ");
+        printf(" LINIT\n");
+        idummy = 12 + 4 + blksize*(10 + 10 + 5);
+        for (i = 0; i < idummy; i ++)
+          putchar('-');
+        printf("\n");
+      }
+      printf("%4d %3d %3d", it + 1, k, j);
+      for (i = 0; i < blksize; i ++)
+        if (i < actblksize)
+          printf(" %9.2e", s[i]);
+        else
+          printf(" ");
+      printf(" ");
+      for (i = 0; i < blksize; i ++)
+        if (i < actblksize)
+          printf(" %9.2e", resnrm[i]);
+        else
+          printf(" ");
+      printf(" ");
+      for (i = 0; i < blksize; i ++)
+        if (i < actblksize)
+          printf(" %5d", actcorrits[i]);
+        else
+          printf(" ");
+      printf("\n");
+      fflush( stdout );
+    }
+  }
+}
+
+/*
+ * SORTEIG
+ *
+ * Default behaviour (strategy == 0):
+ *
+ * Sort eigenpairs (S(i),U(:,i)), such that
+ *
+ * |S(i) - tau| <= |S(i+1) -tau| for i=1..j-1.
+ *
+ * j : dimension of S
+ * ldu: leading dimension of U
+ * dtemp: work array; it must hold j complex numbers (2*j doubles), because
+ *        it is also used to buffer one column of U during the swap
+ * idx: int array of length j
+ *
+ * Alternate behaviour (strategy == 1):
+ *
+ * Same as above but put all S(i) < tau to the end. This is used to
+ * avoid computation of zero eigenvalues.
+ */
+
+static void sorteig_su3vect(int j, double S[], complex U[], int ldu, double tau,
+                            double dtemp[], int idx1[], int idx2[], int strategy){
+  int i;
+
+  /* setup vector to be sorted and index vector */
+  switch (strategy) {
+  case 0:
+    for (i = 0; i < j; i ++)
+      dtemp[i] = fabs(S[i] - tau);
+    break;
+  case 1:
+    for (i = 0; i < j; i ++)
+      if (S[i] < tau)
+        dtemp[i] = DBL_MAX;
+      else
+        dtemp[i] = fabs(S[i] - tau);
+    break;
+  default:
+    jderrorhandler(503,"");;
+  }
+  for (i = 0; i < j; i ++)
+    idx1[i] = i;
+
+  /* sort dtemp in ascending order carrying itemp along */
+  quicksort(j, dtemp, idx1);
+
+  /* compute 'inverse' index vector */
+  for (i = 0; i < j; i ++)
+    idx2[idx1[i]] = i;
+
+  /* sort eigenvalues */
+  memcpy(dtemp, S, j * sizeof(double));
+  for (i = 0; i < j; i ++)
+    S[i] = dtemp[idx1[i]];
+
+  /* sort eigenvectors (in place) */
+  for (i = 0; i < j; i ++) {
+    if (i != idx1[i]) {
+      memcpy(dtemp, U+i*ldu, j*sizeof(complex));
+      memcpy(U+i*ldu, U+idx1[i]*ldu, j*sizeof(complex));
+      memcpy(U+idx1[i]*ldu, dtemp, j*sizeof(complex));
+      idx1[idx2[i]] = idx1[i];
+      idx2[idx1[i]] = idx2[i];
+    }
+  }
+}
+
+
+
+
+/*
+ * Operator of the correction equation, handed to the inner solver:
+ * y = (A - p_theta) x, followed by removing from y its components along
+ * the first p_k columns of p_Q. All p_* values are set by jdher_su3vect().
+ */
+void Proj_A_psi_su3vect(su3_vector * const y, su3_vector * const x, int tslice){
+  double mtheta = -p_theta;
+  int i;
+  /* y = A*x */
+  p_A_psi_s3(y, x, tslice);
+  /* y = -theta*x+y*/
+  _FT(daxpy)(&p_n2, &mtheta, (double*) x, &ONE, (double*) y, &ONE);
+  /* p_work = Q^dagger*y */
+  for(i = 0; i < p_k; i++) {
+    p_work[i] = scalar_prod_su3vect((su3_vector*) (p_Q+i*p_lda), (su3_vector*) y, p_n*sizeof(complex)/sizeof(su3_vector), 1);
+  }
+  /* y = y - Q*p_work */
+  _FT(zgemv)(fupl_n, &p_n, &p_k, &CMONE, p_Q, &p_lda, (complex*) p_work, &ONE, &CONE, (complex*) y, &ONE, 1);
+}
+
+#endif // WITHLAPH
diff --git a/solver/jdher_su3vect.h b/solver/jdher_su3vect.h
new file mode 100755
index 000000000..3ccb25682
--- /dev/null
+++ b/solver/jdher_su3vect.h
@@ -0,0 +1,49 @@
+/***********************************************************************
+ * Copyright (C)
2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD. If not, see .
+ ***********************************************************************/
+/* Declares jdher_su3vect and jderrorhandler; their definitions are not part of this header. */
+#ifndef _JDHERSU3VJACOBI_H
+#define _JDHERSU3VJACOBI_H
+/* JD_MAXIMAL / JD_MINIMAL are defined here only if not already defined. NOTE(review): presumably the values accepted by the maxmin argument below -- confirm. */
+#ifndef JD_MAXIMAL
+#define JD_MAXIMAL 1
+#endif
+#ifndef JD_MINIMAL
+#define JD_MINIMAL 0
+#endif
+/* NOTE(review): the two #include lines below show no header name in this copy of the patch; restore them from the original commit. */
+#include
+#include
+#include "su3.h"
+#include "solver/solver.h"
+/* Error reporting hook taking an integer code and a message. NOTE(review): it is called with a string literal elsewhere in this patch, so message could be const char * -- confirm against the definition. */
+void jderrorhandler(const int i, char * message);
+/* Prototype only. NOTE(review): parameter meanings are not visible from this header; take them from the definition of jdher_su3vect. A_psi has the matrix_mult_su3vect callback type. */
+extern void jdher_su3vect(int n, int lda, double tau, double tol,
+                          int kmax, int jmax, int jmin, int itmax,
+                          int blksize, int blkwise,
+                          int V0dim, complex *V0,
+                          int solver_flag,
+                          int linitmax, double eps_tr, double toldecay,
+                          int verbosity,
+                          int *k_conv, complex *Q, double *lambda, int *it,
+                          int maxmin, int shift_mode,int tslice,
+                          matrix_mult_su3vect A_psi);
+
+#endif
+
diff --git a/solver/matrix_mult_typedef.h b/solver/matrix_mult_typedef.h
index 5d9a8b3c2..4535959c4 100644
--- a/solver/matrix_mult_typedef.h
+++ b/solver/matrix_mult_typedef.h
@@ -32,5 +32,6 @@ typedef void (*matrix_mult) (spinor * const, spinor * const);
 typedef void (*matrix_mult_blk) (spinor * const, spinor * const, const int);
 typedef void (*matrix_mult_clover) (spinor * const, spinor * const, const double);
 typedef void (*c_matrix_mult) (complex * const, complex * const);
+typedef void (*matrix_mult_su3vect) (su3_vector * const, su3_vector * const, const int); /* callback on su3_vector fields; NOTE(review): presumably (result, input, time-slice index), as in the call p_A_psi_s3(y, x, tslice) -- confirm */
 
 #endif
diff --git a/su3.h b/su3.h
index 411b5102d..77ed19d87 100644
--- a/su3.h
+++ b/su3.h
@@ -79,6 +79,17 @@ typedef struct
   spinor sp_up,sp_dn;
 } bispinor;
 
+typedef struct
+{
+  complex s00,s01,s02,s03,s10,s11,s12,s13,s20,s21,s22,s23,s30,s31,s32,s33; /* 16 complex members named s<i><j>, i,j = 0..3; NOTE(review): presumably a 4x4 matrix, index convention to be confirmed */
+} spinor_matrix;
+
+typedef struct
+{
+  complex sc0,sc1,sc2,sc3; /* 4 complex members sc0..sc3 */
+} complex_spinor;
+
+
 /*******************************************************************************
 *
 * Macros for SU(3) vectors
diff --git a/xchange.h b/xchange.h
index 17a6c4edb..b2af30c60 100644
--- a/xchange.h
+++ b/xchange.h
@@ -23,6 +23,7 @@
 #include "xchange_gauge.h"
 #include "xchange_deri.h"
 #include "xchange_halffield.h"
+#include "xchange_jacobi.h"
 # ifdef _USE_TSPLITPAR
 # include "xchange_field_tslice.h"
 # endif
diff --git a/xchange_jacobi.c b/xchange_jacobi.c
new file mode 100644
index 000000000..de1985af2
--- /dev/null
+++ b/xchange_jacobi.c
@@ -0,0 +1,110 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD. If not, see . 
+ ***********************************************************************/
+/**********************************************************
+ *
+ * exchange routines for su3_vector fields
+ *
+ * Author: Luigi Scorzato
+ *
+ **********************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include
+#endif
+#include
+#include
+#include
+#include
+#ifdef MPI
+# include
+#endif
+
+#include "global.h"
+#if (defined XLC && defined BGL)
+# include "bgl.h"
+#endif
+#include "mpi_init.h"
+#include "su3.h"
+#include "xchange_jacobi.h"
+
+#ifdef WITHLAPH
+/* Note that LAPH also implies _INDEX_INDEP_GEOM, NO PARALLELT* */
+/* NOTE(review): the bare #include lines above show no header name in this copy of the patch; restore them from the original commit. */
+/* exchanges the field l: one MPI_Sendrecv pair per parallelised direction (x, then y, then z); does nothing when MPI is not defined */
+void xchange_jacobi(su3_vector * const l) {
+
+#ifdef _KOJAK_INST
+#pragma pomp inst begin(xchange_jacobi)
+#endif
+
+# ifdef MPI
+
+  MPI_Status status;
+# if (defined PARALLELX || defined PARALLELXY || defined PARALLELXYZ )
+  /* send the data to the neighbour on the left in x direction */
+  /* receive the data from the neighbour on the right in x direction */
+  MPI_Sendrecv((void*)(l+gI_0_0_0), 1, jfield_x_slice_gath, g_nb_x_dn, 5091,
+               (void*)(l+gI_L_0_0), 1, jfield_x_slice_cont, g_nb_x_up, 5091,
+               g_cart_grid, &status);
+
+  /* send the data to the neighbour on the right in x direction */
+  /* receive the data from the neighbour on the left in x direction */
+  MPI_Sendrecv((void*)(l+gI_Lm1_0_0), 1, jfield_x_slice_gath, g_nb_x_up, 5092,
+               (void*)(l+gI_m1_0_0), 1, jfield_x_slice_cont, g_nb_x_dn, 5092,
+               g_cart_grid, &status);
+
+# endif
+
+# if (defined PARALLELXY || defined PARALLELXYZ )
+  /* send the data to the neighbour on the left in y direction */
+  /* receive the data from the neighbour on the right in y direction */
+  MPI_Sendrecv((void*)(l+gI_0_0_0), 1, jfield_y_slice_gath, g_nb_y_dn, 5101,
+               (void*)(l+gI_0_L_0), 1, jfield_y_slice_cont, g_nb_y_up, 5101,
+               g_cart_grid, &status);
+
+  /* send the data to the neighbour on the right in y direction */
+  /* receive the data from the neighbour on the left in y direction */
+  MPI_Sendrecv((void*)(l+gI_0_Lm1_0), 1, jfield_y_slice_gath, g_nb_y_up, 5102,
+               (void*)(l+gI_0_m1_0), 1, jfield_y_slice_cont, g_nb_y_dn, 5102,
+               g_cart_grid, &status);
+
+# endif
+
+# if (defined PARALLELXYZ )
+  /* send the data to the neighbour on the left in z direction */
+  /* receive the data from the neighbour on the right in z direction */
+  MPI_Sendrecv((void*)(l+gI_0_0_0), 1, jfield_z_slice_gath, g_nb_z_dn, 5503,
+               (void*)(l+gI_0_0_L), 1, jfield_z_slice_cont, g_nb_z_up, 5503,
+               g_cart_grid, &status);
+
+  /* send the data to the neighbour on the right in z direction */
+  /* receive the data from the neighbour on the left in z direction */
+  MPI_Sendrecv((void*)(l+gI_0_0_Lm1), 1, jfield_z_slice_gath, g_nb_z_up, 5504,
+               (void*)(l+gI_0_0_m1), 1, jfield_z_slice_cont, g_nb_z_dn, 5504,
+               g_cart_grid, &status);
+
+# endif
+# endif // MPI
+  return; /* NOTE(review): the pomp "end" pragma below comes after this return -- confirm the instrumentation still closes the region */
+#ifdef _KOJAK_INST
+#pragma pomp inst end(xchange_jacobi)
+#endif
+}
+
+#endif // WITHLAPH
diff --git a/xchange_jacobi.h b/xchange_jacobi.h
new file mode 100644
index 000000000..3c8916a8f
--- /dev/null
+++ b/xchange_jacobi.h
@@ -0,0 +1,25 @@
+/***********************************************************************
+ * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD. If not, see . 
+ ***********************************************************************/
+
+#ifndef _XCHANGE_JACOBI_H
+#define _XCHANGE_JACOBI_H
+/* Exchange routine for an su3_vector field, defined in xchange_jacobi.c (where the parameter is named l). NOTE(review): this header does not itself declare su3_vector; includers presumably include su3.h first -- confirm. */
+void xchange_jacobi(su3_vector * const s);
+
+#endif