Skip to content

Commit

Permalink
Merge pull request #423 from etmc/DDalphaAMG_nd_merge_etmc_master
Browse files Browse the repository at this point in the history
merge DDalphaAMG_nd branch into etmc/tmLQCD/master
  • Loading branch information
kostrzewa authored May 13, 2019
2 parents 5955fdb + 42356fc commit 4fbdef8
Show file tree
Hide file tree
Showing 58 changed files with 2,986 additions and 540 deletions.
933 changes: 852 additions & 81 deletions DDalphaAMG_interface.c

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions DDalphaAMG_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,20 @@
#include "global.h"
#include "su3.h"
#include"solver/matrix_mult_typedef.h"
#include"solver/matrix_mult_typedef_nd.h"

extern int mg_setup_iter;
extern int mg_coarse_setup_iter;
extern int mg_update_setup_iter;
extern int mg_update_gauge;
extern int mg_omp_num_threads;
extern int mg_Nvec;
extern int mg_lvl;
extern int mg_blk[4];
extern int mg_mixed_prec;
extern int mg_setup_mu_set;
extern int mg_no_shifts;
extern double mg_mms_mass;
extern double mg_setup_mu;
extern double mg_cmu_factor;
extern double mg_dtau_update;
Expand All @@ -44,6 +48,7 @@ extern double mg_rho_update;
void MG_init(void);
void MG_update_gauge(double step);
void MG_update_mu(double mu_tmLQCD, double odd_tmLQCD);
void MG_update_mubar_epsbar(double mubar_tmLQCD, double epsbar_tmLQCD, double shift_tmLQCD);
void MG_reset(void);
void MG_finalize(void);

Expand All @@ -56,4 +61,22 @@ int MG_solver_eo(spinor * const Even_new, spinor * const Odd_new,
const double precision, const int max_iter, const int rel_prec,
const int N, su3 **gf, matrix_mult_full f_full);

int MG_solver_nd(spinor * const up_new, spinor * const dn_new,
spinor * const up_old, spinor * const dn_old,
const double precision, const int max_iter, const int rel_prec,
const int N, su3 **gf, matrix_mult_nd f);

int MG_solver_nd_eo(spinor * const Even_new_up, spinor * const Odd_new_up,
spinor * const Even_new_dn, spinor * const Odd_new_dn,
spinor * const Even_up, spinor * const Odd_up,
spinor * const Even_dn, spinor * const Odd_dn,
const double precision, const int max_iter, const int rel_prec,
const int N, su3 **gf, matrix_mult_full_nd f_full);

int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new,
spinor * const up_old, spinor * const dn_old,
const double * shifts, const int no_shifts,
const double * precision, const int max_iter, const int rel_prec,
const int N, su3 **gf, matrix_mult_nd f);

#endif /* DDalphaAMG_INTERFACE_H_ */
1 change: 1 addition & 0 deletions default_input_values.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#define _default_g_mu1 0.0
#define _default_g_mu2 0.0
#define _default_g_mu3 0.0
#define _default_g_shift 0.0
#define _default_c_sw -1.0
#define _default_g_beta 6.0
#define _default_g_N_s 20
Expand Down
2 changes: 2 additions & 0 deletions doc/DDalphaAMG.tex
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ \subsubsection{More advanced settings}
\item[\texttt{MGdtauUpdate:}] for HMC, $d\tau$ interval after which the setup is updated. If 0, it will be updated every time the configuration is changed.
\item[\texttt{MGrhoUpdate:}] for HMC, rho value of the monomial at which the setup has to be updated. It can be combined with \texttt{MGdtauUpdate} or used standalone.
\item[\texttt{MGUpdateSetupIter:}] for HMC, number of setup iterations to do on the fine level when the setup has to be updated.
\item[\texttt{MGNumberOfShifts:}] for MG in multi-shift systems, number of shifted linear systems, N, to be solved by DDalphaAMG. MG will solve the N smallest shifts.
\item[\texttt{MGMMSMass:}] for MG in multi-shift systems, alternative to the previous. MG will solve all the mass-shifts smaller than the given value.
\end{description}
\subsubsection{Output analysis\label{sec:DDalphaAMG_output}}
Running tmLQCD programs with the option \texttt{-v}, the full output of DDalphaAMG is shown. Here are some hints on the information given. Just before the setup, the full set of parameters is printed, with an output similar to the following:
Expand Down
166 changes: 131 additions & 35 deletions expo.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,48 +52,132 @@
#include "su3.h"
#include "su3adj.h"
#include "expo.h"
#include "float.h"
#include "global.h"

void exposu3(su3* const vr, const su3adj* const p) {
int i;
su3 ALIGN v,v2;
double ALIGN fac,r;
double ALIGN a,b;
_Complex double ALIGN a0,a1,a2,a1p;
/*
 * Returns a real cubic polynomial in the eight components d1..d8 of *p.
 * exposu3() uses the negated result as the quantity it labels
 * "-1i * det(X)".
 */
static double imag_det(const su3adj* p) {
  const double two_over_sqrt3 = 2.0/sqrt(3.0);
  const double one_third = 1.0/3.0;
  const double one_over_sqrt3 = 1.0/sqrt(3.0);
  double det;

  /* kept as two statements so the summation order matches exactly */
  det = two_over_sqrt3*p->d8*(one_third*p->d8*p->d8 - p->d3*p->d3)
      + 2*(p->d2*p->d4*p->d7 - p->d1*p->d4*p->d6 - p->d2*p->d5*p->d6 - p->d1*p->d5*p->d7);
  det += (one_over_sqrt3*p->d8 - p->d3)*(p->d4*p->d4 + p->d5*p->d5)
       + (one_over_sqrt3*p->d8 + p->d3)*(p->d6*p->d6 + p->d7*p->d7)
       - two_over_sqrt3*p->d8*(p->d1*p->d1 + p->d2*p->d2);
  return det;
}

/* Scales each of the eight components d1..d8 of *p by the factor d, in place. */
static void mul_su3alg(su3adj* p,double d) {
  p->d1 *= d;
  p->d2 *= d;
  p->d3 *= d;
  p->d4 *= d;
  p->d5 *= d;
  p->d6 *= d;
  p->d7 *= d;
  p->d8 *= d;
}

/* it writes 'p=vec(h_{j,mu})' in matrix form 'v' */
/*
 * Builds the coefficient table read by exposu3():
 *   g_exposu3_c[k] = 1/k!   for k = 0 .. g_exposu3_no_c.
 *
 * g_exposu3_no_c is chosen as the smallest N with 1/N! <= DBL_EPSILON,
 * plus 7 further terms, rounded up to an even number.
 *
 * Any table left over from a previous call is released first, so the
 * function may be called more than once without leaking. If the table
 * cannot be allocated the program is terminated, because exposu3()
 * would otherwise dereference a null pointer.
 */
void init_exposu3(void) {
  int k;
  double fctr = 1.0;

  g_exposu3_no_c = 0;

  /* find the first N for which 1/N! drops to DBL_EPSILON or below */
  while (fctr > DBL_EPSILON) {
    g_exposu3_no_c++;
    fctr /= (double)(g_exposu3_no_c);
  }
  g_exposu3_no_c += 7;
  g_exposu3_no_c += (g_exposu3_no_c%2);

  /* free(NULL) is a no-op, so this is safe on the first call */
  free(g_exposu3_c);
  g_exposu3_c = malloc((g_exposu3_no_c+1)*sizeof(*g_exposu3_c));
  if (g_exposu3_c == NULL) {
    fprintf(stderr, "init_exposu3: not enough memory for %d coefficients! Aborting...\n",
            g_exposu3_no_c+1);
    exit(EXIT_FAILURE);
  }

  /* c[0] = 1, c[k+1] = c[k]/(k+1)  =>  c[k] = 1/k! */
  g_exposu3_c[0] = 1.0;
  for (k = 0; k < g_exposu3_no_c; k++) {
    g_exposu3_c[k+1] = g_exposu3_c[k]/(double)(k+1);
  }
}

/*
 * Writes into *vr a matrix built from the algebra element *p:
 *   1. a copy pa of *p is halved mm times until the estimate tc is <= 1,
 *   2. three complex coefficients p0, p1, p2 are obtained by a backward
 *      recursion over the table g_exposu3_c[] (filled by init_exposu3()),
 *   3. vt = p0 + p1*v + p2*v^2 is formed, with v built from pa,
 *   4. vt is squared mm times to undo the halving, and copied to *vr.
 *
 * NOTE(review): as shown here the body also contains statements that use
 * a, b, a0, a1, a2, a1p, fac, r and i, none of which are declared in this
 * function, and the brace opened after "for(i = 3; ...)" is not closed.
 * These lines look like the previous implementation left interleaved by
 * the diff view -- confirm against the actual expo.c before relying on it.
 */
void exposu3(su3* const vr, const su3adj* const p) {
int n,m,mm;
su3 ALIGN v,v2,vt;
su3adj pa;
double ALIGN d,tc;
_Complex double t;
_Complex double ALIGN p0,p1,p2;
_Complex double ALIGN q0,q1,q2;

/* size estimate of the input: tc = -2 * tr(v^2), with v built from *p */
_make_su3(v,*p);
_su3_times_su3(v2,v,v);
tc = -2.0*(v2.c00 +v2.c11+v2.c22);

/* work on a local copy so that *p is left untouched by the scaling */
pa.d1=(*p).d1;
pa.d2=(*p).d2;
pa.d3=(*p).d3;
pa.d4=(*p).d4;
pa.d5=(*p).d5;
pa.d6=(*p).d6;
pa.d7=(*p).d7;
pa.d8=(*p).d8;

/* halve pa until the estimate is <= 1; mm counts the halvings */
mm=0;
while (tc>1.0) {
mul_su3alg(&pa,0.5);
tc*=0.5;
mm+=1;
}

/* it writes 'p=vec(h_{j,mu})' in matrix form 'v' */
_make_su3(v,pa);
/* calculates v^2 */
_su3_times_su3(v2,v,v);
/* */
a = 0.5 * (creal(v2.c00) + creal(v2.c11) + creal(v2.c22));
/* 1/3 imaginary part of tr v*v2 */
b = 0.33333333333333333 * cimag(v.c00 * v2.c00 + v.c01 * v2.c10 + v.c02 * v2.c20 +
v.c10 * v2.c01 + v.c11 * v2.c11 + v.c12 * v2.c21 +
v.c20 * v2.c02 + v.c21 * v2.c12 + v.c22 * v2.c22 );
a0 = 0.16059043836821615e-9;
a1 = 0.11470745597729725e-10;
a2 = 0.76471637318198165e-12;
fac = 0.20876756987868099e-8; /* 1/12! */
r = 12.0;
for(i = 3; i <= 15; ++i)
{
a1p = a0 + a * a2;
a0 = fac + b * I * a2;
a2 = a1;
a1 = a1p;
fac *= r;
r -= 1.0;
/* t= -tr(X^2)/2*/
t = -0.5*(v2.c00 +v2.c11+v2.c22);
/* d= -1i * det(X)*/
d=-imag_det(&pa);
/* printf(" d= %.16f and t=%.16f + 1i %.16f \n",d,creal(t),cimag(t));*/

/* warn (but continue) when the scaled input is still larger than expected */
/* NOTE(review): t is _Complex double but is passed to fabs(); confirm that using only its real part is intended */
if(fabs(d)>(1.000001*(1.000002-fabs(t))))
printf("The norm of X is larger than 1 and N = %d \n", g_exposu3_no_c);


/* start the recursion from the highest coefficient in the table */
p0=g_exposu3_c[g_exposu3_no_c];
p1=0.0;
p2=0.0;

/* backward recursion over g_exposu3_c[n]; q0..q2 hold the previous p0..p2 */
for (n=(g_exposu3_no_c-1);n>=0;n--) {
q0=p0;
q1=p1;
q2=p2;

p0=g_exposu3_c[n]-I*d*q2;
p1=q0-t*q2;
p2=q1;
}

/* vr = a0 + a1*v + a2*v2 */
vr->c00 = a0 + a1 * v.c00 + a2 * v2.c00;
vr->c01 = a1 * v.c01 + a2 * v2.c01;
vr->c02 = a1 * v.c02 + a2 * v2.c02;
vr->c10 = a1 * v.c10 + a2 * v2.c10;
vr->c11 = a0 + a1 * v.c11 + a2 * v2.c11;
vr->c12 = a1 * v.c12 + a2 * v2.c12;
vr->c20 = a1 * v.c20 + a2 * v2.c20;
vr->c21 = a1 * v.c21 + a2 * v2.c21;
vr->c22 = a0 + a1 * v.c22 + a2 * v2.c22;
/* vt = p0 + p1*v + p2*v2 (p0 contributes to the diagonal entries only) */
vt.c00 = p0 + p1 * v.c00 + p2 * v2.c00;
vt.c01 = p1 * v.c01 + p2 * v2.c01;
vt.c02 = p1 * v.c02 + p2 * v2.c02;
vt.c10 = p1 * v.c10 + p2 * v2.c10;
vt.c11 = p0 + p1 * v.c11 + p2 * v2.c11;
vt.c12 = p1 * v.c12 + p2 * v2.c12;
vt.c20 = p1 * v.c20 + p2 * v2.c20;
vt.c21 = p1 * v.c21 + p2 * v2.c21;
vt.c22 = p0 + p1 * v.c22 + p2 * v2.c22;

/* undo the mm halvings of the argument by squaring the result mm times */
for(m=0;m<mm;m++) {
_su3_times_su3(v2,vt,vt);
vt=v2;
}

/* copy the result to the output matrix */
vr->c00=vt.c00;
vr->c01=vt.c01;
vr->c02=vt.c02;
vr->c10=vt.c10;
vr->c11=vt.c11;
vr->c12=vt.c12;
vr->c20=vt.c20;
vr->c21=vt.c21;
vr->c22=vt.c22;
}

void exposu3_check(su3* const vr, const su3adj* const p, int im) {
Expand Down Expand Up @@ -135,6 +219,12 @@ void restoresu3(su3* const vr, const su3* const u) {
vr->c20 = conj(vr->c01 * vr->c12 - vr->c02 * vr->c11);
vr->c21 = conj(vr->c02 * vr->c10 - vr->c00 * vr->c12);
vr->c22 = conj(vr->c00 * vr->c11 - vr->c01 * vr->c10);

/* compute row 2 as the conjugate of the cross-product of 3 and 1 */
vr->c10 = conj(vr->c21 * vr->c02 - vr->c22 * vr->c01);
vr->c11 = conj(vr->c22 * vr->c00 - vr->c20 * vr->c02);
vr->c12 = conj(vr->c20 * vr->c01 - vr->c21 * vr->c00);

}

void restoresu3_in_place(su3* const u) {
Expand All @@ -156,6 +246,12 @@ void restoresu3_in_place(su3* const u) {
u->c20 = conj(u->c01 * u->c12 - u->c02 * u->c11);
u->c21 = conj(u->c02 * u->c10 - u->c00 * u->c12);
u->c22 = conj(u->c00 * u->c11 - u->c01 * u->c10);

/* compute row 2 as the conjugate of the cross-product of 3 and 1 */
u->c10 = conj(u->c21 * u->c02 - u->c22 * u->c01);
u->c11 = conj(u->c22 * u->c00 - u->c20 * u->c02);
u->c12 = conj(u->c20 * u->c01 - u->c21 * u->c00);

}

/* Exponentiates a hermitian 3x3 matrix Q */
Expand Down
11 changes: 6 additions & 5 deletions expo.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
#ifndef _EXPO_H
#define _EXPO_H

extern void exposu3(su3* const vr, const su3adj* const p);
extern void exposu3_check(su3* const vr, const su3adj* const p, int im);
extern void restoresu3(su3* const vr, const su3* const u);
extern void restoresu3_in_place(su3* const u);
extern void exposu3_in_place(su3* const u);
void init_exposu3();
void exposu3(su3* const vr, const su3adj* const p);
void exposu3_check(su3* const vr, const su3adj* const p, int im);
void restoresu3(su3* const vr, const su3* const u);
void restoresu3_in_place(su3* const u);
void exposu3_in_place(su3* const u);

#endif
17 changes: 16 additions & 1 deletion global.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ EXTERN su3adj ** ddummy;

EXTERN int count00,count01,count10,count11,count20,count21;
EXTERN double g_kappa, g_c_sw, g_beta;
EXTERN double g_mu, g_mu1, g_mu2, g_mu3;
EXTERN double g_mu, g_mu1, g_mu2, g_mu3, g_shift;
EXTERN double g_rgi_C0, g_rgi_C1;

/* Parameters for non-degenerate case */
Expand All @@ -212,6 +212,10 @@ EXTERN int g_mpi_z_rank;
EXTERN int g_mpi_ST_rank;
EXTERN int g_nb_list[8];

/* Variables for exposu3 */
EXTERN int g_exposu3_no_c;
EXTERN double * g_exposu3_c;

/* OpenMP Kahan accumulation arrays */
EXTERN _Complex double *g_omp_acc_cp;
EXTERN double* g_omp_acc_re;
Expand Down Expand Up @@ -282,3 +286,14 @@ void fatal_error(char const *error, char const *function);

#endif

/*
 * Generic macro for swapping two values or pointers of the same size.
 * memcpy is used because it is optimal when the number of bytes to copy
 * is known at compile time.
 * "sizeof(x) == sizeof(y) ? (signed)sizeof(x) : -1" is a compile-time check:
 * if the two sizes differ the temporary array gets a negative length and
 * compilation fails. Only the sizes are compared, not the types.
 * Both arguments must be lvalues; each is evaluated more than once, so they
 * must not have side effects. Requires <string.h> for memcpy.
 */
#define SWAP(x,y) do \
  { unsigned char swap_temp[sizeof(x) == sizeof(y) ? (signed)sizeof(x) : -1]; \
    memcpy(swap_temp, &(y), sizeof(x)); \
    memcpy(&(y), &(x), sizeof(x)); \
    memcpy(&(x), swap_temp, sizeof(x)); \
  } while(0)
3 changes: 3 additions & 0 deletions init/init_gauge_field.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "su3.h"
#include "sse.h"
#include "init_gauge_field.h"
#include "expo.h"

su3 * gauge_field = NULL;
su3_32 * gauge_field_32 = NULL;
Expand All @@ -48,6 +49,8 @@ int init_gauge_field(const int V, const int back) {
g_gauge_field_copy = NULL;
#endif

if (g_exposu3_no_c == 0) init_exposu3();

if((void*)(g_gauge_field = (su3**)calloc(V, sizeof(su3*))) == NULL) {
printf ("malloc errno : %d\n",errno);
errno = 0;
Expand Down
3 changes: 3 additions & 0 deletions init/init_stout_smear_vars.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "global.h"
#include "su3.h"
#include "sse.h"
#include "expo.h"
#include "init_stout_smear_vars.h"

su3 * gauge_field_saved;
Expand Down Expand Up @@ -91,6 +92,8 @@ int init_stout_smear_vars(const int V, const int stout_no_iter)
k = 0;
mu = 0;

if (g_exposu3_no_c == 0) init_exposu3();

/*
* this is the field where we store the smeared force matrices \Sigma^{(k)}_\mu(x)
* eqtn (44) hep-lat/0311018
Expand Down
3 changes: 1 addition & 2 deletions invert.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@
#endif
#include "meas/measurements.h"
#include "source_generation.h"
#include "expo.h"

#define CONF_FILENAME_LENGTH 500

Expand Down Expand Up @@ -179,7 +180,6 @@ int main(int argc, char *argv[])
j = init_gauge_field(VOLUMEPLUSRAND, 0);
j += init_gauge_field_32(VOLUMEPLUSRAND, 0);
#endif

if (j != 0) {
fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
exit(-1);
Expand Down Expand Up @@ -296,7 +296,6 @@ int main(int argc, char *argv[])
exit(-2);
}


if (g_cart_id == 0) {
printf("# Finished reading gauge field.\n");
fflush(stdout);
Expand Down
Loading

0 comments on commit 4fbdef8

Please sign in to comment.