Skip to content

Commit

Permalink
OpenMP parallelization in update_gauge
Browse files Browse the repository at this point in the history
expo functions with pointers and no return value
  • Loading branch information
kostrzewa committed Sep 26, 2012
1 parent 441615c commit 9f495fe
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 53 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ read_input.c
*~
Makefile
config.log
*.mod.c
*.opari.inc
106 changes: 61 additions & 45 deletions expo.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/***********************************************************************
* Copyright (C) 2001 Martin Hasenbusch
* Copyright (C) 2012 Bartosz Kostrzewa (void versions)
*
* This file is part of tmLQCD.
*
Expand All @@ -20,11 +21,12 @@
*
*
* The externally accessible functions are
*
* su3 exposu3(su3adj p)
* su3 exposu3_check(su3adj p)
* su3 restoresu3(su3 u)
* Returns an element of su3
*
* void exposu3(su3* const vr, const su3adj* const p);
* extern void exposu3_check(su3* const vr, const su3adj* const p, int im);
* extern void restoresu3(su3* const vr,const su3* const u);
* extern void restoresu3_in_place(su3* const u);
* extern void exposu3_in_place(su3* const u);
*
* Author: Martin Hasenbusch <[email protected]>
* Tue Aug 28 10:06:56 MEST 2001
Expand All @@ -51,15 +53,15 @@
#include "su3adj.h"
#include "expo.h"

su3 exposu3(su3adj p) {
void exposu3(su3* const vr, const su3adj* const p) {
int i;
su3 ALIGN v,v2,vr;
su3 ALIGN v,v2;
double ALIGN fac,r;
double ALIGN a,b;
_Complex double ALIGN a0,a1,a2,a1p;

/* it writes 'p=vec(h_{j,mu})' in matrix form 'v' */
_make_su3(v,p);
_make_su3(v,*p);
/* calculates v^2 */
_su3_times_su3(v2,v,v);
/* */
Expand All @@ -83,73 +85,87 @@ su3 exposu3(su3adj p) {
r -= 1.0;
}
/* vr = a0 + a1*v + a2*v2 */
vr.c00 = a0 + a1 * v.c00 + a2 * v2.c00;
vr.c01 = a1 * v.c01 + a2 * v2.c01;
vr.c02 = a1 * v.c02 + a2 * v2.c02;
vr.c10 = a1 * v.c10 + a2 * v2.c10;
vr.c11 = a0 + a1 * v.c11 + a2 * v2.c11;
vr.c12 = a1 * v.c12 + a2 * v2.c12;
vr.c20 = a1 * v.c20 + a2 * v2.c20;
vr.c21 = a1 * v.c21 + a2 * v2.c21;
vr.c22 = a0 + a1 * v.c22 + a2 * v2.c22;
return vr;
vr->c00 = a0 + a1 * v.c00 + a2 * v2.c00;
vr->c01 = a1 * v.c01 + a2 * v2.c01;
vr->c02 = a1 * v.c02 + a2 * v2.c02;
vr->c10 = a1 * v.c10 + a2 * v2.c10;
vr->c11 = a0 + a1 * v.c11 + a2 * v2.c11;
vr->c12 = a1 * v.c12 + a2 * v2.c12;
vr->c20 = a1 * v.c20 + a2 * v2.c20;
vr->c21 = a1 * v.c21 + a2 * v2.c21;
vr->c22 = a0 + a1 * v.c22 + a2 * v2.c22;
}

su3 exposu3_check(su3adj p, int im) {

void exposu3_check(su3* const vr, const su3adj* const p, int im) {
/* compute the result by taylor series */
su3 ALIGN v,v2,v3,vr;
su3 ALIGN v,v2,v3;
double ALIGN fac;
int i;

_make_su3(v, p);
_su3_one(vr);
_su3_acc(vr, v);
_make_su3(v, *p);
_su3_one(*vr);
_su3_acc(*vr, v);
_su3_times_su3(v2, v, v);
_su3_refac_acc(vr, 0.5, v2);
_su3_refac_acc(*vr, 0.5, v2);
fac = 0.5;
for(i = 3; i <= im; i++) {
fac = fac/i;
_su3_times_su3(v3, v2, v);
_su3_refac_acc(vr, fac, v3);
_su3_refac_acc(*vr, fac, v3);
_su3_assign(v2, v3);
}
return vr;
}


su3 restoresu3(su3 u) {
su3 ALIGN vr;
void restoresu3(su3* const vr, const su3* const u) {
double ALIGN n0,n1;

/* normalize rows 1 and 2 */
n0 = 1.0 / sqrt(conj(u.c00) * u.c00 + conj(u.c01) * u.c01 + conj(u.c02) * u.c02);
n1 = 1.0 / sqrt(conj(u.c10) * u.c10 + conj(u.c11) * u.c11 + conj(u.c12) * u.c12);
n0 = 1.0 / sqrt(conj(u->c00) * u->c00 + conj(u->c01) * u->c01 + conj(u->c02) * u->c02);
n1 = 1.0 / sqrt(conj(u->c10) * u->c10 + conj(u->c11) * u->c11 + conj(u->c12) * u->c12);

vr.c00 = n0 * u.c00;
vr.c01 = n0 * u.c01;
vr.c02 = n0 * u.c02;
vr->c00 = n0 * u->c00;
vr->c01 = n0 * u->c01;
vr->c02 = n0 * u->c02;

vr.c10 = n1 * u.c10;
vr.c11 = n1 * u.c11;
vr.c12 = n1 * u.c12;
vr->c10 = n1 * u->c10;
vr->c11 = n1 * u->c11;
vr->c12 = n1 * u->c12;

/* compute row 3 as the conjugate of the cross-product of 1 and 2 */
vr.c20 = conj(vr.c01 * vr.c12 - vr.c02 * vr.c11);
vr.c21 = conj(vr.c02 * vr.c10 - vr.c00 * vr.c12);
vr.c22 = conj(vr.c00 * vr.c11 - vr.c01 * vr.c10);

return vr;
vr->c20 = conj(vr->c01 * vr->c12 - vr->c02 * vr->c11);
vr->c21 = conj(vr->c02 * vr->c10 - vr->c00 * vr->c12);
vr->c22 = conj(vr->c00 * vr->c11 - vr->c01 * vr->c10);
}

void restoresu3_in_place(su3* const u) {
double ALIGN n0,n1;

/* normalize rows 1 and 2 */
n0 = 1.0 / sqrt(conj(u->c00) * u->c00 + conj(u->c01) * u->c01 + conj(u->c02) * u->c02);
n1 = 1.0 / sqrt(conj(u->c10) * u->c10 + conj(u->c11) * u->c11 + conj(u->c12) * u->c12);

u->c00 = n0 * u->c00;
u->c01 = n0 * u->c01;
u->c02 = n0 * u->c02;

u->c10 = n1 * u->c10;
u->c11 = n1 * u->c11;
u->c12 = n1 * u->c12;

/* compute row 3 as the conjugate of the cross-product of 1 and 2 */
u->c20 = conj(u->c01 * u->c12 - u->c02 * u->c11);
u->c21 = conj(u->c02 * u->c10 - u->c00 * u->c12);
u->c22 = conj(u->c00 * u->c11 - u->c01 * u->c10);
}

/* Exponentiates a hermitian 3x3 matrix Q */
/* Convenience function -- wrapper around Hasenbusch's implementation */
void exposu3_in_place(su3 *u) {
void exposu3_in_place(su3* const u) {
su3adj ALIGN p;

_trace_lambda(p, *u); /* Projects onto the Gell-Mann matrices */
/* -2.0 to get su3 to su3adjoint consistency ****/
p.d1 *= -0.5; p.d2 *= -0.5; p.d3 *= -0.5; p.d4 *= -0.5;
p.d5 *= -0.5; p.d6 *= -0.5; p.d7 *= -0.5; p.d8 *= -0.5;
*u = exposu3(p);
exposu3(u,&p);
}
9 changes: 5 additions & 4 deletions expo.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
#ifndef _EXPO_H
#define _EXPO_H

extern su3 exposu3(su3adj p);
extern su3 exposu3_check(su3adj p,int im);
extern su3 restoresu3(su3 u);
extern void exposu3_in_place(su3 *u);
extern void exposu3(su3* const vr, const su3adj* const p);
extern void exposu3_check(su3* const vr, const su3adj* const p, int im);
extern void restoresu3(su3* const vr, const su3* const u);
extern void restoresu3_in_place(su3* const u);
extern void exposu3_in_place(su3* const u);

#endif
2 changes: 1 addition & 1 deletion tests/test_su3_algebra.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ TEST(su3_expo_positivedet) {
U.c22 = -0.5279 -0.0022*I;

_trace_lambda(T,Q);
Q = exposu3(T);
exposu3(&Q,&T);

if( creal(Q.c00 - U.c00) > EPS && creal(Q.c01 - U.c01) > EPS && creal(Q.c02 - U.c02) > EPS &&
creal(Q.c10 - U.c10) > EPS && creal(Q.c11 - U.c11) > EPS && creal(Q.c12 - U.c12) > EPS &&
Expand Down
20 changes: 18 additions & 2 deletions update_gauge.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@


void update_gauge(const double step, hamiltonian_field_t * const hf) {

#ifdef OMP
#define static
#pragma omp parallel
{
#endif
int i,mu;
static su3 v,w;
su3 *z;
Expand All @@ -57,17 +61,29 @@ void update_gauge(const double step, hamiltonian_field_t * const hf) {
#pragma pomp inst begin(updategauge)
#endif

#ifdef OMP
#undef static
#endif

#ifdef OMP
#pragma omp for
#endif
for(i = 0; i < VOLUME; i++) {
for(mu = 0; mu < 4; mu++){
/* moment[i][mu] = h_{i,mu}^{alpha} */
xm = &hf->momenta[i][mu];
z = &hf->gaugefield[i][mu];
_su3adj_assign_const_times_su3adj(deriv, step, *xm);
v = restoresu3( exposu3(deriv) );
exposu3(&w,&deriv);
restoresu3(&v,&w);
_su3_times_su3(w, v, *z);
_su3_assign(*z, w);
}
}

#ifdef OMP
} /* OpenMP parallel closing brace */
#endif

#ifdef MPI
/* for parallelization */
Expand Down
2 changes: 1 addition & 1 deletion update_tm.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy,
for(int ix=0;ix<VOLUME;ix++) {
for(int mu=0;mu<4;mu++) {
v=&hf.gaugefield[ix][mu];
*v=restoresu3(*v);
restoresu3_in_place(v);
}
}
}
Expand Down

0 comments on commit 9f495fe

Please sign in to comment.