forked from etmc/tmLQCD
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
OpenMP parallelization in update_gauge
expo functions with pointers and no return value
- Loading branch information
Showing
6 changed files
with
88 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,5 @@ read_input.c | |
*~ | ||
Makefile | ||
config.log | ||
*.mod.c | ||
*.opari.inc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
/*********************************************************************** | ||
* Copyright (C) 2001 Martin Hasenbusch | ||
* Copyright (C) 2012 Bartosz Kostrzewa (void versions) | ||
* | ||
* This file is part of tmLQCD. | ||
* | ||
|
@@ -20,11 +21,12 @@ | |
* | ||
* | ||
* The externally accessible functions are | ||
* | ||
* su3 exposu3(su3adj p) | ||
* su3 exposu3_check(su3adj p) | ||
* su3 restoresu3(su3 u) | ||
* Returns an element of su3 | ||
* | ||
* void exposu3(su3* const vr, const su3adj* const p); | ||
* extern void exposu3_check(su3* const vr, const su3adj* const p, int im); | ||
* extern void restoresu3(su3* const vr,const su3* const u); | ||
* extern void restoresu3_in_place(su3* const u); | ||
* extern void exposu3_in_place(su3* const u); | ||
* | ||
* Author: Martin Hasenbusch <[email protected]> | ||
* Tue Aug 28 10:06:56 MEST 2001 | ||
|
@@ -51,15 +53,15 @@ | |
#include "su3adj.h" | ||
#include "expo.h" | ||
|
||
su3 exposu3(su3adj p) { | ||
void exposu3(su3* const vr, const su3adj* const p) { | ||
int i; | ||
su3 ALIGN v,v2,vr; | ||
su3 ALIGN v,v2; | ||
double ALIGN fac,r; | ||
double ALIGN a,b; | ||
_Complex double ALIGN a0,a1,a2,a1p; | ||
|
||
/* it writes 'p=vec(h_{j,mu})' in matrix form 'v' */ | ||
_make_su3(v,p); | ||
_make_su3(v,*p); | ||
/* calculates v^2 */ | ||
_su3_times_su3(v2,v,v); | ||
/* */ | ||
|
@@ -83,73 +85,87 @@ su3 exposu3(su3adj p) { | |
r -= 1.0; | ||
} | ||
/* vr = a0 + a1*v + a2*v2 */ | ||
vr.c00 = a0 + a1 * v.c00 + a2 * v2.c00; | ||
vr.c01 = a1 * v.c01 + a2 * v2.c01; | ||
vr.c02 = a1 * v.c02 + a2 * v2.c02; | ||
vr.c10 = a1 * v.c10 + a2 * v2.c10; | ||
vr.c11 = a0 + a1 * v.c11 + a2 * v2.c11; | ||
vr.c12 = a1 * v.c12 + a2 * v2.c12; | ||
vr.c20 = a1 * v.c20 + a2 * v2.c20; | ||
vr.c21 = a1 * v.c21 + a2 * v2.c21; | ||
vr.c22 = a0 + a1 * v.c22 + a2 * v2.c22; | ||
return vr; | ||
vr->c00 = a0 + a1 * v.c00 + a2 * v2.c00; | ||
vr->c01 = a1 * v.c01 + a2 * v2.c01; | ||
vr->c02 = a1 * v.c02 + a2 * v2.c02; | ||
vr->c10 = a1 * v.c10 + a2 * v2.c10; | ||
vr->c11 = a0 + a1 * v.c11 + a2 * v2.c11; | ||
vr->c12 = a1 * v.c12 + a2 * v2.c12; | ||
vr->c20 = a1 * v.c20 + a2 * v2.c20; | ||
vr->c21 = a1 * v.c21 + a2 * v2.c21; | ||
vr->c22 = a0 + a1 * v.c22 + a2 * v2.c22; | ||
} | ||
|
||
su3 exposu3_check(su3adj p, int im) { | ||
|
||
void exposu3_check(su3* const vr, const su3adj* const p, int im) { | ||
/* compute the result by taylor series */ | ||
su3 ALIGN v,v2,v3,vr; | ||
su3 ALIGN v,v2,v3; | ||
double ALIGN fac; | ||
int i; | ||
|
||
_make_su3(v, p); | ||
_su3_one(vr); | ||
_su3_acc(vr, v); | ||
_make_su3(v, *p); | ||
_su3_one(*vr); | ||
_su3_acc(*vr, v); | ||
_su3_times_su3(v2, v, v); | ||
_su3_refac_acc(vr, 0.5, v2); | ||
_su3_refac_acc(*vr, 0.5, v2); | ||
fac = 0.5; | ||
for(i = 3; i <= im; i++) { | ||
fac = fac/i; | ||
_su3_times_su3(v3, v2, v); | ||
_su3_refac_acc(vr, fac, v3); | ||
_su3_refac_acc(*vr, fac, v3); | ||
_su3_assign(v2, v3); | ||
} | ||
return vr; | ||
} | ||
|
||
|
||
su3 restoresu3(su3 u) { | ||
su3 ALIGN vr; | ||
void restoresu3(su3* const vr, const su3* const u) { | ||
double ALIGN n0,n1; | ||
|
||
/* normalize rows 1 and 2 */ | ||
n0 = 1.0 / sqrt(conj(u.c00) * u.c00 + conj(u.c01) * u.c01 + conj(u.c02) * u.c02); | ||
n1 = 1.0 / sqrt(conj(u.c10) * u.c10 + conj(u.c11) * u.c11 + conj(u.c12) * u.c12); | ||
n0 = 1.0 / sqrt(conj(u->c00) * u->c00 + conj(u->c01) * u->c01 + conj(u->c02) * u->c02); | ||
n1 = 1.0 / sqrt(conj(u->c10) * u->c10 + conj(u->c11) * u->c11 + conj(u->c12) * u->c12); | ||
|
||
vr.c00 = n0 * u.c00; | ||
vr.c01 = n0 * u.c01; | ||
vr.c02 = n0 * u.c02; | ||
vr->c00 = n0 * u->c00; | ||
vr->c01 = n0 * u->c01; | ||
vr->c02 = n0 * u->c02; | ||
|
||
vr.c10 = n1 * u.c10; | ||
vr.c11 = n1 * u.c11; | ||
vr.c12 = n1 * u.c12; | ||
vr->c10 = n1 * u->c10; | ||
vr->c11 = n1 * u->c11; | ||
vr->c12 = n1 * u->c12; | ||
|
||
/* compute row 3 as the conjugate of the cross-product of 1 and 2 */ | ||
vr.c20 = conj(vr.c01 * vr.c12 - vr.c02 * vr.c11); | ||
vr.c21 = conj(vr.c02 * vr.c10 - vr.c00 * vr.c12); | ||
vr.c22 = conj(vr.c00 * vr.c11 - vr.c01 * vr.c10); | ||
|
||
return vr; | ||
vr->c20 = conj(vr->c01 * vr->c12 - vr->c02 * vr->c11); | ||
vr->c21 = conj(vr->c02 * vr->c10 - vr->c00 * vr->c12); | ||
vr->c22 = conj(vr->c00 * vr->c11 - vr->c01 * vr->c10); | ||
} | ||
|
||
void restoresu3_in_place(su3* const u) { | ||
double ALIGN n0,n1; | ||
|
||
/* normalize rows 1 and 2 */ | ||
n0 = 1.0 / sqrt(conj(u->c00) * u->c00 + conj(u->c01) * u->c01 + conj(u->c02) * u->c02); | ||
n1 = 1.0 / sqrt(conj(u->c10) * u->c10 + conj(u->c11) * u->c11 + conj(u->c12) * u->c12); | ||
|
||
u->c00 = n0 * u->c00; | ||
u->c01 = n0 * u->c01; | ||
u->c02 = n0 * u->c02; | ||
|
||
u->c10 = n1 * u->c10; | ||
u->c11 = n1 * u->c11; | ||
u->c12 = n1 * u->c12; | ||
|
||
/* compute row 3 as the conjugate of the cross-product of 1 and 2 */ | ||
u->c20 = conj(u->c01 * u->c12 - u->c02 * u->c11); | ||
u->c21 = conj(u->c02 * u->c10 - u->c00 * u->c12); | ||
u->c22 = conj(u->c00 * u->c11 - u->c01 * u->c10); | ||
} | ||
|
||
/* Exponentiates a hermitian 3x3 matrix Q */ | ||
/* Convenience function -- wrapper around Hasenbusch's implementation */ | ||
void exposu3_in_place(su3 *u) { | ||
void exposu3_in_place(su3* const u) { | ||
su3adj ALIGN p; | ||
|
||
_trace_lambda(p, *u); /* Projects onto the Gell-Mann matrices */ | ||
/* -2.0 to get su3 to su3adjoint consistency ****/ | ||
p.d1 *= -0.5; p.d2 *= -0.5; p.d3 *= -0.5; p.d4 *= -0.5; | ||
p.d5 *= -0.5; p.d6 *= -0.5; p.d7 *= -0.5; p.d8 *= -0.5; | ||
*u = exposu3(p); | ||
exposu3(u,&p); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters