|
| 1 | +/*********************************************************************** |
| 2 | + * |
| 3 | + * Copyright (C) 2013 Albert Deuzeman |
| 4 | + * |
| 5 | + * This file is part of tmLQCD. |
| 6 | + * |
| 7 | + * tmLQCD is free software: you can redistribute it and/or modify |
| 8 | + * it under the terms of the GNU General Public License as published by |
| 9 | + * the Free Software Foundation, either version 3 of the License, or |
| 10 | + * (at your option) any later version. |
| 11 | + * |
| 12 | + * tmLQCD is distributed in the hope that it will be useful, |
| 13 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | + * GNU General Public License for more details. |
| 16 | + * |
| 17 | + * You should have received a copy of the GNU General Public License |
| 18 | + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. |
| 19 | + * |
| 20 | + ************************************************************************/ |
| 21 | + |
| 22 | +#if HAVE_CONFIG_H |
| 23 | +#include <config.h> |
| 24 | +#endif |
| 25 | +#include <math.h> |
| 26 | +#include <complex.h> |
| 27 | + |
| 28 | +#include "su3.h" |
| 29 | + |
| 30 | +#ifndef OMP |
| 31 | +static |
| 32 | +#endif |
| 33 | +void exponent_from_coefficients(su3 *out, _Complex double f0, _Complex double f1, _Complex double f2, su3 const *in) |
| 34 | +{ |
| 35 | + su3 ALIGN tmp; |
| 36 | + _complex_times_su3(tmp, f2, *in); |
| 37 | + _su3_add_equals_complex_identity(tmp, f1); |
| 38 | + _su3_times_su3(*out, tmp, *in); |
| 39 | + _su3_add_equals_complex_identity(*out, f0); |
| 40 | +} |
| 41 | + |
| 42 | +void cayley_hamilton_exponent(su3* expA, su3 const *A) |
| 43 | +{ |
| 44 | + static double const fac_1_3 = 1 / 3.0; |
| 45 | + |
| 46 | + _Complex double f0,f1,f2; |
| 47 | + |
| 48 | + /* c0 = det[A] */ |
| 49 | + double c0 = I * (A->c00 * (A->c11 * A->c22 - A->c12 * A->c21) + |
| 50 | + A->c01 * (A->c12 * A->c20 - A->c10 * A->c22) + |
| 51 | + A->c02 * (A->c10 * A->c21 - A->c11 * A->c20) ); |
| 52 | + |
| 53 | + /* c1 = 0.5 * Tr[AA] */ |
| 54 | + double c1 = -0.5 * (A->c00 * A->c00 + A->c01 * A->c10 + A->c02 * A->c20 + |
| 55 | + A->c10 * A->c01 + A->c11 * A->c11 + A->c12 * A->c21 + |
| 56 | + A->c20 * A->c02 + A->c21 * A->c12 + A->c22 * A->c22 ); |
| 57 | + |
| 58 | + /* There is a special, but common (cold start) case where the given matrix is actually 0! |
| 59 | + * We need to account for it. */ |
| 60 | + if (c0 == 0 && c1 == 0) |
| 61 | + { |
| 62 | + _su3_one(*expA); |
| 63 | + f1 = I; |
| 64 | + f2 = -0.5; |
| 65 | + return; |
| 66 | + } |
| 67 | + |
| 68 | + /* P&M give symmetry relations that can be used when c0 < 0, to avoid the numerically problematic c0 -> -c0_max limit. |
| 69 | + We note the sign here for future reference, then continue with c0 as if it were positive. */ |
| 70 | + int c0_negative = (c0 < 0); |
| 71 | + c0 = fabs(c0); |
| 72 | + |
| 73 | + /* The call to fmin below is needed, because for small deviations alpha from zero -- O(10e-12) -- rounding errors can cause c0 > c0max by epsilon. |
| 74 | + In that case, acos(c0/c0max) will produce NaNs, whereas the mathematically correct conclusion would be that theta is zero to machine precision! |
| 75 | + Note that this approach will *not* produce identity and zero for all output, but rather the correct answer of order (I + alpha) for exp(iQ). */ |
| 76 | + |
| 77 | + double c0max = 2.0 * pow(fac_1_3 * c1, 1.5); |
| 78 | + double theta_3 = fac_1_3 * acos(fmin(c0 / c0max, 1.0)); |
| 79 | + |
| 80 | + double u = sqrt(fac_1_3 * c1) * cos(theta_3); |
| 81 | + double w = sqrt(c1) * sin(theta_3); |
| 82 | + |
| 83 | + /* Calculate and cache some repeating factors. * |
| 84 | + * We can fold in the sign immediately -- c.f. f_j(-c0, c1) = -1^j * conj(f_j(c0, c1)) |
| 85 | + * This should just amount to potentially adding a minus to all imaginary components and an overall phase for f1. */ |
| 86 | + _Complex double ma = cexp(2 * I * u); |
| 87 | + _Complex double mb = cexp(-I * u); |
| 88 | + double cw = cos(w); |
| 89 | + double u2 = u * u; |
| 90 | + double w2 = w * w; |
| 91 | + |
| 92 | + /* Modification w.r.t. Peardon & Morningstar: w is always positive, so |w| = w */ |
| 93 | + double xi0 = (w > 0.05) ? (sin(w) / w) |
| 94 | + : 1 - 0.16666666666666667 * w2 * (1 - 0.05 * w2 * (1 - 0.023809523809523808 * w2)); |
| 95 | + double divisor = 1.0 / (9.0 * u2 - w2); |
| 96 | + |
| 97 | + f0 = divisor * (ma * (u * u - w * w) + mb * (8 * u * u * cw + 2 * I * u * (3 * u * u + w * w) * xi0)); |
| 98 | + f1 = divisor * (-2 * I * u * ma + mb * (2 * I * u * cw + (3 * u * u - w * w) * xi0)); |
| 99 | + f2 = divisor * (mb * (cw + 3 * I * u * xi0) - ma); |
| 100 | + |
| 101 | + /* The first point where we use the symmetry relations to calculate the negative c0 possibility */ |
| 102 | + if (c0_negative) |
| 103 | + { |
| 104 | + f0 = conj(f0); |
| 105 | + f1 = conj(f1); |
| 106 | + f2 = conj(f2); |
| 107 | + } |
| 108 | + |
| 109 | + exponent_from_coefficients(expA, f0, f1, f2, A); |
| 110 | + |
| 111 | + return; |
| 112 | + } |
| 113 | + |
| 114 | +void project_traceless_antiherm(su3 *in) |
| 115 | +{ |
| 116 | + static const double fac_3 = 1.00 / 3.00; |
| 117 | + double tr_in = fac_3 * (cimag(in->c00) + cimag(in->c11) + cimag(in->c22)); |
| 118 | + |
| 119 | + in->c00 = (cimag(in->c00) - tr_in) * I; |
| 120 | + in->c11 = (cimag(in->c11) - tr_in) * I; |
| 121 | + in->c22 = (cimag(in->c22) - tr_in) * I; |
| 122 | + |
| 123 | + in->c01 -= conj(in->c10); |
| 124 | + in->c01 *= 0.50; |
| 125 | + in->c10 = -conj(in->c01); |
| 126 | + |
| 127 | + in->c02 -= conj(in->c20); |
| 128 | + in->c02 *= 0.50; |
| 129 | + in->c20 = -conj(in->c02); |
| 130 | + |
| 131 | + in->c12 -= conj(in->c21); |
| 132 | + in->c12 *= 0.50; |
| 133 | + in->c21 = -conj(in->c12); |
| 134 | +} |
| 135 | + |
0 commit comments