diff --git a/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc b/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc index 5b008c052..07aa26d87 100644 --- a/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc +++ b/lib/actions/ferm/fermacts/fermacts_aggregate_w.cc @@ -6,6 +6,7 @@ #include "actions/ferm/fermacts/fermacts_aggregate_w.h" #include "actions/ferm/fermacts/unprec_clover_fermact_w.h" +#include "actions/ferm/fermacts/unprec_exp_clover_fermact_w.h" #include "actions/ferm/fermacts/unprec_wilson_fermact_w.h" #include "actions/ferm/fermacts/unprec_parwilson_fermact_w.h" #include "actions/ferm/fermacts/unprec_graphene_fermact_w.h" @@ -102,6 +103,7 @@ namespace Chroma success &= EvenOddPrecCloverFermActEnv::registerAll(); success &= SymEvenOddPrecCloverFermActEnv::registerAll(); success &= UnprecCloverFermActEnv::registerAll(); + success &= UnprecExpCloverFermActEnv::registerAll(); success &= EvenOddPrecCloverOrbifoldFermActEnv::registerAll(); success &= EvenOddPrecSLICFermActEnv::registerAll(); success &= EvenOddPrecSLRCFermActEnv::registerAll(); diff --git a/lib/actions/ferm/linop/exp_clover_term_base_w.h b/lib/actions/ferm/linop/exp_clover_term_base_w.h index 397624265..07b3f908d 100644 --- a/lib/actions/ferm/linop/exp_clover_term_base_w.h +++ b/lib/actions/ferm/linop/exp_clover_term_base_w.h @@ -8,7 +8,7 @@ #include "chroma_config.h" #include "linearop.h" - +#include "actions/ferm/linop/clover_term_base_w.h" namespace Chroma { @@ -19,9 +19,7 @@ namespace Chroma */ template - class ExpCloverTermBase : public DslashLinearOperator, - multi1d > + class ExpCloverTermBase : public CloverTermBase< T, U> { public: //! No real need for cleanup here diff --git a/lib/actions/ferm/linop/exp_clover_term_qdp_w.h b/lib/actions/ferm/linop/exp_clover_term_qdp_w.h index b16c545a0..f2b8cd2ae 100644 --- a/lib/actions/ferm/linop/exp_clover_term_qdp_w.h +++ b/lib/actions/ferm/linop/exp_clover_term_qdp_w.h @@ -309,6 +309,10 @@ namespace Chroma #endif #if 1 + //Set the highest power of A^n for the exp sum. This allows for N_exp_default < 5 to compare with clover + int pow_max=5; + if (N_exp_default <5) + pow_max=N_exp_default; // Accumulate exponential from only A RComplex tmp[12]; @@ -320,7 +324,7 @@ namespace Chroma } // Main loop: chi = psi + q[i]/q[i-1] A chi - for (int pow = 5; pow > 0; --pow) + for (int pow = pow_max; pow > 0; --pow) { siteApplicationBlock(tmp, tri_in.A, cchi); for (int cspin = 0; cspin < 6; cspin++) @@ -399,6 +403,23 @@ namespace Chroma #endif } + //Apply coefficient to site + template + inline void siteApplicationCoeff(RComplex* __restrict__ cchi, const ExpClovTriang& tri_in, + int pow_i,int pow_j, + const RComplex* const __restrict__ ppsi) + { + + // Top block + for (int cspin = 0; cspin < 6; cspin++) + cchi[cspin] = tri_in.C[0][pow_i][pow_j] * ppsi[cspin]; + + // Second Block + for (int cspin = 6; cspin < 12; cspin++) + cchi[cspin] = tri_in.C[1][pow_i][pow_j]* ppsi[cspin]; + } + + template inline void siteApplicationPower(RComplex* __restrict__ cchi, const ExpClovTriang& tri_in, @@ -501,17 +522,73 @@ namespace Chroma * \param cb Checkerboard of OUTPUT std::vector (Read) */ + //! Take deriv of D^power + /*! + * \param chi left std::vector on cb (Read) + * \param psi right std::vector on 1-cb (Read) + * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read) + * \param cb Checkerboard of chi std::vector (Read) + * + * \return Computes \f$chi^\dag * \dot(D} * psi\f$ + */ + //! Take deriv of D + /*! + * \param chi left std::vector (Read) + * \param psi right std::vector (Read) + * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read) + * + * \return Computes \f$chi^\dag * \dot(D} * psi\f$ + */ + void deriv(multi1d& ds_u, + const T& chi, const T& psi, + enum PlusMinus isign) const;//{ExpCloverTermBase::deriv(ds_u,chi,psi,isign);} + + void deriv(multi1d& ds_u, + const T& chi, const T& psi, + enum PlusMinus isign, int cb) const; + + //! Take deriv of D + /*! + * \param chi left vectors (Read) + * \param psi right vectors (Read) + * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read) + * \param cb Checkerboard of chi std::vector (Read) + * + * \return Computes \f$chi^\dag * \dot(D} * psi\f$ + */ + void derivMultipole(multi1d& ds_u, + const multi1d& chi, const multi1d& psi, + enum PlusMinus isign) const; + + //! Take deriv of D + /*! + * \param chi left vectors on cb (Read) + * \param psi right vectors on cb (Read) + * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read) + * \param cb Checkerboard of chi std::vector (Read) + * + * \return Computes \f$chi^\dag * \dot(D} * psi\f$ + */ + + void derivMultipole(multi1d& ds_u, + const multi1d& chi, const multi1d& psi, + enum PlusMinus isign, int cb) const; + + void fillRefDiag(Real diag); // Reference exponential using old fashioned taylor expansion void applyRef(T& chi, const T& psi, enum PlusMinus isign, int N = N_exp) const; - // Appl;y a power of a matrix from A^0 to A^5 + // Apply a power of a matrix from A^0 to A^5 void applyPowerSite(T& chi, const T& psi, enum PlusMinus isign, int site, int power = 1) const; - // Appl;y a power of a matrix from A^0 to A^5 + // Apply a power of a matrix from A^0 to A^5 void applyPower(T& chi, const T& psi, enum PlusMinus isign, int cb, int power = 1) const; + // Apply coefficients to powers of a matrix A + void applyCoeff(T& chi, const T& psi, enum PlusMinus isign,int cb, int pow_i, int pow_j) const; + // Apply exponential operator void apply(T& chi, const T& psi, enum PlusMinus isign, int cb) const override; @@ -1021,6 +1098,169 @@ namespace Chroma #endif } + //! Take deriv of D + /*! + * \param chi left std::vector (Read) + * \param psi right std::vector (Read) + * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read) + * + * \return Computes \f$\chi^\dag * \dot(D} * \psi\f$ + */ + template + void QDPExpCloverTermT::deriv(multi1d& ds_u, + const T& chi, const T& psi, + enum PlusMinus isign) const + { + START_CODE(); + + // base deriv resizes. + // Even even checkerboard + deriv(ds_u, chi, psi, isign,0); + + // Odd Odd checkerboard + multi1d ds_tmp; + deriv(ds_tmp, chi, psi, isign,1); + + ds_u += ds_tmp; + + END_CODE(); + } + + //! Take deriv of D + /*! + * \param chi left std::vector on cb (Read) + * \param psi right std::vector on 1-cb (Read) + * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read) + * \param cb Checkerboard of chi std::vector (Read) + * + * \return Computes \f$\chi^\dag * \dot(D} * \psi\f$ + */ + + template + void QDPExpCloverTermT::deriv(multi1d& ds_u, + const T& chi, const T& psi, + enum PlusMinus isign, int cb) const + { + START_CODE(); + + // Do I still need to do this? + if( ds_u.size() != Nd ) { + ds_u.resize(Nd); + } + + ds_u = zero; + multi1d ds_u_tmp; + ds_u_tmp.resize(Nd); + + // Get the links + //const multi1d& u = getU(); + + T ppsi= zero; + T cchi= zero; + T f_chi= zero; + f_chi=chi; + + // The exp derivative is computed as + // A'+AA'/2+A'A/2+A'AA/6+AA'A/6+AAA'/6 = Sum A^i A' A^j + // applyCoeff multiplies the chi by the exponential term factor + // and the factors from using the Caley Hamilton for A^n, for n>5 + + for(int i=0;i<=5;i++){ + for(int j=0;j<=5;j++){ + (*this).applyCoeff(f_chi, chi, isign,cb,i,j); + (*this).applyPower(ppsi, psi, PLUS, cb, j); + (*this).applyPower(cchi, f_chi, PLUS, cb,i); + + CloverTermBase::deriv(ds_u_tmp,cchi,ppsi,isign,cb); + + for(int i=0;i + void QDPExpCloverTermT::derivMultipole(multi1d& ds_u, + const multi1d& chi, const multi1d& psi, + enum PlusMinus isign) const + { + START_CODE(); + + // base deriv resizes. + // Even even checkerboard + derivMultipole(ds_u, chi, psi, isign,0); + + // Odd Odd checkerboard + multi1d ds_tmp; + derivMultipole(ds_tmp, chi, psi, isign,1); + + ds_u += ds_tmp; + + END_CODE(); + } + + template + void QDPExpCloverTermT::derivMultipole(multi1d& ds_u, + const multi1d& chi, const multi1d& psi, + enum PlusMinus isign, int cb) const + { + START_CODE(); + + + // Do I still need to do this? + if( ds_u.size() != Nd ) { + ds_u.resize(Nd); + } + + ds_u = zero; + multi1d ds_u_tmp; + ds_u_tmp.resize(Nd); + + // Get the links + //const multi1d& u = getU(); + + multi1d ppsi,cchi,f_chi; + + f_chi.resize(chi.size()); + cchi.resize(chi.size()); + ppsi.resize(chi.size()); + + for(int i=0;i5 + + for(int i=0;i<=5;i++){ + for(int j=0;j<=5;j++){ + for(int k=0;k::derivMultipole(ds_u_tmp,cchi,ppsi,isign,cb); + + for(int i=0;i(sign * tab[block][row][i] / (REALT)(fact)); } } - } + + //HMC: adding the calculation of the C_ij + for (int i = 0; i < 6; i++) + { + for (int j = 0; j < 6; j++) + { + tri[site].C[block][i][j] = RScalar(tab[block][0][i])*RScalar(tab[block][0][j]); + } + } + + fact = 1; + unsigned long fact_row = 1; + + for (unsigned int row = 0; row <= N_exp; ++row) + { + if (row!=0) + fact_row *= (unsigned long)(row); + fact=fact_row*(unsigned long)(row+1); + for(unsigned int col = 0; col <= N_exp-row; ++col) + { + if(row!=0 || col!=0) //row=0, col=0 computed above + { + + //This is the factor on the exp = c_n x^n, for the derivative of the n-term x^row x'x^col + //the factor is row+col+1,where row+col=n-1 + if( col !=0) + fact *= (unsigned long)(row+col+1); + for (int i = 0; i < 6; i++) + { + for (int j = 0; j < 6; j++) + { + tri[site].C[block][i][j] += RScalar(tab[block][col][j])*RScalar(tab[block][row][i] / (REALT)(fact)); + } + } + } + } + } + + }//for block ends // Assemble te exponential from the q-s and powers of A. // siteExponentiate(tri[site]); @@ -1409,6 +1687,18 @@ namespace Chroma int power = 1; }; + template + struct ApplyDerivCoeffArgs { + typedef typename WordType::Type_t REALT; + T& chi; + const T& psi; + const ExpClovTriang* tri; + int cb; + int pow_i = 1; + int pow_j = 1; + }; + + template void applySitePowerLoop(int lo, int hi, int MyId, ApplyPowerArgs* arg) { @@ -1452,6 +1742,43 @@ namespace Chroma int cb; }; + template + void applySiteCoeffLoop(int lo, int hi, int MyId, ApplyDerivCoeffArgs* arg) + { +#ifndef QDP_IS_QDPJIT + // This is essentially the body of the previous "Apply" + // but now the args are handed in through user arg struct... + + START_CODE(); + + typedef typename WordType::Type_t REALT; + // Unwrap the args... + T& chi = arg->chi; + const T& psi = arg->psi; + const ExpClovTriang* tri = arg->tri; + int cb = arg->cb; + int pow_i = arg->pow_i; + int pow_j = arg->pow_j; + const int n = 2 * Nc; + + for (int ssite = lo; ssite < hi; ++ssite) + { + + int site = rb[cb].siteTable()[ssite]; + + RComplex* cchi = (RComplex*)&(chi.elem(site).elem(0).elem(0)); + + const RComplex* const ppsi = + (const RComplex* const) & (psi.elem(site).elem(0).elem(0)); + + siteApplicationCoeff(cchi, tri[site], pow_i,pow_j, ppsi); + } + END_CODE(); +#endif + } // Function + + + template void applySiteLoop(int lo, int hi, int MyId, ApplyArgs* arg) { @@ -1546,6 +1873,30 @@ namespace Chroma #endif } + template + void QDPExpCloverTermT::applyCoeff(T& chi, const T& psi, enum PlusMinus isign,int cb, int pow_i, int pow_j) const + { +#ifndef QDP_IS_QDPJIT + START_CODE(); + + if (Ns != 4) + { + QDPIO::cerr << __func__ << ": CloverTerm::apply requires Ns==4" << std::endl; + QDP_abort(1); + } + + QDPExpCloverEnv::ApplyDerivCoeffArgs arg = {chi, psi, tri, cb, pow_i, pow_j}; + int num_sites = rb[cb].siteTable().size(); + + // The dispatch function is at the end of the file + // ought to work for non-threaded targets too... + dispatch_to_threads(num_sites, arg, QDPExpCloverEnv::applySiteCoeffLoop); + (*this).getFermBC().modifyF(chi, QDP::rb[cb]); + + END_CODE(); +#endif + } + template void QDPExpCloverTermT::apply(T& chi, const T& psi, enum PlusMinus isign, int cb) const diff --git a/lib/actions/ferm/linop/unprec_exp_clover_linop_w.cc b/lib/actions/ferm/linop/unprec_exp_clover_linop_w.cc index 77e8fdda5..4ccc9f6c4 100644 --- a/lib/actions/ferm/linop/unprec_exp_clover_linop_w.cc +++ b/lib/actions/ferm/linop/unprec_exp_clover_linop_w.cc @@ -58,6 +58,11 @@ namespace Chroma A.deriv(ds_u, chi, psi, isign); + for (int mu = 0; mu < Nd; mu++) + { + ds_u[mu] *= (Real(Nd) + param.Mass); + } + multi1d ds_tmp(Nd); ds_tmp = zero;