From ccbb1e82af78a734101c18f5bb2a54b32c116c73 Mon Sep 17 00:00:00 2001 From: Eduard Valeyev Date: Thu, 12 Dec 2024 00:24:02 -0500 Subject: [PATCH] GaussianConvolution1D supports range restriction on kernel --- src/madness/mra/convolution1d.h | 151 ++++++++++++++++++++++++-------- 1 file changed, 114 insertions(+), 37 deletions(-) diff --git a/src/madness/mra/convolution1d.h b/src/madness/mra/convolution1d.h index 9ed2d28b553..dc96b67ccc5 100644 --- a/src/madness/mra/convolution1d.h +++ b/src/madness/mra/convolution1d.h @@ -41,10 +41,11 @@ #include #include #include +#include #include /// \file mra/convolution1d.h -/// \brief Compuates most matrix elements over 1D operators (including Gaussians) +/// \brief Computes most matrix elements over 1D operators (including Gaussians) /// \ingroup function @@ -260,28 +261,38 @@ namespace madness { int k; ///< Wavelet order int npt; ///< Number of quadrature points (is this used?) int maxR; ///< Number of lattice translations for sum + double bloch_k; ///< k in exp(i k R) Bloch phase factor folded into lattice sum + unsigned int D; ///< kernel range limited to [-D/2,D/2] (in simulation cell units), useful for finite-range convolutions with periodic functions; for infinite-range use lattice summation (maxR > 0) Tensor quad_x; Tensor quad_w; Tensor c; Tensor hgT, hg; Tensor hgT2k; - double bloch_k; ///< k in exp(i k R) Bloch phase factor folded into lattice sum mutable SimpleCache, 1> rnlp_cache; mutable SimpleCache, 1> rnlij_cache; mutable SimpleCache, 1> ns_cache; mutable SimpleCache, 2> mod_ns_cache; + static unsigned int maxD() { return std::numeric_limits::max(); } + bool lattice_summed() const { return maxR != 0; } + bool range_limited() const { return D != maxD(); } + virtual ~Convolution1D() {}; - Convolution1D(int k, int npt, int maxR, double bloch_k = 0.0) + Convolution1D(int k, int npt, int maxR, + double bloch_k = 0.0, + unsigned int D = maxD()) : k(k) , npt(npt) , maxR(maxR) , quad_x(npt) , quad_w(npt) , bloch_k(bloch_k) + , D(D) { + if (range_limited()) MADNESS_CHECK(!lattice_summed()); + auto success = autoc(k,&c); MADNESS_CHECK(success); @@ -306,22 +317,39 @@ namespace madness { /// Returns true if the block of rnlp is expected to be small including periodicity bool get_issmall(Level n, Translation lx) const { - if (maxR == 0) { - return issmall(n, lx); + if (lattice_summed()) { + Translation twon = Translation(1) << n; + for (int R = -maxR; R <= maxR; ++R) { + if (!issmall(n, R * twon + lx)) + return false; } + return true; + } else { // !lattice_summed + if (!range_limited()) + return issmall(n, lx); else { - Translation twon = Translation(1)< 0 || lx < -1; + } else { // n > 0 + if (lx >= 0) + result = (1 << (n - 1)) * Translation(D) <= lx; + else + result = (-(1 << (n - 1)) * Translation(D)) > lx; } + } + return result; } /// Returns the level for projection - //virtual Level natural_level() const { - // return 13; - //} virtual Level natural_level() const {return 13;} /// Computes the transition matrix elements for the convolution for n,l @@ -333,6 +361,7 @@ namespace madness { /// This is computed from the matrix elements over the correlation /// function which in turn are computed from the matrix elements /// over the double order legendre polynomials. + /// \note if `this->range_limited()==true`, `θ(D/2 - |x-y|) K(x-y)` is used as the kernel const Tensor& rnlij(Level n, Translation lx, bool do_transpose=false) const { const Tensor* p=rnlij_cache.getptr(n,lx); if (p) return *p; @@ -517,7 +546,7 @@ namespace madness { else { // PROFILE_BLOCK(Convolution1Drnlp); // Too fine grain for routine profiling - if (maxR > 0) { + if (lattice_summed()) { Translation twon = Translation(1)<(2*k); for (int R=-maxR; R<=maxR; ++R) { @@ -631,7 +660,7 @@ namespace madness { } } - virtual Level natural_level() const {return op.natural_level();} + virtual Level natural_level() const final {return op.natural_level();} struct Shmoo { typedef Tensor returnT; @@ -654,12 +683,12 @@ namespace madness { } }; - Tensor rnlp(Level n, Translation lx) const { + Tensor rnlp(Level n, Translation lx) const final { return adq1(lx, lx+1, Shmoo(n, lx, this), 1e-12, this->npt, this->quad_x.ptr(), this->quad_w.ptr(), 0); } - bool issmall(Level n, Translation lx) const { + bool issmall(Level n, Translation lx) const final { if (lx < 0) lx = 1 - lx; // Always compute contributions to nearest neighbor coupling // ... we are two levels below so 0,1 --> 0,1,2,3 --> 0,...,7 @@ -696,8 +725,9 @@ namespace madness { const int m; ///< Order of derivative (0, 1, or 2 only) explicit GaussianConvolution1D(int k, Q coeff, double expnt, - int m, bool periodic, double bloch_k = 0.0) - : Convolution1D(k,k+11,maxR(periodic,expnt),bloch_k) + int m, bool periodic, double bloch_k = 0.0, + unsigned int D = Convolution1D::maxD()) + : Convolution1D(k,k+11,maxR(periodic,expnt),bloch_k, D) , coeff(coeff) , expnt(expnt) , natlev(Level(0.5*log(expnt)/log(2.0)+1)) @@ -715,7 +745,7 @@ namespace madness { virtual ~GaussianConvolution1D() {} - virtual Level natural_level() const { + virtual Level natural_level() const final { return natlev; } @@ -736,12 +766,37 @@ namespace madness { /// \code /// beta = alpha * 2^(-2*n) /// \endcode - Tensor rnlp(Level n, Translation lx) const { + Tensor rnlp(Level n, const Translation lx) const final { int twok = 2*this->k; Tensor v(twok); // Can optimize this away by passing in - - Translation lkeep = lx; - if (lx<0) lx = -lx-1; + KahanAccumulator v_accumulator[twok]; + constexpr bool use_kahan = false; // change to true to use Kahan accumulator + + // if outside the range, early return, else update the integration limits + std::pair integration_limits{0,1}; + if (this->range_limited()) { + const auto two_to_nm1 = (1ul << n) * 0.5; + if (lx < 0) { + integration_limits = std::make_pair( + std::min(std::max(-two_to_nm1 * this->D - lx, 0.), 1.), 1.); + } else { + integration_limits = std::make_pair( + 0., std::max(std::min(two_to_nm1 * this->D - lx, 1.), 0.)); + } + // early return if empty integration range (this indicates that + // the range restriction makes the kernel zero everywhere in the box) + if (integration_limits.first == integration_limits.second) { + MADNESS_ASSERT(this->outside_the_range(n, lx)); + return v; + } + else { + MADNESS_ASSERT(!this->outside_the_range(n, lx)); + } + } + // integration range lower bound, upper bound, length + const auto x0 = integration_limits.first; + const auto x1 = integration_limits.second; + const auto L = x1 - x0; /* Apply high-order Gauss Legendre onto subintervals @@ -780,7 +835,7 @@ namespace madness { double h = 1.0/sqrt(beta); // 2.0*sqrt(0.5/beta); long nbox = long(1.0/h); if (nbox < 1) nbox = 1; - h = 1.0/nbox; + h = L/nbox; // Find argmax such that h*scaledcoeff*exp(-argmax)=1e-22 ... if // beta*xlo*xlo is already greater than argmax we can neglect this @@ -793,10 +848,29 @@ namespace madness { else if (m == 2) sch *= expnt*expnt; double argmax = std::abs(log(1e-22/sch)); // perhaps should be -log(1e-22/sch) ? - for (long box=0; box argmax) break; - for (long i=0; inpt; ++i) { + // to screen need to iterate over boxes in the order of decreasing kernel values + const bool left_to_right = lx >= 0; + // if going left-to-right, start at left, else at right + const double xstartedge = left_to_right ? x0+lx : lx + 1; + + // with oscillatory integrands the heuristic for reducing roundoff + // is to sum from large to small, i.e. proceed in same direction as the order of boxes + // WARNING: the grid points in quad_{x,w} are in order of decreasing x! + // hence decrement grid point indices for left_to_right, increment otherwise + const long first_pt = left_to_right ? this->npt-1: 0; + const long sentinel_pt = left_to_right ? -1 : this->npt; + const auto next_pt = [lx, left_to_right](auto i) { return left_to_right ? i-1 : i+1; }; + + double xlo = left_to_right ? xstartedge : xstartedge-h; + double xhi; + for (long box=0; box!=nbox; ++box, xlo = (left_to_right ? xhi : xlo-h)) { + + // can ignore this and rest of boxes if the Gaussian has decayed enough at the side of the box closest to the origin + xhi=xlo+h; + const auto xabs_min = std::min(std::abs(xhi),std::abs(xlo)); + if (beta*xabs_min*xabs_min > argmax) break; + + for (long i=first_pt; i!=sentinel_pt; i=next_pt(i)) { #ifdef IBMXLC double phix[80]; #else @@ -814,22 +888,25 @@ namespace madness { } legendre_scaling_functions(xx-lx,twok,phix); - for (long p=0; p(v_accumulator[p]); } return v; - }; + } /// Returns true if the block is expected to be small - bool issmall(Level n, Translation lx) const { + bool issmall(Level n, Translation lx) const final { double beta = expnt * pow(0.25,double(n)); Translation ll; if (lx > 0)