Working version. Incomplete Yukawa potential and Madelung constant. Ill-conditioning in Yukawa lattice computation
kcroker · Jan 22, 2016 · ef42f9f · ef42f9f
1 parent 60cf031
commit ef42f9f
Show file tree

Hide file tree

Showing 4 changed files with 68 additions and 60 deletions.
diff --git a/forcetree.c b/forcetree.c
@@ -1826,8 +1826,7 @@ int force_treeevaluate_shortrange(int target, int mode)
 
 	  sG = -1;
 
-	  // XXX!
-	  if(0) //r2min > rcut2)
+	  if(r2min > rcut2)
 	    {
 	      /* check whether we can stop walking along this branch */
 	      eff_dist = rcut + 0.5 * nop->len;
@@ -2124,9 +2123,6 @@ int force_treeevaluate_lattice_correction(int target, int mode, double pos_x, do
 	  r2[sG] = dx[sG] * dx[sG] + dy[sG] * dy[sG] + dz[sG] * dz[sG];
 
 	  // KC 1/5/16
-	  // XXX
-	  // WHOA BUDDY
-	  // r2min, r2max are not set if we only have an actual particle!
 	  r2min = r2max = r2[sG];
 	}
       else
@@ -3273,7 +3269,7 @@ void force_treeallocate(int maxnodes, int maxpart)
       // Anything above this is oversampling, because the integrand MUST be zero as far as the machine is concerned
       // (because the normalized greens is bounded above by 1).
       //
-      ngravsPeriodicTable = ngravsConvolutionInit(NTAB, 3, 8);
+      ngravsPeriodicTable = ngravsConvolutionInit(NTAB, 4, 12);
       Z = 1.0/2.0; 
 
       if(!ThisTask)

diff --git a/ngravs.c b/ngravs.c
@@ -824,19 +824,8 @@ double ewald_psi(double x[3])
  *
  */
 
-// KC 1/10/16
-// XXX
-// ym/2 should be on all Yukawa quantities so that a Gauss computation 
-// gives a unit enclosed charge!!
-//
-// For some reason though, puting this in really fucks everything up
-//
-
 /*! A pure Yukawa force
  *
- * Radii take values in [0, BoxLength] for periodic, unconstrained otherwise.
- * If we are in PERIODIC mode, so that the tables do not become sensitive to
- * the BoxLength as given in the configuration files, YUKAWA_IMASS is in units of EN.
  */
 double yukawa(double target, double source, double h, double r, long N) {
 
@@ -850,16 +839,20 @@ double yukawa(double target, double source, double h, double r, long N) {
  */
 double pgyukawa(double target, double source, double k2, double k, long N) {
 
-  //double ym = YUKAWA_IMASS/(2*M_PI);
   double ym = YUKAWA_IMASS/(2*M_PI);
-  return 1.0 / (k2 + ym*ym);
+  double asmth2;
+
+  asmth2 = (2 * M_PI) * All.Asmth[0] / All.BoxSize;
+  asmth2 *= asmth2;
+
+  return 1.0 / (k2 + ym*ym) * exp(-ym*ym*asmth2);
 }
 
 double normed_pgyukawa(double target, double source, double k2, double k, long N) {
 
   // This converts from PMGRID units into shortrange interpolation table units
   double ym = gridKtoNormK(YUKAWA_IMASS/(2*M_PI));
-  return k2 / (k2 + ym*ym);
+  return k2 / (k2 + ym*ym) * exp(-ym*ym*0.25);
 }
 
 /*! This function computes the Madelung constant for the yukawa potential
@@ -1003,10 +996,11 @@ void yukawa_lattice_force(int iii, int jjj, int kkk, double x[3], double force[3
   int i, h[3], n[3], h2;
   double ym;
   double fac;
-
+  
   // KC 11/16/15
   // Note our use of Salin's optimal 'alpha', and our excessive momentum-space
   alpha = 5.64;
+  //alpha = 1.0;
 
   if(iii == 0 && jjj == 0 && kkk == 0)
     return;
@@ -1039,6 +1033,12 @@ void yukawa_lattice_force(int iii, int jjj, int kkk, double x[3], double force[3
   // Looks like this takes the first four images out in position space in each direction (so
   // bracketing by 8 overall)
 
+  // KC 1/19/16
+  // This does not group expressions with the same order of n together in additions.
+  // Change so that we do.
+  //
+  // Note that erfc(x > 27) = 0 in double precision
+  //
   for(n[0] = -5; n[0] <= 5; n[0]++)
     for(n[1] = -5; n[1] <= 5; n[1]++)
       for(n[2] = -5; n[2] <= 5; n[2]++)
@@ -1051,29 +1051,37 @@ void yukawa_lattice_force(int iii, int jjj, int kkk, double x[3], double force[3
  	  // Note, as YUKAWA_IMASS \to zero, we regenerate the Ewald for Coulomb
   	  //	  val = erfc(alpha * r) + 2 * alpha * r / sqrt(M_PI) * exp(-alpha * alpha * r * r);
 
+	  // TYPE I
 	  // 0.5*(A + B) eventually /r^3
-	  val = 0.5000000000000*( exp(ym*r)*gsl_sf_erfc(alpha*r + ym/(2*alpha)) +
-				  exp(-ym*r)*gsl_sf_erfc(alpha*r - ym/(2*alpha)));
+	  val = 0.5*( exp(ym*r)*gsl_sf_erfc(alpha*r + ym/(2*alpha)) +
+		      exp(-ym*r)*gsl_sf_erfc(alpha*r - ym/(2*alpha)));
 
 	  // KC 1/7/16
 	  // For r > rcut, approx = 0.5*exp(ym*r)*erfc(alpha*r + ym/(2*alpha))
 	  // Overall, it will carry a factor of 1/r^2.
 
 	  // 0.5*(A + C*r) eventually /r^3
-	  /* val = 0.5*gsl_sf_erfc(alpha*r - ym/(2*alpha))*exp(-ym*r)*(1 + ym*r); */
-	  /* val += 0.5*gsl_sf_erfc(alpha*r + ym/(2*alpha))*exp(ym*r)*(1 - ym*r); */
+	  /* val = 0.5*gsl_sf_erfc(alpha*r - ym/(2*alpha))*exp(-ym*r)*(1.0/r + ym);  */
+	  /* val += 0.5*gsl_sf_erfc(alpha*r + ym/(2*alpha))*exp(ym*r)*(1.0/r - ym); */
 
 	  // 0.5*(B + D*r) eventually /r^3
 
+	  // TYPE I
   	  for(i = 0; i < 3; i++)
   	    force[i] -= dx[i] / (r * r * r) * val;
 
+	  /* for(i = 0; i < 3; i++) */
+  	  /*   force[i] -= dx[i] / (r * r) * val; */
+
+	  // TYPE II
 	  // Now E
-	  /* val = 2*alpha*exp(-alpha*alpha*r*r-ym*ym/(4*alpha*alpha))/sqrt(M_PI); */
+	  /* val += 2*alpha*exp(-alpha*alpha*r*r-ym*ym/(4*alpha*alpha))/sqrt(M_PI); */
 
 	  // 0.5*ym*(C +1 D) + E eventually /r^2
-  	  val = 0.5000000000000*ym*(-exp(ym*r)*gsl_sf_erfc(alpha*r + ym/(2*alpha)) +
-				    exp(-ym*r)*gsl_sf_erfc(alpha*r - ym/(2*alpha))) +
+	  // 
+	  // The subtraction here could destroy accuracy?
+  	  val = 0.5*ym*(-exp(ym*r)*gsl_sf_erfc(alpha*r + ym/(2*alpha)) +
+			exp(-ym*r)*gsl_sf_erfc(alpha*r - ym/(2*alpha))) +
   	    2*alpha*exp(-alpha*alpha*r*r-ym*ym/(4*alpha*alpha))/sqrt(M_PI);
 
 	  // KC 1/7/16
@@ -1320,11 +1328,6 @@ FLOAT fourierIntegrand(FLOAT k, gravity normKGreen, FLOAT Z) {
 
   FLOAT k2 = k*k;
 
-  // XXX?
-  // But if we fuck with this, we'll break newton.  The exponential factor
-  // seems to be correct because it works with newton, where the normgreens = 1.0
-  //
-  // But if this were the problem, it would only be on the lowend of the force...
   return (*normKGreen)(1, 1, k2, k, 1) * exp(-k2 * Z * Z);
 }
 
@@ -1369,9 +1372,8 @@ int performConvolution(struct ngravsInterpolant *s, gravity normKGreen, FLOAT Z,
   }
 
   // 1) FFTW needs this loaded in wonk order
-  // XXX? Should this be explicitly zero'd to avoid a constant force contribution
-  // to the corretion tabulations?  Apparently not, because the k = 0 for
-  // normed newton is 1 and that works...
+  // Note we need zero power in order to compute the 
+  // potential term correctly.
   in[0].re = fourierIntegrand(jTok(0, Z, s), normKGreen, Z);
 
   for(j = 1; j < s->ngravs_tpm_n/2; ++j) {
@@ -1390,23 +1392,18 @@ int performConvolution(struct ngravsInterpolant *s, gravity normKGreen, FLOAT Z,
   /*   printf("%.15f %.15f\n", mTox(m, s), out[m].re*norm); */
   /* exit(0); */
 
+  // ???
   sum = s->ngravs_tpm_n;
 
   for(m = 0; m < s->ntab; ++m)
     oRes[m] = out[gadgetToFourier(m, s)].re * norm;
- 
+
   // 3) Integrate so as to constrain the error correctly:
   // Newton-Cotes 4-point rule
   // Run the sum at double precision, though we may assign to lower precision
-  //
-  // KC 1/5/16
-  // XXX?
-  // The first term of the integrated quantity should be zero, as it corresponds to r=0?
-  //
-  // Is there an off by one error here?
-  //sum = 0.0;
-  // Remove the lowest 
+  // First term of in[] should be zero.
   in[0].re = 0.0;
+  sum = 0.0;
   for(m = 0; m < s->ngravs_tpm_n-3; m += 3) {
     sum += (mTox(m+3, s) - mTox(m, s)) * 0.125 * norm * (out[m].re + 3.0*out[m+1].re + 3.0*out[m+2].re + out[m+3].re);
 

diff --git a/pm_periodic.c b/pm_periodic.c
@@ -138,7 +138,6 @@ void pm_init_periodic_allocate(int dimprod)
   int dimprodmax;
   double bytes_tot = 0;
   size_t bytes;
-  int n, m;
 
   MPI_Allreduce(&dimprod, &dimprodmax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
 
@@ -184,7 +183,6 @@ void pm_init_periodic_allocate(int dimprod)
  */
 void pm_init_periodic_free(void)
 {
-  int n,m;
   /* allocate the memory to hold the FFT fields */
   free(workspace);
   free(forcegrid);
@@ -488,14 +486,13 @@ void pmforce_periodic(void)
 		//
 		// We also apply the short range truncation, here in k-space
 		//
-		//
 		// This is the only place asmth has entered anywhere!
 		smth = (*GreensFxns[nA][nB])(All.MassTable[nA], All.MassTable[nB], k2, sqrt(k2), 1);
-		smth *= -exp(-k2 * asmth2) * ff * ff * ff * ff;
+		smth *= -exp(-k2*asmth2) * ff * ff * ff * ff;
 
 		// NOTE: Transposed order of indices due to FFTW in k-space
 		ip = PMGRID * (PMGRID / 2 + 1) * (y - slabstart_y) + (PMGRID / 2 + 1) * x + z;
-
+		
 		// KC 27.9.15
 		// 
 		// CONSTRAINT: 
@@ -1061,11 +1058,11 @@ void pmpotential_periodic(void)
 	    }
 
       // 12/31/15
-      // XXX!  Need to be adjusted to check for nan.  If so, then do this.  Otherwise, 
-      // we need the DC power.
-      // This looks like a check to set the DC power to zero....
-      /* if(slabstart_y == 0) */
-      /* 	fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0; */
+      // Check for nan.  If so, set DC power to zero (this was what was done previously)
+      if(slabstart_y == 0) {
+	if(fft_of_rhogrid[0].re != fft_of_rhogrid[0].re) // IEEE spec!
+	  fft_of_rhogrid[0].re = fft_of_rhogrid[0].im = 0.0;
+      }
 
       /* Do the FFT to get the potential */
 

diff --git a/rdep.py b/rdep.py
@@ -46,11 +46,29 @@ def __init__(self, ptype, crap2, crap3, x, y, z, fx, fy, fz, jx, jy, jz, cfx, cf
 # cen = np.array([4967.0, 4967.0, 4967.0])
 cen = np.array([random.random()*L for x in range(3)])
 
-# These separations probe the default TPM transition scale
-seps_pmxition = [10 + n*(400 - 10)/(N/2) for n in range(N/2)]
-seps_longrange = [400 + n*(L - 400)/(N/2) for n in range(N/2)]
+seps_pmxition = []
+seps_longrange = []
+rutile = raw_input("Enter custom range (min,max) [Enter] for default: ")
+bounds = [float(x) for x in rutile.split(',')]
+
+if len(bounds) > 1:
+
+    # Reorder if necessary
+    if bounds[0] > bounds[1]:
+        tmp = bounds[0]
+        bounds[0] = bounds[1]
+        bounds[1] = tmp
+
+    # Assign the partitioning
+    seps_pmxition = []
+    seps_longrange = [bounds[0] + n*(bounds[1] - bounds[0])/N for n in range(N)]
+else:
+    print "Using default ranges over entire length"
+
+    # These separations probe the default TPM transition scale
+    seps_pmxition = [10 + n*(400 - 10)/(N/2) for n in range(N/2)]
+    seps_longrange = [400 + n*(L - 400)/(N/2) for n in range(N/2)]
 
-#seps = [L/s for s in range(1,N+1)]
 seps = [x for x in seps_pmxition + seps_longrange]
 print seps
 
@@ -60,7 +78,7 @@ def __init__(self, ptype, crap2, crap3, x, y, z, fx, fy, fz, jx, jy, jz, cfx, cf
     if not os.path.exists("./%s/%s" % (proggyName, label)):
         os.mkdir("./%s/%s" % (proggyName, label))
     else:
-        print "%s run already exists.  Refusing to overwrite."
+        print "%s run already exists.  Refusing to overwrite." % label
         sys.exit()
 
     for i,d in enumerate(seps):