FIXED: loss of precision in transition tabulation when operating with…

… singles (FFT must be at double precision in tabulations. This is still a huge performance bottleneck)
kcroker · Feb 8, 2016 · 34ad97c · 34ad97c
1 parent 61e9e30
commit 34ad97c
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 19 deletions.
diff --git a/Makefile.reference b/Makefile.reference
@@ -168,7 +168,7 @@ OBJS   = main.o  run.o  predict.o begrun.o endrun.o global.o  \
 	 domain.o  allvars.o potential.o  \
          forcetree.o   peano.o gravtree_forcetest.o \
 	 pm_periodic.o pm_nonperiodic.o longrange.o \
-	ngravs.o ngravs_core.o
+	 ngravs.o ngravs_core.o
 
 INCL   = allvars.h  proto.h  tags.h  Makefile
 

diff --git a/domain.c b/domain.c
@@ -194,6 +194,10 @@ void domain_decompose(void)
     }
   free(temp);
 
+  // KC 2/7/16
+  // Note that this only checks the active softenings,
+  // the only ones that affect particles present in
+  // any particular initial condition.
 #ifndef UNEQUALSOFTENINGS
   for(i = 0; i < 6; i++)
     if(Ntype[i] > 0)

diff --git a/forcetree.c b/forcetree.c
@@ -3184,6 +3184,7 @@ void force_treeallocate(int maxnodes, int maxpart)
   FLOAT u;
   int nA, nB;
   FLOAT Z;
+  double temp[NTAB], tempI[NTAB];
 
 #ifdef NGRAVS_TREEPM_XITION_CHECK  
   char buf[512];
@@ -3287,8 +3288,8 @@ void force_treeallocate(int maxnodes, int maxpart)
 	  i = performConvolution(ngravsPeriodicTable, 
 				 NormedGreensFxns[nB][nA], 
 				 Z,
-				 shortrange_fourier_pot[nB][nA], 
-				 shortrange_fourier_force[nB][nA]);
+				 temp, 
+				 tempI);
 	  if(i) {
 
 	    printf("ngravs: could not allocate memory for FFT on task %d.  Reduce OL and/or LEN and recompile.", ThisTask);
@@ -3334,15 +3335,22 @@ void force_treeallocate(int maxnodes, int maxpart)
 	    if(!ThisTask && !skipWrite)
 	      fprintf(fhand, "%.15e %.15e %.15e\n", 
 		      u,
-		      shortrange_fourier_pot[nB][nA][i],
-		      shortrange_fourier_force[nB][nA][i]);
+		      temp[i],
+		      tempI[i]);
 #endif
 	    // Divide by the appropriate values of u to save computation time in actual use
-	    shortrange_fourier_force[nB][nA][i] /= u*u;
-	    shortrange_fourier_pot[nB][nA][i] /= u;
+	    tempI[i] /= u*u;
+	    temp[i] /= u;
 
-	    // Precompute buddy!
-	    shortrange_fourier_force[nB][nA][i] -= shortrange_fourier_pot[nB][nA][i];
+	    // NOW Lose precision as we assign to the final table that needs to run fast in cache
+	    // Do potential first
+	    shortrange_fourier_pot[nB][nA][i] = temp[i];
+
+	    // Precompute buddy! (working in double)
+	    tempI[i] -= temp[i];
+
+	    // NOW lose precision as we assign to the final table for forces
+	    shortrange_fourier_force[nB][nA][i] = tempI[i];
 	  }
 
 #if defined NGRAVS_DEBUG_FORCETRACE && defined NGRAVS_TREEPM_XITION_CHECK	

diff --git a/ngravs.h b/ngravs.h
@@ -69,7 +69,7 @@ double yukawa_madelung(double ym);
 
 // Functions required for convolution
 int gadgetToFourier(int i, struct ngravsInterpolant *s);
-int performConvolution(struct ngravsInterpolant *s, gravity normKGreen, FLOAT Z, FLOAT *oRes, FLOAT *oResI);
+int performConvolution(struct ngravsInterpolant *s, gravity normKGreen, FLOAT Z, double *oRes, double *oResI);
 struct ngravsInterpolant *ngravsConvolutionInit(int ntab, int len, int ol);
 void ngravsConvolutionFree(struct ngravsInterpolant *s);
 FLOAT mTox(int m, struct ngravsInterpolant *s);

diff --git a/ngravs_core.c b/ngravs_core.c
@@ -75,7 +75,7 @@ FLOAT fourierIntegrand(FLOAT k, gravity normKGreen, FLOAT Z) {
   return (*normKGreen)(1, 1, k2, k, 1) * exp(-k2 * Z * Z);
 }
 
-int performConvolution(struct ngravsInterpolant *s, gravity normKGreen, FLOAT Z, FLOAT *oRes, FLOAT *oResI) {
+int performConvolution(struct ngravsInterpolant *s, gravity normKGreen, FLOAT Z, double *oRes, double *oResI) {
 
   fftw_complex *in, *out;
   int m,j;
@@ -128,6 +128,11 @@ int performConvolution(struct ngravsInterpolant *s, gravity normKGreen, FLOAT Z,
   // ???
   sum = s->ngravs_tpm_n;
 
+  // KC 2/8/16
+  // XXX
+  // Loss of precision here, we need to compute all of these 
+  // in terms of double, and assign to the final interpolation table
+  // whatever FLOAT happens to be...
   for(m = 0; m < s->ntab; ++m)
     oRes[m] = out[gadgetToFourier(m, s)].re * norm;
 
@@ -203,7 +208,6 @@ void init_grav_maps(void) {
 
   int i, j;
   int counts[N_GRAVS];
-  int n;
 
 #ifdef BAMTEST
   double q;

diff --git a/utilities/Configuration.tpmfp b/utilities/Configuration.tpmfp
@@ -106,12 +106,12 @@ GravityConstantInternal  0
 % That initial condition file uses only GADGET-2 types Halo (1) and Disk (2).  Here GADGET-2 types 
 % {0,1,3,4,5} are bound to the first gravitational interaction (0), while GADGET-2 type {2} is bound 
 % to the second gravitational interaction (1).  These interactions are defined in ngravs.c and ngravs.h
-% GravityGas 0
-% GravityHalo 0 
-% GravityDisk 1
-% GravityBulge 0
-% GravityStars 0
-% GravityBndry 0
+GravityGas 0
+GravityHalo 0 
+GravityDisk 1
+GravityBulge 0
+GravityStars 0
+GravityBndry 0
 
 % Softening lengths
 

diff --git a/utilities/tpmfp.py b/utilities/tpmfp.py
@@ -94,7 +94,7 @@ def __init__(self, ptype, ts, crap3, x, y, z, fx, fy, fz, jx, jy, jz, cfx, cfy,
 
                 body.append(pos)
 
-            print "Placed %d particles at radius %f\n" % (k, r)
+            #print "Placed %d particles at radius %f\n" % (k, r)
         # Now we manually create the stub file and write it out
         stub = open("%s/stub" % stash, 'w')
         stub.write("# g2munge: %d\n\n# Group 0: 0\n\n# Group 1: %d\n" % (len(body)+1, len(body)))