Merge pull request #195 from nlesc-dirac/fixes

Diffuse shapelet sky models in calibration
nlesc-dirac · Apr 12, 2024 · 9e85e86 · 9e85e86
2 parents 410e0f7 + 4894ab2
commit 9e85e86
Show file tree

Hide file tree

Showing 15 changed files with 535 additions and 135 deletions.
diff --git a/README.md b/README.md
@@ -200,6 +200,9 @@ Spatial regularization (with distributed multi-directional calibration) enables
 
 After each solution, images showing the spatial model (amplitude) will be created as ```.PPM``` files.
 
+#### 5a) Diffuse sky models in calibration
+When a spatial model is enabled, it is also possible to apply the spatial model to a model of the diffuse sky background. The diffuse sky model can have any number of shapelet components, but they should all belong to one cluster (say cluster *45*). The ```-D``` option enables the use of the diffuse sky model together with the spatial model, for example ```-D 45,0.1```, where *45* is the cluster id of the diffuse sky model and *0.1* is the regularization factor used while applying the spatial model. The larger this regularization factor is, the more strongly the spatial model is enforced on the diffuse sky model.
+
 ### 6) Solution format
 All SAGECal solutions are stored as text files. Lines starting with '#' are comments.
 The first non-comment line includes some general information, i.e.

diff --git a/src/MPI/main.cpp b/src/MPI/main.cpp
@@ -34,7 +34,7 @@ using namespace Data;
 
 void
 print_copyright(void) {
-  cout<<"SAGECal-MPI 0.8.2 (C) 2011-2024 Sarod Yatawatta"<<endl;
+  cout<<"SAGECal-MPI 0.8.3 (C) 2011-2024 Sarod Yatawatta"<<endl;
 }
 
 

diff --git a/src/MPI/sagecal_slave.cpp b/src/MPI/sagecal_slave.cpp
@@ -230,6 +230,20 @@ cerr<<"Error: Worker "<<myrank<<": Recheck your allocation or reduce number of w
       }
      }
 
+#ifdef DEBUG
+    /* open text files for each MS, each line re,im XX,XY,YX,YY */
+    vector<FILE *> debug_vec(mymscount);
+    if (Data::spatialreg && sp_diffuse_id>=0) {
+         for(int cm=0; cm<mymscount; cm++) {
+           string filebuff=std::string(myms[cm])+std::string(".coh.txt\0");
+           if ((debug_vec[cm]=fopen(filebuff.c_str(),"w+"))==0) {
+             fprintf(stderr,"%s: %d: no file\n",__FILE__,__LINE__);
+             exit(1);
+            }
+         }
+    }
+#endif /* DEBUG */
+
     vector<double *> p_vec(mymscount);
     vector<double **> pm_vec(mymscount);
 
@@ -657,7 +671,28 @@ cout<<myrank<<" : "<<cm<<": downweight ratio ("<<iodata_vec[cm].fratio<<") based
 
         /* Re-calculate model for cluster id 'sp_diffuse_id' */
         for(int cm=0; cm<mymscount; cm++) {
-          recalculate_diffuse_coherencies(iodata_vec[cm].u,iodata_vec[cm].v,iodata_vec[cm].w,coh_vec[cm],iodata_vec[cm].N,iodata_vec[cm].Nbase*iodata_vec[cm].tilesz,barr_vec[cm],carr_vec[cm],M,iodata_vec[cm].freq0,iodata_vec[cm].deltaf,iodata_vec[cm].deltat,iodata_vec[cm].dec0,Data::min_uvcut,Data::max_uvcut,sp_diffuse_id,sh_n0,sh_beta,&Zb[cm*4*iodata_vec[0].N*G],Data::Nt);
+#ifdef HAVE_CUDA
+     if (GPUpredict) {
+          recalculate_diffuse_coherencies(iodata_vec[cm].u,iodata_vec[cm].v,iodata_vec[cm].w,coh_vec[cm],iodata_vec[cm].N,iodata_vec[cm].Nbase*iodata_vec[cm].tilesz,barr_vec[cm],carr_vec[cm],M,iodata_vec[cm].freq0,iodata_vec[cm].deltaf,iodata_vec[cm].deltat,iodata_vec[cm].dec0,Data::min_uvcut,Data::max_uvcut,sp_diffuse_id,sh_n0,sh_beta,&Zb[cm*4*iodata_vec[0].N*G],Data::Nt,1);
+     } else {
+          recalculate_diffuse_coherencies(iodata_vec[cm].u,iodata_vec[cm].v,iodata_vec[cm].w,coh_vec[cm],iodata_vec[cm].N,iodata_vec[cm].Nbase*iodata_vec[cm].tilesz,barr_vec[cm],carr_vec[cm],M,iodata_vec[cm].freq0,iodata_vec[cm].deltaf,iodata_vec[cm].deltat,iodata_vec[cm].dec0,Data::min_uvcut,Data::max_uvcut,sp_diffuse_id,sh_n0,sh_beta,&Zb[cm*4*iodata_vec[0].N*G],Data::Nt,0);
+     }
+#else 
+          recalculate_diffuse_coherencies(iodata_vec[cm].u,iodata_vec[cm].v,iodata_vec[cm].w,coh_vec[cm],iodata_vec[cm].N,iodata_vec[cm].Nbase*iodata_vec[cm].tilesz,barr_vec[cm],carr_vec[cm],M,iodata_vec[cm].freq0,iodata_vec[cm].deltaf,iodata_vec[cm].deltat,iodata_vec[cm].dec0,Data::min_uvcut,Data::max_uvcut,sp_diffuse_id,sh_n0,sh_beta,&Zb[cm*4*iodata_vec[0].N*G],Data::Nt,0);
+#endif /* HAVE_CUDA */
+
+#ifdef DEBUG
+          /* save calculated coherencies in text file, re,im XX,XY,YX,YY,
+           * note that coherencies need to be multiplied by the solutions to make sense */
+          if (admm>=Nadmm-Data::admm_cadence) {
+          for (int nb=0; nb<iodata_vec[cm].Nbase*iodata_vec[cm].tilesz; nb++) {
+            fprintf(debug_vec[cm],"%e %e %e %e %e %e %e %e\n",creal(coh_vec[cm][4*M*nb+4*sp_diffuse_id]),cimag(coh_vec[cm][4*M*nb+4*sp_diffuse_id]),
+            creal(coh_vec[cm][4*M*nb+4*sp_diffuse_id+1]),cimag(coh_vec[cm][4*M*nb+4*sp_diffuse_id+1]),
+            creal(coh_vec[cm][4*M*nb+4*sp_diffuse_id+2]),cimag(coh_vec[cm][4*M*nb+4*sp_diffuse_id+2]),
+            creal(coh_vec[cm][4*M*nb+4*sp_diffuse_id+3]),cimag(coh_vec[cm][4*M*nb+4*sp_diffuse_id+3]));
+          }
+          }
+#endif /* DEBUG */
         }
       }
       /************************************************************************/
@@ -1106,6 +1141,13 @@ cout<<myrank<<" : "<<cm<<": downweight ratio ("<<iodata_vec[cm].fratio<<") based
       free(Zspat);
       free(Zb);
       free(B);
+
+#ifdef DEBUG
+      /* close files */
+      for(int cm=0; cm<mymscount; cm++) {
+         fclose(debug_vec[cm]);
+      }
+#endif /* DEBUG */
   }
   /**********************************************************/
 

diff --git a/src/MS/main.cpp b/src/MS/main.cpp
@@ -35,7 +35,7 @@ using namespace Data;
 
 void
 print_copyright(void) {
-  cout<<"SAGECal 0.8.2 (C) 2011-2024 Sarod Yatawatta"<<endl;
+  cout<<"SAGECal 0.8.3 (C) 2011-2024 Sarod Yatawatta"<<endl;
 }
 
 

diff --git a/src/buildsky/annotate.py b/src/buildsky/annotate.py
@@ -193,7 +193,7 @@ def annotate_lsm_sky(infilename,clusterfilename,outfilename,clid=None,color='yel
     v=pp.search(eachline)
     if v!= None:
        # iterate over list of source names (names can also have a '.')
-       CL[str(v.group('col1'))]=re.split('[^a-zA-Z0-9_\.]+',re.sub('\n','',str(v.group('col3'))))
+       CL[str(v.group('col1'))]=re.split(r'[^a-zA-Z0-9_\.]+',re.sub('\n','',str(v.group('col3'))))
 
   print('Read %d clusters'%len(CL))
 

diff --git a/src/lib/Dirac/Dirac_common.h b/src/lib/Dirac/Dirac_common.h
@@ -654,6 +654,8 @@ __attribute__ ((target(MIC)))
 #endif
 extern void
 my_dscal(int N, double a, double *x);
+extern void
+my_dscal_inc(int N, double a, double *x, int inc);
 #ifdef USE_MIC
 __attribute__ ((target(MIC)))
 #endif
@@ -673,6 +675,8 @@ __attribute__ ((target(MIC)))
 #endif
 extern double
 my_dnrm2(int N, double *x);
+extern double
+my_dnrm2_inc(int N, double *x, int inc);
 #ifdef USE_MIC
 __attribute__ ((target(MIC)))
 #endif

diff --git a/src/lib/Dirac/fista.c b/src/lib/Dirac/fista.c
@@ -21,6 +21,8 @@
 #include <string.h>
 #include "Dirac.h"
 
+#define FISTA_L_MIN 1e2
+#define FISTA_L_MAX 1e7
 
 /* 
  * Z = arg min \| Z_k - Z Phi_k\|^2 + \lambda \|Z\|^2 + \mu \|Z\|_1
@@ -42,6 +44,11 @@ update_spatialreg_fista(complex double *Z, complex double *Zbar, complex double
   complex double *Zold,*Y;
   /* Lipschitz constant of gradient, use ||Phikk||^2 as estimate */
   double L=my_cdot(2*G*2*G,Phikk,Phikk);
+  /* if 1/L too large, might diverge, so catch it */
+  if (L<FISTA_L_MIN) { L=FISTA_L_MIN; }
+  /* if 1/L too small, will give zero solution, so catch it */
+  if (L>FISTA_L_MAX) { L=FISTA_L_MAX; }
+
   /* intial t */
   double t=1.0;
   if ((gradf=(complex double*)calloc((size_t)2*Npoly*N*2*G,sizeof(complex double)))==0) {
@@ -75,7 +82,7 @@ update_spatialreg_fista(complex double *Z, complex double *Zbar, complex double
     /* take gradient descent step Y - 1/L gradf */
     my_caxpy(2*Npoly*N*2*G, gradf, -1.0/L, Y);
     /* soft threshold and update Z */
-    double thresh=t*mu;
+    double thresh=mu/L;
     for (int ci=0; ci<2*Npoly*N*2*G; ci++) {
        double r=creal(Y[ci]);
        double r1=fabs(r)-thresh; 
@@ -90,12 +97,13 @@ update_spatialreg_fista(complex double *Z, complex double *Zbar, complex double
     }
     double t0=t;
     t=(1.0+sqrt(1.0+4.0*t*t))*0.5;
-    /* update Y = Z + (t-1)/told (Z-Zold) = (1+(t-1)/told) Z - (t-1)/told Zold */
+    /* Zold <= Zold-Z */
+    my_caxpy(2*Npoly*N*2*G, Z, -1.0, Zold);
+    printf("FISTA %d ||grad||=%lf ||Z-Zold||=%lf\n",it,my_dnrm2(2*2*Npoly*N*2*G,(double*)gradf),my_dnrm2(2*2*Npoly*N*2*G,(double*)Zold)/my_dnrm2(2*2*Npoly*N*2*G,(double*)Z));
+    /* update Y = Z + (told-1)/t(Z-Zold) */
     memcpy(Y,Z,2*Npoly*N*2*G*sizeof(complex double));
-    double scalefac=(t-1.0)/t0;
-    my_cscal(2*Npoly*N*2*G,1.0+scalefac,Y);
+    double scalefac=(t0-1.0)/t;
     my_caxpy(2*Npoly*N*2*G, Zold, -scalefac, Y);
-    //printf("%lf %lf %lf %lf %lf\n",t,creal(Y[10]),cimag(Y[10]),creal(Z[10]),cimag(Z[10]));
   }
 
   free(gradf);
@@ -130,6 +138,11 @@ update_spatialreg_fista_with_diffconstraint(complex double *Z, complex double *Z
   complex double *Zold,*Y;
   /* Lipschitz constant of gradient, use ||Phikk||^2 as estimate */
   double L=my_cdot(2*G*2*G,Phikk,Phikk);
+  /* if 1/L too large, might diverge, so catch it */
+  if (L<FISTA_L_MIN) { L=FISTA_L_MIN; }
+  /* if 1/L too small, will give zero solution, so catch it */
+  if (L>FISTA_L_MAX) { L=FISTA_L_MAX; }
+
   /* intial t */
   double t=1.0;
   if ((gradf=(complex double*)calloc((size_t)2*Npoly*N*2*G,sizeof(complex double)))==0) {
@@ -173,7 +186,7 @@ update_spatialreg_fista_with_diffconstraint(complex double *Z, complex double *Z
     /* take gradient descent step Y - 1/L gradf */
     my_caxpy(2*Npoly*N*2*G, gradf, -1.0/L, Y);
     /* soft threshold and update Z */
-    double thresh=t*mu;
+    double thresh=mu/L;
     for (int ci=0; ci<2*Npoly*N*2*G; ci++) {
        double r=creal(Y[ci]);
        double r1=fabs(r)-thresh; 
@@ -188,12 +201,13 @@ update_spatialreg_fista_with_diffconstraint(complex double *Z, complex double *Z
     }
     double t0=t;
     t=(1.0+sqrt(1.0+4.0*t*t))*0.5;
-    /* update Y = Z + (t-1)/told (Z-Zold) = (1+(t-1)/told) Z - (t-1)/told Zold */
+    /* Zold <= Zold-Z */
+    my_caxpy(2*Npoly*N*2*G, Z, -1.0, Zold);
+    printf("FISTA %d ||grad||=%lf ||Z-Zold||=%lf\n",it,my_dnrm2(2*2*Npoly*N*2*G,(double*)gradf),my_dnrm2(2*2*Npoly*N*2*G,(double*)Zold)/my_dnrm2(2*2*Npoly*N*2*G,(double*)Z));
+    /* update Y = Z + (told-1)/t(Z-Zold) */
     memcpy(Y,Z,2*Npoly*N*2*G*sizeof(complex double));
-    double scalefac=(t-1.0)/t0;
-    my_cscal(2*Npoly*N*2*G,1.0+scalefac,Y);
+    double scalefac=(t0-1.0)/t;
     my_caxpy(2*Npoly*N*2*G, Zold, -scalefac, Y);
-    //printf("%lf %lf %lf %lf %lf\n",t,creal(Y[10]),cimag(Y[10]),creal(Z[10]),cimag(Z[10]));
   }
 
   free(gradf);

diff --git a/src/lib/Dirac/myblas.c b/src/lib/Dirac/myblas.c
@@ -61,6 +61,11 @@ __attribute__ ((target(MIC)))
   dscal_(&N,&a,x,&i);
 }
 void
+my_dscal_inc(int N, double a, double *x, int inc) {
+  extern void dscal_(int *N, double *alpha, double *x, int *incx);
+  dscal_(&N,&a,x,&inc);
+}
+void
 my_sscal(int N, float a, float *x) {
 #ifdef USE_MIC
 __attribute__ ((target(MIC)))
@@ -91,6 +96,14 @@ __attribute__ ((target(MIC)))
   int i=1;
   return(dnrm2_(&N,x,&i));
 }
+double
+my_dnrm2_inc(int N, double *x, int inc) {
+#ifdef USE_MIC
+__attribute__ ((target(MIC)))
+#endif
+  extern double  dnrm2_(int *N, double *x, int *incx);
+  return(dnrm2_(&N,x,&inc));
+}
 float
 my_fnrm2(int N, float *x) {
 #ifdef USE_MIC

diff --git a/src/lib/Radio/Dirac_radio.h b/src/lib/Radio/Dirac_radio.h
@@ -223,9 +223,10 @@ precalculate_coherencies_multifreq(double *u, double *v, double *w, complex doub
 
 
 /****************************** diffuse_predict.c ****************************/
+/* use_cuda: if 1, use GPU version, else only CPU version */
 extern int
 recalculate_diffuse_coherencies(double *u, double *v, double *w, complex double *x, int N,
-   int Nbase, baseline_t *barr,  clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, double uvmin, double uvmax, int diffuse_cluster, int sh_n0, double sh_beta, complex double *Z, int Nt);
+   int Nbase, baseline_t *barr,  clus_source_t *carr, int M, double freq0, double fdelta, double tdelta, double dec0, double uvmin, double uvmax, int diffuse_cluster, int sh_n0, double sh_beta, complex double *Z, int Nt, int use_cuda);
 /****************************** transforms.c ****************************/
 #ifndef ASEC2RAD
 #define ASEC2RAD 4.848136811095359935899141e-6
@@ -477,6 +478,11 @@ precess_source_locations_deprecated(double jd_tdb, clus_source_t *carr, int M, d
 
 /****************************** predict_withbeam_cuda.c ****************************/
 #ifdef HAVE_CUDA
+/* copy Nx1 double array x to device as float
+   first allocate device memory (need to be freed later) */
+extern void
+dtofcopy(int N, float **x_d, double *x);
+
 /* if dobeam==0, beam calculation is off
    else, flag to determine if full (element+array), array only, or element only beam is calculated
  */
@@ -544,6 +550,9 @@ cudakernel_correct_residuals(int B, int N, int Nb, int boff, int F, int nchunk,
 
 extern void
 cudakernel_convert_time(int T, double *time_utc);
+
+extern void
+cudakernel_calculate_shapelet_coherencies(float u, float v, float *modes, float *fact, int n0, float beta, double *coh);
 #endif /* !HAVE_CUDA */
-Original file line number
+Diff line change
@@ Expand Up / @@ -34,7 +34,7 @@ using namespace Data; @@
     void
     print_copyright(void) {
-      cout<<"SAGECal-MPI 0.8.2 (C) 2011-2024 Sarod Yatawatta"<<endl;
+      cout<<"SAGECal-MPI 0.8.3 (C) 2011-2024 Sarod Yatawatta"<<endl;
     }
@@ Expand Down @@