From 06d1f31ebd44da1b9904e4783cb02cd45000d3fe Mon Sep 17 00:00:00 2001
From: Manodeep Sinha <manodeep@gmail.com>
Date: Sat, 6 Feb 2016 07:55:05 +1100
Subject: [PATCH] Enabled USE_AVX in the theory.options and mocks.options
 files. However, protected the user in case the compiler doesn't have AVX
 capability. I think if the compiler has AVX but the CPU doesn't, then the
 code will compile fine but crash at runtime.

---
 mocks.options                            |  2 +-
 theory.options                           |  2 +-
 xi_mocks/DDrppi/DDrppi_mocks.c           |  2 +-
 xi_mocks/DDrppi/countpairs_rp_pi_mocks.c |  6 +++---
 xi_mocks/tests/tests_mocks.c             |  2 +-
 xi_mocks/vpf/countspheres_mocks.c        | 12 ++++++------
 xi_mocks/wtheta/DDtheta_mocks.c          |  2 +-
 xi_mocks/wtheta/countpairs_theta_mocks.c |  6 +++---
 xi_theory/vpf/countspheres.c             |  6 +++---
 xi_theory/vpf/vpf.c                      |  2 +-
 xi_theory/wp/countpairs_wp.c             |  6 +++---
 xi_theory/wp/wp.c                        |  2 +-
 xi_theory/xi/countpairs_xi.c             |  6 +++---
 xi_theory/xi/xi.c                        |  2 +-
 xi_theory/xi_of_r/DD.c                   |  2 +-
 xi_theory/xi_of_r/countpairs.c           |  6 +++---
 xi_theory/xi_rp_pi/DDrppi.c              |  2 +-
 xi_theory/xi_rp_pi/countpairs_rp_pi.c    |  6 +++---
 18 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/mocks.options b/mocks.options
index c0c310aa..c43387f0 100644
--- a/mocks.options
+++ b/mocks.options
@@ -8,6 +8,6 @@ OPT += -DLINK_IN_RA
 
 #### Code specs for both data Correlation Functions
 OPT += -DDOUBLE_PREC
-#OPT += -DUSE_AVX
+OPT += -DUSE_AVX
 OPT += -DUSE_OMP
 #OPT += -DFAST_DIVIDE ##replaces divide in DDrppi with approximate divides. If you really must get that extra ~20% performance boost
diff --git a/theory.options b/theory.options
index 9a8da845..107c86a9 100644
--- a/theory.options
+++ b/theory.options
@@ -4,7 +4,7 @@ OPT = -DPERIODIC
 
 #### Code specs for both theory and data Correlation Functions
 #OPT += -DDOUBLE_PREC
-#OPT += -DUSE_AVX
+OPT += -DUSE_AVX
 OPT += -DUSE_OMP
 
 
diff --git a/xi_mocks/DDrppi/DDrppi_mocks.c b/xi_mocks/DDrppi/DDrppi_mocks.c
index 0ff94e66..155f5a6e 100644
--- a/xi_mocks/DDrppi/DDrppi_mocks.c
+++ b/xi_mocks/DDrppi/DDrppi_mocks.c
@@ -202,7 +202,7 @@ void Printhelp(void)
   fprintf(stderr,"Precision = float\n");
 #endif
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   fprintf(stderr,"Use AVX = True\n");
 #else   
   fprintf(stderr,"Use AVX = False\n");
diff --git a/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c b/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c
index cfd47f93..efb9059a 100644
--- a/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c
+++ b/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c
@@ -21,7 +21,7 @@
 #include "cosmology_params.h"
 #include "set_cosmo_dist.h"
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -241,7 +241,7 @@ results_countpairs_mocks * countpairs_mocks(const int64_t ND1, DOUBLE *phi1, DOU
     rupp_sqr[i] = rupp[i]*rupp[i];
 	}	
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   AVX_FLOATS m_rupp_sqr[nrpbin];
 	AVX_FLOATS m_kbin[nrpbin];
   for(int i=0;i<nrpbin;i++) {
@@ -413,7 +413,7 @@ results_countpairs_mocks * countpairs_mocks(const int64_t ND1, DOUBLE *phi1, DOU
 /* 					const DOUBLE TWO=2.0; */
 /* 					const DOUBLE sqr_d1 = d1[i]*d1[i]; */
 		  
-#ifndef USE_AVX
+#if !(defined(USE_AVX) && defined(__AVX__))
 
 					DOUBLE *localx2  = x2;
 					DOUBLE *localy2  = y2;
diff --git a/xi_mocks/tests/tests_mocks.c b/xi_mocks/tests/tests_mocks.c
index 3e6e9792..1189dede 100644
--- a/xi_mocks/tests/tests_mocks.c
+++ b/xi_mocks/tests/tests_mocks.c
@@ -33,7 +33,7 @@
 #endif
 
 #if !(defined(__INTEL_COMPILER)) && defined(USE_AVX)
-#warning Test suite for mocks will be slow without Intel ICC while USE_AVX is set. 
+#warning Test suite for mocks are faster with Intel compiler, icc, AVX libraries. 
 #endif
 
 #ifndef SILENT
diff --git a/xi_mocks/vpf/countspheres_mocks.c b/xi_mocks/vpf/countspheres_mocks.c
index 7b438096..a373a58a 100644
--- a/xi_mocks/vpf/countspheres_mocks.c
+++ b/xi_mocks/vpf/countspheres_mocks.c
@@ -19,7 +19,7 @@
 #include "set_cosmo_dist.h"//cosmological distance calculations
 #include "cosmology_params.h"//init_cosmology 
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -256,7 +256,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
 	const DOUBLE rstep = rmax/(DOUBLE)nbin ;
 	const DOUBLE inv_rstep = ((DOUBLE) 1.0)/rstep;
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   AVX_FLOATS m_rupp_sqr[nbin];
   AVX_FLOATS m_rmax_sqr = AVX_SET_FLOAT(rmax_sqr);
 	for(int k=0;k<nbin;k++) {
@@ -316,7 +316,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
       const int max_ix = ix + bin_refine_factor > ngrid-1 ? ngrid-1:ix + bin_refine_factor;
       for(int iix=min_ix;iix<=max_ix;iix++) {
 				const DOUBLE newxpos = xcen;
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 				const AVX_FLOATS m_newxpos = AVX_SET_FLOAT(newxpos);
 #endif	
 	
@@ -325,7 +325,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
 
 				for(int iiy=min_iy;iiy<=max_iy;iiy++) {
 					const DOUBLE newypos = ycen;
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 					const AVX_FLOATS m_newypos = AVX_SET_FLOAT(newypos);
 #endif	
 
@@ -334,7 +334,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
 
 					for(int iiz=min_iz;iiz<=max_iz;iiz++) {
 						const DOUBLE newzpos = zcen;
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 						const AVX_FLOATS m_newzpos = AVX_SET_FLOAT(newzpos);
 #endif	
 						const int index=iix*ngrid*ngrid + iiy*ngrid + iiz;
@@ -344,7 +344,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
 						DOUBLE *z2 = cellstruct->pos + 2*NVEC;
 						int ipart;
 						for(ipart=0;ipart<=(cellstruct->nelements-NVEC);ipart+=NVEC) {
-#ifndef USE_AVX
+#if !(defined(USE_AVX) && defined(__AVX__))
 							int ibin[NVEC];
 #if  __INTEL_COMPILER
 #pragma simd vectorlengthfor(DOUBLE)
diff --git a/xi_mocks/wtheta/DDtheta_mocks.c b/xi_mocks/wtheta/DDtheta_mocks.c
index 6c626a40..f9a6ae03 100644
--- a/xi_mocks/wtheta/DDtheta_mocks.c
+++ b/xi_mocks/wtheta/DDtheta_mocks.c
@@ -187,7 +187,7 @@ void Printhelp(void)
   fprintf(stderr,"Precision = float\n");
 #endif
         
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   fprintf(stderr,"Use AVX = True\n");
 #else   
   fprintf(stderr,"Use AVX = False\n");
diff --git a/xi_mocks/wtheta/countpairs_theta_mocks.c b/xi_mocks/wtheta/countpairs_theta_mocks.c
index 33bfbcb1..7ba83c10 100644
--- a/xi_mocks/wtheta/countpairs_theta_mocks.c
+++ b/xi_mocks/wtheta/countpairs_theta_mocks.c
@@ -30,7 +30,7 @@
 #include "progressbar.h" //for the progressbar
 
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -181,7 +181,7 @@ results_countpairs_theta * countpairs_theta_mocks(const int64_t ND1, DOUBLE *phi
 #endif
 #endif
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   AVX_FLOATS m_costheta_upp[nthetabin] ;
   for(int i=0;i<nthetabin;i++) {  
     /* fprintf(stderr," i = %d theta_upp[i-1] = %lf cos(theta_upp[i-1] = %lf cos(theta_upp[i]) = %lf \n",i, theta_upp[i-1],COSD(theta_upp[i-1]),COSD(theta_upp[i])); */
@@ -399,7 +399,7 @@ results_countpairs_theta * countpairs_theta_mocks(const int64_t ND1, DOUBLE *phi
 					/*---Loop-over-Data2-particles--------------------*/
 					int j;
 					for(j=0;j <=(Nloop-NVEC);j+=NVEC) {
-#ifndef USE_AVX
+#if !(defined(USE_AVX) && defined(__AVX__))
 						DOUBLE costheta[NVEC];
 						int thetabin[NVEC];
 #ifdef OUTPUT_THETAAVG
diff --git a/xi_theory/vpf/countspheres.c b/xi_theory/vpf/countspheres.c
index 6741244a..9640fbf7 100644
--- a/xi_theory/vpf/countspheres.c
+++ b/xi_theory/vpf/countspheres.c
@@ -18,7 +18,7 @@
 #include "utils.h" //all of the utilities
 #include "progressbar.h" //for the progressbar
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -65,7 +65,7 @@ results_countspheres * countspheres(const int64_t np, const DOUBLE * restrict X,
   const DOUBLE inv_rstep = ((DOUBLE) 1.0)/rstep;
   /* const DOUBLE inv_rcube = ((DOUBLE) 1.0)/rcube; */
   const DOUBLE rmax_sqr = rmax*rmax;
-#ifdef USE_AVX  
+#if defined(USE_AVX) && defined(__AVX__)  
   AVX_FLOATS m_rmax_sqr = AVX_SET_FLOAT(rmax_sqr);
   AVX_FLOATS m_rupp_sqr[nbin];
   for(int k=0;k<nbin;k++) {
@@ -200,7 +200,7 @@ results_countspheres * countspheres(const int64_t np, const DOUBLE * restrict X,
 					DOUBLE *x2 = first->pos;
 					DOUBLE *y2 = first->pos + NVEC;
 					DOUBLE *z2 = first->pos + 2*NVEC;
-#ifndef USE_AVX
+#if !(defined(USE_AVX) && defined(__AVX__))
 
 					for(int64_t j=0;j<first->nelements;j+=NVEC) {
 						int block_size=first->nelements - j;
diff --git a/xi_theory/vpf/vpf.c b/xi_theory/vpf/vpf.c
index 58a64870..f25a75e0 100644
--- a/xi_theory/vpf/vpf.c
+++ b/xi_theory/vpf/vpf.c
@@ -152,7 +152,7 @@ void Printhelp(void)
 	fprintf(stderr,"Precision = float\n");
 #endif
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 	fprintf(stderr,"Use AVX = True\n");
 #else
 	fprintf(stderr,"Use AVX = False\n");
diff --git a/xi_theory/wp/countpairs_wp.c b/xi_theory/wp/countpairs_wp.c
index 74c05e0c..fb3bcdc7 100644
--- a/xi_theory/wp/countpairs_wp.c
+++ b/xi_theory/wp/countpairs_wp.c
@@ -23,7 +23,7 @@
 
 #include "sglib.h"
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -136,7 +136,7 @@ results_countpairs_wp *countpairs_wp(const int64_t ND1, DOUBLE * restrict X1, DO
 	}
 
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   AVX_FLOATS m_rupp_sqr[nbin];
 	for(int i=0;i<nbin;i++) {
     m_rupp_sqr[i] = AVX_SET_FLOAT(rupp_sqr[i]);
@@ -270,7 +270,7 @@ results_countpairs_wp *countpairs_wp(const int64_t ND1, DOUBLE * restrict X1, DO
 								const DOUBLE y1pos = y1[ii] + off_ywrap;
 								const DOUBLE z1pos = z1[ii] + off_zwrap;
 
-#ifndef USE_AVX							
+#if !(defined(USE_AVX) && defined(__AVX__))							
 								DOUBLE *localx2 = x2;
 								DOUBLE *localy2 = y2;
 								DOUBLE *localz2 = z2;
diff --git a/xi_theory/wp/wp.c b/xi_theory/wp/wp.c
index 506d7536..30164f69 100644
--- a/xi_theory/wp/wp.c
+++ b/xi_theory/wp/wp.c
@@ -197,7 +197,7 @@ void Printhelp(void)
 	fprintf(stderr,"Precision = float\n");
 #endif
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 	fprintf(stderr,"Use AVX = True\n");
 #else	
 	fprintf(stderr,"Use AVX = False\n");
diff --git a/xi_theory/xi/countpairs_xi.c b/xi_theory/xi/countpairs_xi.c
index 183ea65d..2732aaf0 100644
--- a/xi_theory/xi/countpairs_xi.c
+++ b/xi_theory/xi/countpairs_xi.c
@@ -18,7 +18,7 @@
 #include "progressbar.h" //for the progressbar
 #include "sglib.h"
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -144,7 +144,7 @@ results_countpairs_xi *countpairs_xi(const int64_t ND1, DOUBLE * restrict X1, DO
 	const DOUBLE sqr_rpmax=rupp_sqr[nrpbin-1];
   const DOUBLE sqr_rpmin=rupp_sqr[0];
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   AVX_FLOATS m_rupp_sqr[nrpbin];
   for(int i=0;i<nrpbin;i++) {
     m_rupp_sqr[i] = AVX_SET_FLOAT(rupp_sqr[i]);
@@ -247,7 +247,7 @@ results_countpairs_xi *countpairs_xi(const int64_t ND1, DOUBLE * restrict X1, DO
 								const DOUBLE y1pos=y1[ii] + off_ywrap;;
 								const DOUBLE z1pos=z1[ii] + off_zwrap;
 					  
-#ifndef USE_AVX
+#if !(defined(USE_AVX) && defined(__AVX__))
 								DOUBLE *localx2 = x2;
 								DOUBLE *localy2 = y2;
 								DOUBLE *localz2 = z2;
diff --git a/xi_theory/xi/xi.c b/xi_theory/xi/xi.c
index 53f265d1..530483d5 100644
--- a/xi_theory/xi/xi.c
+++ b/xi_theory/xi/xi.c
@@ -184,7 +184,7 @@ void Printhelp(void)
 	fprintf(stderr,"Precision = float\n");
 #endif
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 	fprintf(stderr,"Use AVX = True\n");
 #else	
 	fprintf(stderr,"Use AVX = False\n");
diff --git a/xi_theory/xi_of_r/DD.c b/xi_theory/xi_of_r/DD.c
index 1d349d3f..c409d8e0 100644
--- a/xi_theory/xi_of_r/DD.c
+++ b/xi_theory/xi_of_r/DD.c
@@ -187,7 +187,7 @@ void Printhelp(void)
 	fprintf(stderr,"Precision = float\n");
 #endif
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 	fprintf(stderr,"Use AVX = True\n");
 #else	
 	fprintf(stderr,"Use AVX = False\n");
diff --git a/xi_theory/xi_of_r/countpairs.c b/xi_theory/xi_of_r/countpairs.c
index fffcbd1f..e53a0566 100644
--- a/xi_theory/xi_of_r/countpairs.c
+++ b/xi_theory/xi_of_r/countpairs.c
@@ -16,7 +16,7 @@
 #include "utils.h" //all of the utilities
 #include "progressbar.h" //for the progressbar
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -165,7 +165,7 @@ results_countpairs * countpairs(const int64_t ND1, const DOUBLE * const X1, cons
   DOUBLE sqr_rpmax=rupp_sqr[nrpbin-1];
   DOUBLE sqr_rpmin=rupp_sqr[0];
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   AVX_FLOATS m_rupp_sqr[nrpbin];
   for(int i=0;i<nrpbin;i++) {
     m_rupp_sqr[i] = AVX_SET_FLOAT(rupp_sqr[i]);
@@ -297,7 +297,7 @@ results_countpairs * countpairs(const int64_t ND1, const DOUBLE * const X1, cons
 #endif
 
 							
-#ifndef USE_AVX
+#if !(defined(USE_AVX) && defined(__AVX__))
 								
 								DOUBLE *localx2 = x2;
 								DOUBLE *localy2 = y2;
diff --git a/xi_theory/xi_rp_pi/DDrppi.c b/xi_theory/xi_rp_pi/DDrppi.c
index 7c910a86..8d6ba81c 100644
--- a/xi_theory/xi_rp_pi/DDrppi.c
+++ b/xi_theory/xi_rp_pi/DDrppi.c
@@ -220,7 +220,7 @@ void Printhelp(void)
 	fprintf(stderr,"Precision = float\n");
 #endif
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 	fprintf(stderr,"Use AVX = True\n");
 #else	
 	fprintf(stderr,"Use AVX = False\n");
diff --git a/xi_theory/xi_rp_pi/countpairs_rp_pi.c b/xi_theory/xi_rp_pi/countpairs_rp_pi.c
index 6dcb8e0e..4d5c0b7b 100644
--- a/xi_theory/xi_rp_pi/countpairs_rp_pi.c
+++ b/xi_theory/xi_rp_pi/countpairs_rp_pi.c
@@ -16,7 +16,7 @@
 #include "utils.h" //all of the utilities
 #include "progressbar.h" //for the progressbar
 
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
 #include "avx_calls.h"
 #endif
 
@@ -160,7 +160,7 @@ results_countpairs_rp_pi * countpairs_rp_pi(const int64_t ND1, const DOUBLE *X1,
 #endif
 
 	
-#ifdef USE_AVX
+#if defined(USE_AVX) && defined(__AVX__)
   AVX_FLOATS m_rupp_sqr[nrpbin];
   AVX_FLOATS m_kbin[nrpbin];
   for(int i=0;i<nrpbin;i++) {
@@ -286,7 +286,7 @@ results_countpairs_rp_pi * countpairs_rp_pi(const int64_t ND1, const DOUBLE *X1,
 #endif
 								
 								
-#ifndef USE_AVX	//Beginning of NO AVX section
+#if !(defined(USE_AVX) && defined(__AVX__))	//Beginning of NO AVX section
 								DOUBLE *localx2=x2;
 								DOUBLE *localy2=y2;
 								DOUBLE *localz2=z2;