From 06d1f31ebd44da1b9904e4783cb02cd45000d3fe Mon Sep 17 00:00:00 2001 From: Manodeep Sinha Date: Sat, 6 Feb 2016 07:55:05 +1100 Subject: [PATCH] Enabled USE_AVX in the theory.options and mocks.options files. However, protected the user in case the compiler doesn't have AVX capability. I think if the compiler has AVX but the CPU doesn't, then the code will compile fine but crash at runtime. --- mocks.options | 2 +- theory.options | 2 +- xi_mocks/DDrppi/DDrppi_mocks.c | 2 +- xi_mocks/DDrppi/countpairs_rp_pi_mocks.c | 6 +++--- xi_mocks/tests/tests_mocks.c | 2 +- xi_mocks/vpf/countspheres_mocks.c | 12 ++++++------ xi_mocks/wtheta/DDtheta_mocks.c | 2 +- xi_mocks/wtheta/countpairs_theta_mocks.c | 6 +++--- xi_theory/vpf/countspheres.c | 6 +++--- xi_theory/vpf/vpf.c | 2 +- xi_theory/wp/countpairs_wp.c | 6 +++--- xi_theory/wp/wp.c | 2 +- xi_theory/xi/countpairs_xi.c | 6 +++--- xi_theory/xi/xi.c | 2 +- xi_theory/xi_of_r/DD.c | 2 +- xi_theory/xi_of_r/countpairs.c | 6 +++--- xi_theory/xi_rp_pi/DDrppi.c | 2 +- xi_theory/xi_rp_pi/countpairs_rp_pi.c | 6 +++--- 18 files changed, 37 insertions(+), 37 deletions(-) diff --git a/mocks.options b/mocks.options index c0c310aa..c43387f0 100644 --- a/mocks.options +++ b/mocks.options @@ -8,6 +8,6 @@ OPT += -DLINK_IN_RA #### Code specs for both data Correlation Functions OPT += -DDOUBLE_PREC -#OPT += -DUSE_AVX +OPT += -DUSE_AVX OPT += -DUSE_OMP #OPT += -DFAST_DIVIDE ##replaces divide in DDrppi with approximate divides. 
If you really must get that extra ~20% performance boost diff --git a/theory.options b/theory.options index 9a8da845..107c86a9 100644 --- a/theory.options +++ b/theory.options @@ -4,7 +4,7 @@ OPT = -DPERIODIC #### Code specs for both theory and data Correlation Functions #OPT += -DDOUBLE_PREC -#OPT += -DUSE_AVX +OPT += -DUSE_AVX OPT += -DUSE_OMP diff --git a/xi_mocks/DDrppi/DDrppi_mocks.c b/xi_mocks/DDrppi/DDrppi_mocks.c index 0ff94e66..155f5a6e 100644 --- a/xi_mocks/DDrppi/DDrppi_mocks.c +++ b/xi_mocks/DDrppi/DDrppi_mocks.c @@ -202,7 +202,7 @@ void Printhelp(void) fprintf(stderr,"Precision = float\n"); #endif -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) fprintf(stderr,"Use AVX = True\n"); #else fprintf(stderr,"Use AVX = False\n"); diff --git a/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c b/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c index cfd47f93..efb9059a 100644 --- a/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c +++ b/xi_mocks/DDrppi/countpairs_rp_pi_mocks.c @@ -21,7 +21,7 @@ #include "cosmology_params.h" #include "set_cosmo_dist.h" -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) #include "avx_calls.h" #endif @@ -241,7 +241,7 @@ results_countpairs_mocks * countpairs_mocks(const int64_t ND1, DOUBLE *phi1, DOU rupp_sqr[i] = rupp[i]*rupp[i]; } -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) AVX_FLOATS m_rupp_sqr[nrpbin]; AVX_FLOATS m_kbin[nrpbin]; for(int i=0;i ngrid-1 ? 
ngrid-1:ix + bin_refine_factor; for(int iix=min_ix;iix<=max_ix;iix++) { const DOUBLE newxpos = xcen; -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) const AVX_FLOATS m_newxpos = AVX_SET_FLOAT(newxpos); #endif @@ -325,7 +325,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal for(int iiy=min_iy;iiy<=max_iy;iiy++) { const DOUBLE newypos = ycen; -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) const AVX_FLOATS m_newypos = AVX_SET_FLOAT(newypos); #endif @@ -334,7 +334,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal for(int iiz=min_iz;iiz<=max_iz;iiz++) { const DOUBLE newzpos = zcen; -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) const AVX_FLOATS m_newzpos = AVX_SET_FLOAT(newzpos); #endif const int index=iix*ngrid*ngrid + iiy*ngrid + iiz; @@ -344,7 +344,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal DOUBLE *z2 = cellstruct->pos + 2*NVEC; int ipart; for(ipart=0;ipart<=(cellstruct->nelements-NVEC);ipart+=NVEC) { -#ifndef USE_AVX +#if !(defined(USE_AVX) && defined(__AVX__)) int ibin[NVEC]; #if __INTEL_COMPILER #pragma simd vectorlengthfor(DOUBLE) diff --git a/xi_mocks/wtheta/DDtheta_mocks.c b/xi_mocks/wtheta/DDtheta_mocks.c index 6c626a40..f9a6ae03 100644 --- a/xi_mocks/wtheta/DDtheta_mocks.c +++ b/xi_mocks/wtheta/DDtheta_mocks.c @@ -187,7 +187,7 @@ void Printhelp(void) fprintf(stderr,"Precision = float\n"); #endif -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) fprintf(stderr,"Use AVX = True\n"); #else fprintf(stderr,"Use AVX = False\n"); diff --git a/xi_mocks/wtheta/countpairs_theta_mocks.c b/xi_mocks/wtheta/countpairs_theta_mocks.c index 33bfbcb1..7ba83c10 100644 --- a/xi_mocks/wtheta/countpairs_theta_mocks.c +++ b/xi_mocks/wtheta/countpairs_theta_mocks.c @@ -30,7 +30,7 @@ #include "progressbar.h" //for the progressbar -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) #include "avx_calls.h" #endif @@ -181,7 +181,7 
@@ results_countpairs_theta * countpairs_theta_mocks(const int64_t ND1, DOUBLE *phi #endif #endif -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) AVX_FLOATS m_costheta_upp[nthetabin] ; for(int i=0;ipos; DOUBLE *y2 = first->pos + NVEC; DOUBLE *z2 = first->pos + 2*NVEC; -#ifndef USE_AVX +#if !(defined(USE_AVX) && defined(__AVX__)) for(int64_t j=0;jnelements;j+=NVEC) { int block_size=first->nelements - j; diff --git a/xi_theory/vpf/vpf.c b/xi_theory/vpf/vpf.c index 58a64870..f25a75e0 100644 --- a/xi_theory/vpf/vpf.c +++ b/xi_theory/vpf/vpf.c @@ -152,7 +152,7 @@ void Printhelp(void) fprintf(stderr,"Precision = float\n"); #endif -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) fprintf(stderr,"Use AVX = True\n"); #else fprintf(stderr,"Use AVX = False\n"); diff --git a/xi_theory/wp/countpairs_wp.c b/xi_theory/wp/countpairs_wp.c index 74c05e0c..fb3bcdc7 100644 --- a/xi_theory/wp/countpairs_wp.c +++ b/xi_theory/wp/countpairs_wp.c @@ -23,7 +23,7 @@ #include "sglib.h" -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) #include "avx_calls.h" #endif @@ -136,7 +136,7 @@ results_countpairs_wp *countpairs_wp(const int64_t ND1, DOUBLE * restrict X1, DO } -#ifdef USE_AVX +#if defined(USE_AVX) && defined(__AVX__) AVX_FLOATS m_rupp_sqr[nbin]; for(int i=0;i