Merge pull request #21 from manodeep/develop
Develop [ci skip]
manodeep committed Feb 5, 2016
2 parents 544d986 + 06d1f31 commit 7e943b1
Showing 27 changed files with 354 additions and 1,236,024 deletions.
44 changes: 4 additions & 40 deletions Corrfunc/call_correlation_functions.py
@@ -16,51 +16,15 @@
import re
import time
import numpy as np
try:
import pandas as pd
except ImportError:
pd = None
from Corrfunc import _countpairs, rd

from Corrfunc import _countpairs, rd, utils
from .utils import read_catalog

def main():
tstart=time.time()
file = os.path.join(os.path.dirname(os.path.abspath(__file__)),"../xi_theory/tests/data/","gals_Mr19.txt")
## Figure out the datatype, use the header file in the include directory
## because that is most likely correct (common.mk might have been modified
## but not recompiled)
include_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../include/", "countpairs.h")
try:
includes = rd(include_file)
except (IOError, OSError) as e:
print("ERROR: Could not find file {}.\nPlease compile the `Corrfunc' library directly before running python setup.py install".format(include_file))
raise
vector_type = re.search(r'(\w+)\s*\*\s*rupp\s*\;', includes, re.I).group(1)
allowed_types = {"float":np.float32,"double":np.float}
if vector_type not in list(allowed_types.keys()):
print("Error: Unknown precision={} found in header file {}. Allowed types are `{}'".format(vector_type,include_file,allowed_types))
sys.exit()

dtype = allowed_types[vector_type]

### check if pandas is available - much faster to read in the data through pandas
t0=time.time()
print("Reading in the data...")
try:
if pd is not None:
df = pd.read_csv(file,header=None,engine="c",dtype={"x":dtype,"y":dtype,"z":dtype},delim_whitespace=True)
x = np.asarray(df[0],dtype=dtype)
y = np.asarray(df[1],dtype=dtype)
z = np.asarray(df[2],dtype=dtype)
else:
x,y,z = np.genfromtxt(file,dtype=dtype,unpack=True)
except:
pass

t0 = tstart
x,y,z = read_catalog()
t1=time.time()
print("Done reading the data - time taken = {0:10.1f} seconds.\nBeginning Correlation functions calculations".format(t1-t0))

boxsize=420.0
nthreads=4
pimax=40.0
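The precision-detection logic dropped from this driver reappears in the new read_catalog helper in Corrfunc/utils.py below: it scans countpairs.h for the declared type of rupp and maps it onto a numpy dtype, so the returned arrays match the compiled library. A minimal standalone sketch of that detection (the one-line header string is illustrative, and np.float64 is written out explicitly), not the packaged implementation:

import re
import numpy as np

# Illustrative stand-in for the contents of ../include/countpairs.h
includes = "double *rupp;"

# Same pattern as in the diff: capture the C type declared for `rupp`
vector_type = re.search(r'(\w+)\s*\*\s*rupp\s*\;', includes, re.I).group(1)

# Map the C type onto the numpy dtype used for the input arrays
allowed_types = {"float": np.float32, "double": np.float64}
dtype = allowed_types[vector_type]
print(vector_type, dtype)   # -> double <class 'numpy.float64'>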
133 changes: 131 additions & 2 deletions Corrfunc/utils.py
@@ -1,9 +1,11 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import (absolute_import, division, print_function,
unicode_literals)
import sys
import os

__all__ = ['rd']
__all__ = ['rd','read_catalog']

if sys.version_info[0] >= 3:
def rd(filename):
@@ -23,3 +25,130 @@ def rd(filename):
r = f.read()

return r


def read_catalog(filebase=None):
"""
Reads a galaxy/randoms catalog.
:param filebase: (optional)
The fully qualified path to the file. If omitted, reads the
theory galaxy catalog under ../xi_theory/tests/data/
Returns:
* ``x y z`` - Unpacked numpy arrays compatible with the installed
version of ``Corrfunc``.
**Note** If the filename is omitted, the fast-food file is searched
for first, and then the ascii file. End-users should always
supply the full filename.
"""

import re
import time
import numpy as np

def read_ascii(filename,return_dtype=None):
if return_dtype is None:
raise ValueError("Return data-type must be set and a valid numpy data-type")

### check if pandas is available - much faster to read in the data through pandas
t0=time.time()
print("Reading in the data...")
try:
import pandas as pd
df = pd.read_csv(filename,header=None,engine="c",dtype={"x":return_dtype,"y":return_dtype,"z":return_dtype},delim_whitespace=True)
x = np.asarray(df[0],dtype=return_dtype)
y = np.asarray(df[1],dtype=return_dtype)
z = np.asarray(df[2],dtype=return_dtype)
except ImportError:
print("Warning: Could not read in data with pandas -- due to error : {}. Falling back to slower numpy.".format(sys.exc_info()[0]))
x,y,z = np.genfromtxt(filename,dtype=return_dtype,unpack=True)

return x,y,z

def read_fastfood(filename,return_dtype=None):
if return_dtype is None:
raise ValueError("Return data-type must be set and a valid numpy data-type")

import struct
with open(filename, "rb") as f:
skip1 = struct.unpack('@i',f.read(4))[0]
idat = struct.unpack('@iiiii',f.read(20))[0:5]
skip2 = struct.unpack('@i',f.read(4))[0]
assert skip1 == 20 and skip2 == 20,"fast-food file seems to be incorrect (reading idat)"
ngal = idat[1]
## now read fdat
skip1 = struct.unpack('@i',f.read(4))[0]
fdat = struct.unpack('@fffffffff',f.read(36))[0:9]
skip2 = struct.unpack('@i',f.read(4))[0]
assert skip1 == 36 and skip2 == 36,"fast-food file seems to be incorrect (reading fdat)"

skip1 = struct.unpack('@i',f.read(4))[0]
znow = struct.unpack('@f',f.read(4))[0]
skip2 = struct.unpack('@i',f.read(4))[0]
assert skip1 == 4 and skip2 == 4,"fast-food file seems to be incorrect (reading redshift)"

## read the padding bytes for the x-positions
skip1 = struct.unpack('@i',f.read(4))[0]
assert skip1 == ngal*4 or skip1 == ngal*8, "fast-food file seems to be corrupt (padding bytes)"

### seek back 4 bytes from current position
f.seek(-4, 1)
pos = {}
for field in 'xyz':
skip1 = struct.unpack('@i',f.read(4))[0]
assert skip1 == ngal*4 or skip1 == ngal*8, "fast-food file seems to be corrupt (padding bytes a)"
input_dtype = np.float32 if skip1/ngal == 4 else np.float
array = np.fromfile(f, input_dtype, ngal)
skip2 = struct.unpack('@i',f.read(4))[0]
pos[field] = array if return_dtype is None else return_dtype(array)

x = pos['x']
y = pos['y']
z = pos['z']

return x,y,z


if filebase is None:
filename = os.path.join(os.path.dirname(os.path.abspath(__file__)),"../xi_theory/tests/data/","gals_Mr19")
## Figure out the datatype, use the header file in the include directory
## because that is most likely correct (common.mk might have been modified
## but not recompiled)
include_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../include/", "countpairs.h")
includes = rd(include_file)
vector_type = re.search(r'(\w+)\s*\*\s*rupp\s*\;', includes, re.I).group(1)
allowed_types = {"float":np.float32,"double":np.float}
if vector_type not in list(allowed_types.keys()):
print("Error: Unknown precision={} found in header file {}. Allowed types are `{}'".format(vector_type,include_file,allowed_types))
sys.exit()

dtype = allowed_types[vector_type]
allowed_exts = {'.ff' :read_fastfood,
'.txt':read_ascii,
'.dat':read_ascii,
'.csv':read_ascii
}


for e in allowed_exts:
if os.path.exists(filename+e):
f = allowed_exts[e]
x,y,z = f(filename+e, dtype)
return x,y,z
raise IOError("Could not locate {} with any of these extensions = {}".format(filename, exts.keys()))
else:
### Likely a user-supplied value
if os.path.exists(filebase):
extension = os.path.splitext(filebase)[1]
f = read_fastfood if u'.ff' in extension else read_ascii

### default return is double
x,y,z = f(filebase, np.float)
return x,y,z

raise IOError("Could not locate file {}",filebase)



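The read_fastfood helper above decodes what looks like a Fortran-style unformatted layout: every record is bracketed by two 4-byte integers holding its byte count (20 for idat, 36 for fdat, 4 for the redshift, and ngal*4 or ngal*8 for each coordinate array). A minimal sketch of that padding convention only, writing and re-reading a single wrapped record through a scratch file (the filename is arbitrary):

import struct
import numpy as np

def write_record(f, payload):
    # Wrap the payload with leading/trailing 4-byte length markers
    f.write(struct.pack('@i', len(payload)))
    f.write(payload)
    f.write(struct.pack('@i', len(payload)))

def read_record(f):
    # Read one wrapped record and check that the two markers agree
    n1 = struct.unpack('@i', f.read(4))[0]
    payload = f.read(n1)
    n2 = struct.unpack('@i', f.read(4))[0]
    assert n1 == n2, "record markers disagree - file looks corrupt"
    return payload

x = np.arange(5, dtype=np.float32)
with open("ff_demo.bin", "wb") as f:
    write_record(f, x.tobytes())
with open("ff_demo.bin", "rb") as f:
    x_back = np.frombuffer(read_record(f), dtype=np.float32)
print(np.allclose(x, x_back))   # True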
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -7,7 +7,7 @@ recursive-include io *.c *.h
recursive-include utils *.c *.h

recursive-include xi_theory/tests Mr19_* cmass* bins
recursive-include xi_theory/tests/data *.ff *.txt
recursive-include xi_theory/tests/data *.ff
recursive-include xi_mocks/tests bins angular_bins Mr19*
recursive-include xi_mocks/tests/data *.txt *.dat *.ff

42 changes: 42 additions & 0 deletions meta.yaml
@@ -0,0 +1,42 @@
package:
name: corrfunc
version: "0.2.0"

source:
fn: Corrfunc-0.2.0.tar.gz
url: https://github.com/manodeep/Corrfunc/archive/0.2.0.tar.gz
md5: 8261A1A751963553D33FBA8F0F6303C3
sha1: A1877395F66B450C69555B1AB18B99294D9FA808
sha256: 0AC524EB41B09B8B6ACF8BC8960B82ACBDF1F7A807A45CDFD47166CF4771D72F

requirements:
build:
- gcc
- gsl
- python
- numpy
- setuptools

run:
- python
- numpy

build:
script: make install && python setup.py install
binary_relocation: False
skip: True # [win]

test:
imports:
- Corrfunc

about:
home: http://manodeep.github.io/Corrfunc/
license: MIT
license_file: LICENSE

extra:
maintainers:
- Manodeep Sinha <[email protected]>


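The recipe above pins the source tarball by md5, sha1, and sha256. A small sketch of how those digests could be verified against a local copy of the tarball (it assumes Corrfunc-0.2.0.tar.gz has already been downloaded into the working directory):

import hashlib

expected = {"md5":    "8261A1A751963553D33FBA8F0F6303C3",
            "sha1":   "A1877395F66B450C69555B1AB18B99294D9FA808",
            "sha256": "0AC524EB41B09B8B6ACF8BC8960B82ACBDF1F7A807A45CDFD47166CF4771D72F"}

with open("Corrfunc-0.2.0.tar.gz", "rb") as f:
    data = f.read()

for name, want in expected.items():
    got = hashlib.new(name, data).hexdigest()
    print(name, "OK" if got.lower() == want.lower() else "MISMATCH")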
2 changes: 1 addition & 1 deletion mocks.options
@@ -8,6 +8,6 @@ OPT += -DLINK_IN_RA

#### Code specs for both data Correlation Functions
OPT += -DDOUBLE_PREC
#OPT += -DUSE_AVX
OPT += -DUSE_AVX
OPT += -DUSE_OMP
#OPT += -DFAST_DIVIDE ##replaces divide in DDrppi with approximate divides. If you really must get that extra ~20% performance boost
8 changes: 4 additions & 4 deletions setup.py
@@ -146,10 +146,10 @@ def setup_packages():
### Now the lib + associated header files have been generated
### and put in lib/ and include/
### This step must run after ``make install``
dirs_patterns = {'xi_theory/tests/data/':['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_mocks/tests/data':['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_theory/tests':['Mr19*','bins*','cmass*'],
'xi_mocks/tests':['Mr19*','bins*','angular_bins*'],
dirs_patterns = {'xi_theory/tests/data': ['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_mocks/tests/data' : ['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_theory/tests' : ['Mr19*','bins*','cmass*'],
'xi_mocks/tests' : ['Mr19*','bins*','angular_bins*'],
'include':['count*.h'],
'lib':['libcount*.a']
}
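The realigned dirs_patterns mapping pairs each test/data directory with the glob patterns whose matches get shipped. A standalone sketch of how such a mapping expands into a flat file list with the standard-library glob module (illustrative only, not the actual setup.py code, and shown for a subset of the directories):

import os
from glob import glob

dirs_patterns = {'xi_theory/tests/data': ['*.ff', '*.txt', '*.txt.gz', '*.dat'],
                 'xi_theory/tests':      ['Mr19*', 'bins*', 'cmass*']}

data_files = []
for dirname, patterns in dirs_patterns.items():
    for pattern in patterns:
        # Collect every file under dirname matching this pattern
        data_files.extend(glob(os.path.join(dirname, pattern)))

print(data_files)   # empty unless run from the repository root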
2 changes: 1 addition & 1 deletion theory.options
@@ -4,7 +4,7 @@ OPT = -DPERIODIC

#### Code specs for both theory and data Correlation Functions
#OPT += -DDOUBLE_PREC
#OPT += -DUSE_AVX
OPT += -DUSE_AVX
OPT += -DUSE_OMP


2 changes: 1 addition & 1 deletion xi_mocks/DDrppi/DDrppi_mocks.c
@@ -202,7 +202,7 @@ void Printhelp(void)
fprintf(stderr,"Precision = float\n");
#endif

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
fprintf(stderr,"Use AVX = True\n");
#else
fprintf(stderr,"Use AVX = False\n");
6 changes: 3 additions & 3 deletions xi_mocks/DDrppi/countpairs_rp_pi_mocks.c
@@ -21,7 +21,7 @@
#include "cosmology_params.h"
#include "set_cosmo_dist.h"

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
#include "avx_calls.h"
#endif

@@ -241,7 +241,7 @@ results_countpairs_mocks * countpairs_mocks(const int64_t ND1, DOUBLE *phi1, DOU
rupp_sqr[i] = rupp[i]*rupp[i];
}

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
AVX_FLOATS m_rupp_sqr[nrpbin];
AVX_FLOATS m_kbin[nrpbin];
for(int i=0;i<nrpbin;i++) {
@@ -413,7 +413,7 @@ results_countpairs_mocks * countpairs_mocks(const int64_t ND1, DOUBLE *phi1, DOU
/* const DOUBLE TWO=2.0; */
/* const DOUBLE sqr_d1 = d1[i]*d1[i]; */

#ifndef USE_AVX
#if !(defined(USE_AVX) && defined(__AVX__))

DOUBLE *localx2 = x2;
DOUBLE *localy2 = y2;
2 changes: 1 addition & 1 deletion xi_mocks/tests/tests_mocks.c
@@ -33,7 +33,7 @@
#endif

#if !(defined(__INTEL_COMPILER)) && defined(USE_AVX)
#warning Test suite for mocks will be slow without Intel ICC while USE_AVX is set.
#warning Test suite for mocks runs faster with the Intel compiler (icc) and AVX enabled.
#endif

#ifndef SILENT
Expand Down
12 changes: 6 additions & 6 deletions xi_mocks/vpf/countspheres_mocks.c
@@ -19,7 +19,7 @@
#include "set_cosmo_dist.h"//cosmological distance calculations
#include "cosmology_params.h"//init_cosmology

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
#include "avx_calls.h"
#endif

@@ -256,7 +256,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
const DOUBLE rstep = rmax/(DOUBLE)nbin ;
const DOUBLE inv_rstep = ((DOUBLE) 1.0)/rstep;

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
AVX_FLOATS m_rupp_sqr[nbin];
AVX_FLOATS m_rmax_sqr = AVX_SET_FLOAT(rmax_sqr);
for(int k=0;k<nbin;k++) {
@@ -316,7 +316,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
const int max_ix = ix + bin_refine_factor > ngrid-1 ? ngrid-1:ix + bin_refine_factor;
for(int iix=min_ix;iix<=max_ix;iix++) {
const DOUBLE newxpos = xcen;
#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
const AVX_FLOATS m_newxpos = AVX_SET_FLOAT(newxpos);
#endif

@@ -325,7 +325,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal

for(int iiy=min_iy;iiy<=max_iy;iiy++) {
const DOUBLE newypos = ycen;
#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
const AVX_FLOATS m_newypos = AVX_SET_FLOAT(newypos);
#endif

@@ -334,7 +334,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal

for(int iiz=min_iz;iiz<=max_iz;iiz++) {
const DOUBLE newzpos = zcen;
#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
const AVX_FLOATS m_newzpos = AVX_SET_FLOAT(newzpos);
#endif
const int index=iix*ngrid*ngrid + iiy*ngrid + iiz;
@@ -344,7 +344,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
DOUBLE *z2 = cellstruct->pos + 2*NVEC;
int ipart;
for(ipart=0;ipart<=(cellstruct->nelements-NVEC);ipart+=NVEC) {
#ifndef USE_AVX
#if !(defined(USE_AVX) && defined(__AVX__))
int ibin[NVEC];
#if __INTEL_COMPILER
#pragma simd vectorlengthfor(DOUBLE)
2 changes: 1 addition & 1 deletion xi_mocks/wtheta/DDtheta_mocks.c
@@ -187,7 +187,7 @@ void Printhelp(void)
fprintf(stderr,"Precision = float\n");
#endif

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
fprintf(stderr,"Use AVX = True\n");
#else
fprintf(stderr,"Use AVX = False\n");