Merge pull request #21 from manodeep/develop
Develop [ci skip]
manodeep committed Feb 5, 2016
2 parents 544d986 + 06d1f31 commit 7e943b1
Showing 27 changed files with 354 additions and 1,236,024 deletions.
44 changes: 4 additions & 40 deletions Corrfunc/call_correlation_functions.py
@@ -16,51 +16,15 @@
import re
import time
import numpy as np
try:
import pandas as pd
except ImportError:
pd = None
from Corrfunc import _countpairs, rd

from Corrfunc import _countpairs, rd, utils
from .utils import read_catalog

def main():
tstart=time.time()
file = os.path.join(os.path.dirname(os.path.abspath(__file__)),"../xi_theory/tests/data/","gals_Mr19.txt")
## Figure out the datatype, use the header file in the include directory
## because that is most likely correct (common.mk might have been modified
## but not recompiled)
include_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../include/", "countpairs.h")
try:
includes = rd(include_file)
except (IOError, OSError) as e:
print("ERROR: Could not find file {}.\nPlease compile the `Corrfunc' library directly before running python setup.py install".format(include_file))
raise
vector_type = re.search(r'(\w+)\s*\*\s*rupp\s*\;', includes, re.I).group(1)
allowed_types = {"float":np.float32,"double":np.float}
if vector_type not in list(allowed_types.keys()):
print("Error: Unknown precision={} found in header file {}. Allowed types are `{}'".format(vector_type,include_file,allowed_types))
sys.exit()

dtype = allowed_types[vector_type]

### check if pandas is available - much faster to read in the data through pandas
t0=time.time()
print("Reading in the data...")
try:
if pd is not None:
df = pd.read_csv(file,header=None,engine="c",dtype={"x":dtype,"y":dtype,"z":dtype},delim_whitespace=True)
x = np.asarray(df[0],dtype=dtype)
y = np.asarray(df[1],dtype=dtype)
z = np.asarray(df[2],dtype=dtype)
else:
x,y,z = np.genfromtxt(file,dtype=dtype,unpack=True)
except:
pass

t0 = tstart
x,y,z = read_catalog()
t1=time.time()
print("Done reading the data - time taken = {0:10.1f} seconds.\nBeginning Correlation functions calculations".format(t1-t0))

boxsize=420.0
nthreads=4
pimax=40.0
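The precision-detection logic dropped from this driver reappears in the new read_catalog helper in Corrfunc/utils.py below: it scans countpairs.h for the declared type of rupp and maps it onto a numpy dtype, so the returned arrays match the compiled library. A minimal standalone sketch of that detection (the one-line header string is illustrative, and np.float64 is written out explicitly), not the packaged implementation:

import re
import numpy as np

# Illustrative stand-in for the contents of ../include/countpairs.h
includes = "double *rupp;"

# Same pattern as in the diff: capture the C type declared for `rupp`
vector_type = re.search(r'(\w+)\s*\*\s*rupp\s*\;', includes, re.I).group(1)

# Map the C type onto the numpy dtype used for the input arrays
allowed_types = {"float": np.float32, "double": np.float64}
dtype = allowed_types[vector_type]
print(vector_type, dtype)   # -> double <class 'numpy.float64'>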
133 changes: 131 additions & 2 deletions Corrfunc/utils.py
@@ -1,9 +1,11 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import (absolute_import, division, print_function,
unicode_literals)
import sys
import os

__all__ = ['rd']
__all__ = ['rd','read_catalog']

if sys.version_info[0] >= 3:
def rd(filename):
@@ -23,3 +25,130 @@ def rd(filename):
r = f.read()

return r


def read_catalog(filebase=None):
"""
Reads a galaxy/randoms catalog.
:param filebase: (optional)
The fully qualified path to the file. If omitted, reads the
theory galaxy catalog under ../xi_theory/tests/data/
Returns:
* ``x y z`` - Unpacked numpy arrays compatible with the installed
version of ``Corrfunc``.
**Note** If the filename is omitted, the fast-food file is searched
for first, and then the ascii file. End-users should always
supply the full filename.
"""

import re
import time
import numpy as np

def read_ascii(filename,return_dtype=None):
if return_dtype is None:
raise ValueError("Return data-type must be set and a valid numpy data-type")

### check if pandas is available - much faster to read in the data through pandas
t0=time.time()
print("Reading in the data...")
try:
import pandas as pd
df = pd.read_csv(filename,header=None,engine="c",dtype={"x":return_dtype,"y":return_dtype,"z":return_dtype},delim_whitespace=True)
x = np.asarray(df[0],dtype=return_dtype)
y = np.asarray(df[1],dtype=return_dtype)
z = np.asarray(df[2],dtype=return_dtype)
except ImportError:
print("Warning: Could not read in data with pandas -- due to error : {}. Falling back to slower numpy.".format(sys.exc_info()[0]))
x,y,z = np.genfromtxt(filename,dtype=return_dtype,unpack=True)

return x,y,z

def read_fastfood(filename,return_dtype=None):
if return_dtype is None:
raise ValueError("Return data-type must be set and a valid numpy data-type")

import struct
with open(filename, "rb") as f:
skip1 = struct.unpack('@i',f.read(4))[0]
idat = struct.unpack('@iiiii',f.read(20))[0:5]
skip2 = struct.unpack('@i',f.read(4))[0]
assert skip1 == 20 and skip2 == 20,"fast-food file seems to be incorrect (reading idat)"
ngal = idat[1]
## now read fdat
skip1 = struct.unpack('@i',f.read(4))[0]
fdat = struct.unpack('@fffffffff',f.read(36))[0:9]
skip2 = struct.unpack('@i',f.read(4))[0]
assert skip1 == 36 and skip2 == 36,"fast-food file seems to be incorrect (reading fdat)"

skip1 = struct.unpack('@i',f.read(4))[0]
znow = struct.unpack('@f',f.read(4))[0]
skip2 = struct.unpack('@i',f.read(4))[0]
assert skip1 == 4 and skip2 == 4,"fast-food file seems to be incorrect (reading redshift)"

## read the padding bytes for the x-positions
skip1 = struct.unpack('@i',f.read(4))[0]
assert skip1 == ngal*4 or skip1 == ngal*8, "fast-food file seems to be corrupt (padding bytes)"

### seek back 4 bytes from current position
f.seek(-4, 1)
pos = {}
for field in 'xyz':
skip1 = struct.unpack('@i',f.read(4))[0]
assert skip1 == ngal*4 or skip1 == ngal*8, "fast-food file seems to be corrupt (padding bytes a)"
input_dtype = np.float32 if skip1/ngal == 4 else np.float
array = np.fromfile(f, input_dtype, ngal)
skip2 = struct.unpack('@i',f.read(4))[0]
pos[field] = array if return_dtype is None else return_dtype(array)

x = pos['x']
y = pos['y']
z = pos['z']

return x,y,z


if filebase is None:
filename = os.path.join(os.path.dirname(os.path.abspath(__file__)),"../xi_theory/tests/data/","gals_Mr19")
## Figure out the datatype, use the header file in the include directory
## because that is most likely correct (common.mk might have been modified
## but not recompiled)
include_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
"../include/", "countpairs.h")
includes = rd(include_file)
vector_type = re.search(r'(\w+)\s*\*\s*rupp\s*\;', includes, re.I).group(1)
allowed_types = {"float":np.float32,"double":np.float}
if vector_type not in list(allowed_types.keys()):
print("Error: Unknown precision={} found in header file {}. Allowed types are `{}'".format(vector_type,include_file,allowed_types))
sys.exit()

dtype = allowed_types[vector_type]
allowed_exts = {'.ff' :read_fastfood,
'.txt':read_ascii,
'.dat':read_ascii,
'.csv':read_ascii
}


for e in allowed_exts:
if os.path.exists(filename+e):
f = allowed_exts[e]
x,y,z = f(filename+e, dtype)
return x,y,z
raise IOError("Could not locate {} with any of these extensions = {}".format(filename, exts.keys()))
else:
### Likely a user-supplied value
if os.path.exists(filebase):
extension = os.path.splitext(filebase)[1]
f = read_fastfood if u'.ff' in extension else read_ascii

### default return is double
x,y,z = f(filebase, np.float)
return x,y,z

raise IOError("Could not locate file {}",filebase)



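The read_fastfood helper above decodes what looks like a Fortran-style unformatted layout: every record is bracketed by two 4-byte integers holding its byte count (20 for idat, 36 for fdat, 4 for the redshift, and ngal*4 or ngal*8 for each coordinate array). A minimal sketch of that padding convention only, writing and re-reading a single wrapped record through a scratch file (the filename is arbitrary):

import struct
import numpy as np

def write_record(f, payload):
    # Wrap the payload with leading/trailing 4-byte length markers
    f.write(struct.pack('@i', len(payload)))
    f.write(payload)
    f.write(struct.pack('@i', len(payload)))

def read_record(f):
    # Read one wrapped record and check that the two markers agree
    n1 = struct.unpack('@i', f.read(4))[0]
    payload = f.read(n1)
    n2 = struct.unpack('@i', f.read(4))[0]
    assert n1 == n2, "record markers disagree - file looks corrupt"
    return payload

x = np.arange(5, dtype=np.float32)
with open("ff_demo.bin", "wb") as f:
    write_record(f, x.tobytes())
with open("ff_demo.bin", "rb") as f:
    x_back = np.frombuffer(read_record(f), dtype=np.float32)
print(np.allclose(x, x_back))   # True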
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -7,7 +7,7 @@ recursive-include io *.c *.h
recursive-include utils *.c *.h

recursive-include xi_theory/tests Mr19_* cmass* bins
recursive-include xi_theory/tests/data *.ff *.txt
recursive-include xi_theory/tests/data *.ff
recursive-include xi_mocks/tests bins angular_bins Mr19*
recursive-include xi_mocks/tests/data *.txt *.dat *.ff

42 changes: 42 additions & 0 deletions meta.yaml
@@ -0,0 +1,42 @@
package:
name: corrfunc
version: "0.2.0"

source:
fn: Corrfunc-0.2.0.tar.gz
url: https://github.com/manodeep/Corrfunc/archive/0.2.0.tar.gz
md5: 8261A1A751963553D33FBA8F0F6303C3
sha1: A1877395F66B450C69555B1AB18B99294D9FA808
sha256: 0AC524EB41B09B8B6ACF8BC8960B82ACBDF1F7A807A45CDFD47166CF4771D72F

requirements:
build:
- gcc
- gsl
- python
- numpy
- setuptools

run:
- python
- numpy

build:
script: make install && python setup.py install
binary_relocation: False
skip: True # [win]

test:
imports:
- Corrfunc

about:
home: http://manodeep.github.io/Corrfunc/
license: MIT
license_file: LICENSE

extra:
maintainers:
- Manodeep Sinha <[email protected]>


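The recipe above pins the source tarball by md5, sha1, and sha256. A small sketch of how those digests could be verified against a local copy of the tarball (it assumes Corrfunc-0.2.0.tar.gz has already been downloaded into the working directory):

import hashlib

expected = {"md5":    "8261A1A751963553D33FBA8F0F6303C3",
            "sha1":   "A1877395F66B450C69555B1AB18B99294D9FA808",
            "sha256": "0AC524EB41B09B8B6ACF8BC8960B82ACBDF1F7A807A45CDFD47166CF4771D72F"}

with open("Corrfunc-0.2.0.tar.gz", "rb") as f:
    data = f.read()

for name, want in expected.items():
    got = hashlib.new(name, data).hexdigest()
    print(name, "OK" if got.lower() == want.lower() else "MISMATCH")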
2 changes: 1 addition & 1 deletion mocks.options
@@ -8,6 +8,6 @@ OPT += -DLINK_IN_RA

#### Code specs for both data Correlation Functions
OPT += -DDOUBLE_PREC
#OPT += -DUSE_AVX
OPT += -DUSE_AVX
OPT += -DUSE_OMP
#OPT += -DFAST_DIVIDE ##replaces divide in DDrppi with approximate divides. If you really must get that extra ~20% performance boost
8 changes: 4 additions & 4 deletions setup.py
@@ -146,10 +146,10 @@ def setup_packages():
### Now the lib + associated header files have been generated
### and put in lib/ and include/
### This step must run after ``make install``
dirs_patterns = {'xi_theory/tests/data/':['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_mocks/tests/data':['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_theory/tests':['Mr19*','bins*','cmass*'],
'xi_mocks/tests':['Mr19*','bins*','angular_bins*'],
dirs_patterns = {'xi_theory/tests/data': ['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_mocks/tests/data' : ['*.ff','*.txt','*.txt.gz','*.dat'],
'xi_theory/tests' : ['Mr19*','bins*','cmass*'],
'xi_mocks/tests' : ['Mr19*','bins*','angular_bins*'],
'include':['count*.h'],
'lib':['libcount*.a']
}
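The realigned dirs_patterns mapping pairs each test/data directory with the glob patterns whose matches get shipped. A standalone sketch of how such a mapping expands into a flat file list with the standard-library glob module (illustrative only, not the actual setup.py code, and shown for a subset of the directories):

import os
from glob import glob

dirs_patterns = {'xi_theory/tests/data': ['*.ff', '*.txt', '*.txt.gz', '*.dat'],
                 'xi_theory/tests':      ['Mr19*', 'bins*', 'cmass*']}

data_files = []
for dirname, patterns in dirs_patterns.items():
    for pattern in patterns:
        # Collect every file under dirname matching this pattern
        data_files.extend(glob(os.path.join(dirname, pattern)))

print(data_files)   # empty unless run from the repository root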
2 changes: 1 addition & 1 deletion theory.options
@@ -4,7 +4,7 @@ OPT = -DPERIODIC

#### Code specs for both theory and data Correlation Functions
#OPT += -DDOUBLE_PREC
#OPT += -DUSE_AVX
OPT += -DUSE_AVX
OPT += -DUSE_OMP


2 changes: 1 addition & 1 deletion xi_mocks/DDrppi/DDrppi_mocks.c
@@ -202,7 +202,7 @@ void Printhelp(void)
fprintf(stderr,"Precision = float\n");
#endif

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
fprintf(stderr,"Use AVX = True\n");
#else
fprintf(stderr,"Use AVX = False\n");
6 changes: 3 additions & 3 deletions xi_mocks/DDrppi/countpairs_rp_pi_mocks.c
@@ -21,7 +21,7 @@
#include "cosmology_params.h"
#include "set_cosmo_dist.h"

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
#include "avx_calls.h"
#endif

@@ -241,7 +241,7 @@ results_countpairs_mocks * countpairs_mocks(const int64_t ND1, DOUBLE *phi1, DOU
rupp_sqr[i] = rupp[i]*rupp[i];
}

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
AVX_FLOATS m_rupp_sqr[nrpbin];
AVX_FLOATS m_kbin[nrpbin];
for(int i=0;i<nrpbin;i++) {
@@ -413,7 +413,7 @@ results_countpairs_mocks * countpairs_mocks(const int64_t ND1, DOUBLE *phi1, DOU
/* const DOUBLE TWO=2.0; */
/* const DOUBLE sqr_d1 = d1[i]*d1[i]; */

#ifndef USE_AVX
#if !(defined(USE_AVX) && defined(__AVX__))

DOUBLE *localx2 = x2;
DOUBLE *localy2 = y2;
2 changes: 1 addition & 1 deletion xi_mocks/tests/tests_mocks.c
@@ -33,7 +33,7 @@
#endif

#if !(defined(__INTEL_COMPILER)) && defined(USE_AVX)
#warning Test suite for mocks will be slow without Intel ICC while USE_AVX is set.
#warning Test suite for mocks runs faster with the Intel compiler (icc) and AVX enabled.
#endif

#ifndef SILENT
Expand Down
12 changes: 6 additions & 6 deletions xi_mocks/vpf/countspheres_mocks.c
@@ -19,7 +19,7 @@
#include "set_cosmo_dist.h"//cosmological distance calculations
#include "cosmology_params.h"//init_cosmology

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
#include "avx_calls.h"
#endif

@@ -256,7 +256,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
const DOUBLE rstep = rmax/(DOUBLE)nbin ;
const DOUBLE inv_rstep = ((DOUBLE) 1.0)/rstep;

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
AVX_FLOATS m_rupp_sqr[nbin];
AVX_FLOATS m_rmax_sqr = AVX_SET_FLOAT(rmax_sqr);
for(int k=0;k<nbin;k++) {
@@ -316,7 +316,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
const int max_ix = ix + bin_refine_factor > ngrid-1 ? ngrid-1:ix + bin_refine_factor;
for(int iix=min_ix;iix<=max_ix;iix++) {
const DOUBLE newxpos = xcen;
#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
const AVX_FLOATS m_newxpos = AVX_SET_FLOAT(newxpos);
#endif

@@ -325,7 +325,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal

for(int iiy=min_iy;iiy<=max_iy;iiy++) {
const DOUBLE newypos = ycen;
#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
const AVX_FLOATS m_newypos = AVX_SET_FLOAT(newypos);
#endif

@@ -334,7 +334,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal

for(int iiz=min_iz;iiz<=max_iz;iiz++) {
const DOUBLE newzpos = zcen;
#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
const AVX_FLOATS m_newzpos = AVX_SET_FLOAT(newzpos);
#endif
const int index=iix*ngrid*ngrid + iiy*ngrid + iiz;
@@ -344,7 +344,7 @@ results_countspheres_mocks * countspheres_mocks(const int64_t Ngal, DOUBLE *xgal
DOUBLE *z2 = cellstruct->pos + 2*NVEC;
int ipart;
for(ipart=0;ipart<=(cellstruct->nelements-NVEC);ipart+=NVEC) {
#ifndef USE_AVX
#if !(defined(USE_AVX) && defined(__AVX__))
int ibin[NVEC];
#if __INTEL_COMPILER
#pragma simd vectorlengthfor(DOUBLE)
2 changes: 1 addition & 1 deletion xi_mocks/wtheta/DDtheta_mocks.c
@@ -187,7 +187,7 @@ void Printhelp(void)
fprintf(stderr,"Precision = float\n");
#endif

#ifdef USE_AVX
#if defined(USE_AVX) && defined(__AVX__)
fprintf(stderr,"Use AVX = True\n");
#else
fprintf(stderr,"Use AVX = False\n");