Skip to content

Commit

Permalink
Moving stuff around, get rid of c function for acceleration of compf,…
Browse files Browse the repository at this point in the history
… etc
  • Loading branch information
nlhepler committed Aug 4, 2011
1 parent dea0c10 commit f4bf33e
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 195 deletions.
16 changes: 3 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
MAKE = make
CC = gcc-4.6
CFLAGS = -O3 -fno-common -ffast-math -fPIC # -DUSE_SSE2
CONTRIB = contrib
PYDIR = idepi
SUBDIRS = $(wildcard $(CONTRIB)/hmmer*)

compf: libcompf.dylib

libcompf.dylib: idepi/_compf.c
$(CC) $(CFLAGS) -Icontrib/sse_mathfun -c -o _compf.o $<
$(CC) -shared -dylib -o $@ _compf.o
@-rm _compf.o

all:
@$(foreach var, $(SUBDIRS), make -C $(var) all;)
@$(foreach var, $(SUBDIRS), $(MAKE) -C $(var) all;)

clean:
@-$(foreach var, $(SUBDIRS), make -C $(var) clean;)
@-$(foreach var, $(SUBDIRS), $(MAKE) -C $(var) clean;)
@-rm $(PYDIR)/*.pyc
@-rm libcompf.dylib

distclean: clean
@-$(foreach var, $(SUBDIRS), make -C $(var) distclean;)
@-$(foreach var, $(SUBDIRS), $(MAKE) -C $(var) distclean;)
@-rm $(PYDIR)/*.pyc
90 changes: 0 additions & 90 deletions idepi/_compf.c

This file was deleted.

94 changes: 94 additions & 0 deletions idepi/_filter/_phylofilter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@

import json
from os import close, remove
from os.path import dirname, exists, join, realpath
from tempfile import mkstemp

from numpy import zeros

from Bio import SeqIO

from _hyphy import HyPhy


__all__ = ['PhyloFilter']


class PhyloFilter(object):
    """Run a HyPhy batch file over a set of sequence records and keep its results.

    On construction the records are written as FASTA to a temporary file, the
    batch file is executed through the HyPhy bindings with that file as its
    input, and the HyPhy variables ``ids``, ``data`` and ``order`` are read
    back and stored on the instance. The temporary file is removed when the
    instance is garbage-collected.
    """

    # HyPhy result-type tags accepted by __get_value().
    MATRIX = HyPhy.THYPHY_TYPE_MATRIX
    NUMBER = HyPhy.THYPHY_TYPE_NUMBER
    STRING = HyPhy.THYPHY_TYPE_STRING

    # Class-level defaults so that __del__ and names() stay safe when
    # __init__ raises before the instance attributes have been assigned.
    __inputfile = None
    # Completion flag. Deliberately NOT called ``__run``: that name belongs to
    # the method below, and an instance attribute of the same (mangled) name
    # would shadow it and make the guard in names() always truthy.
    __has_run = False

    def __init__(self, seqrecords, batchfile=None):
        """Write ``seqrecords`` to a temp file and run the batch file on them.

        Parameters:
            seqrecords: iterable of sequence records; they are passed to
                ``SeqIO.write`` and later read for their ``.id`` / ``.seq``.
            batchfile: path to the HyPhy batch file. Defaults to
                ``../res/CorrectForPhylogeny.bf`` relative to this module.

        Raises:
            ValueError: if the batch file does not exist.
            RuntimeError: propagated from __get_value() when a HyPhy result
                cannot be cast to the requested type.
        """
        if batchfile is None:
            batchfile = join(dirname(realpath(__file__)), '..', 'res', 'CorrectForPhylogeny.bf')

        if not exists(batchfile):
            raise ValueError('Please pass a valid (and existing) batchfile to PhyloFilter()')

        # mkstemp() returns an open descriptor; only the path is needed here.
        fd, self.__inputfile = mkstemp()
        close(fd)

        self.__seqrecords = seqrecords
        self.__batchfile = batchfile
        self.__hyphy = HyPhy()

        self.__ids, self.__mat, self.__ord = PhyloFilter.__run(self, seqrecords)

        self.__has_run = True

    def __del__(self):
        """Remove the temporary FASTA file, if one was created and still exists."""
        path = self.__inputfile
        if path and exists(path):
            remove(path)

    def __get_value(self, variable, type):
        """Fetch the HyPhy variable ``variable`` and cast it to ``type``.

        Parameters:
            variable: name of the variable to ask HyPhy for.
            type: one of PhyloFilter.MATRIX, PhyloFilter.NUMBER or
                PhyloFilter.STRING.

        Returns:
            ``.sData`` of the string cast, ``.nValue`` of the number cast, or
            the matrix-cast object itself, depending on ``type``.

        Raises:
            ValueError: if ``type`` is not one of the three supported tags.
            RuntimeError: if HyPhy reports the result cannot be cast.
        """
        # Validate the tag before talking to HyPhy at all.
        if type not in (PhyloFilter.MATRIX, PhyloFilter.NUMBER, PhyloFilter.STRING):
            raise ValueError('Unknown type supplied: please use one of PhyloFilter.{MATRIX,NUMBER,STRING}')

        _res = self.__hyphy.AskFor(variable)
        if not self.__hyphy.CanICast(_res, type):
            raise RuntimeError('Cast failed in HyPhy, assume an incorrect type was supplied for variable `%s\'' % variable)

        res = self.__hyphy.CastResult(_res, type)
        if type == PhyloFilter.STRING:
            return res.castToString().sData
        if type == PhyloFilter.NUMBER:
            return res.castToNumber().nValue
        # Only MATRIX remains: the tag was validated above.
        return res.castToMatrix()

    def __run(self, seqrecords):
        """Execute the batch file on ``seqrecords`` and collect its outputs.

        Returns:
            A tuple ``(ids, mat, order)``: the cells of the first row of the
            HyPhy ``ids`` matrix as a list, the HyPhy ``data`` matrix copied
            into a float numpy array, and the HyPhy ``order`` string split on
            commas.
        """
        with open(self.__inputfile, 'w') as fh:
            SeqIO.write(seqrecords, fh, 'fasta')

        # The temp-file path is handed to the batch file as its option "0".
        self.__hyphy.ExecuteBF('ExecuteAFile("%s", { "0": "%s" })' % (self.__batchfile, self.__inputfile))

        _ids = PhyloFilter.__get_value(self, 'ids', PhyloFilter.MATRIX)
        _mat = PhyloFilter.__get_value(self, 'data', PhyloFilter.MATRIX)
        order = PhyloFilter.__get_value(self, 'order', PhyloFilter.STRING).split(',')

        # NOTE(review): row 0 is read immediately below, so a row count of 0
        # looks suspicious (1 may have been intended) -- confirm against the
        # HyPhy matrix API before changing.
        assert _ids.mRows == 0

        ids = [_ids.MatrixCell(0, i) for i in xrange(_ids.mCols)]
        mat = zeros((_mat.mRows, _mat.mCols), dtype=float)

        for i in xrange(_mat.mRows):
            for j in xrange(_mat.mCols):
                mat[i, j] = _mat.MatrixCell(i, j)

        return ids, mat, order

    def names(self, ref_id_func):
        """Locate the reference sequence among the stored records.

        ``ref_id_func`` is called with each record's ``id``; the sequence of
        the last record for which it returns a true value is taken as the
        reference.

        Raises:
            RuntimeError: if the model has not been computed, or if no record
                is accepted by ``ref_id_func``.

        NOTE(review): the method currently ends after locating the reference
        and returns nothing; it appears to be unfinished.
        """
        if not self.__has_run:
            raise RuntimeError('No phylofiltering model computed')

        ref = None
        for r in self.__seqrecords:
            # Direct call instead of the deprecated apply() builtin.
            if ref_id_func(r.id):
                ref = str(r.seq)

        if ref is None:
            raise RuntimeError('No reference sequence found, aborting')
64 changes: 30 additions & 34 deletions idepi/_diffusionkde.py → idepi/_kde/_diffusionkde.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@

from collections import namedtuple
from math import ceil, floor, pi, sqrt

import numpy as np

Expand Down Expand Up @@ -32,7 +31,6 @@ def __init__(self, data, interval=None, n=2 ** 14):
self.__pdf = None
self.__cdf = None
self.__bandwidth_cdf = None
self.__density = None
self.__mesh = None

# try:
Expand Down Expand Up @@ -100,18 +98,18 @@ def __idct1d(data):

@staticmethod
def __compf_py(t, s, I, a2):
return 2. * np.power(pi, (2. * s)) * np.sum(np.multiply(np.multiply(np.power(I, s), a2), np.exp(-I * np.power(np.pi, 2) * t)))
return 2. * np.power(np.pi, (2. * s)) * np.sum(np.multiply(np.multiply(np.power(I, s), a2), np.exp(-I * np.power(np.pi, 2) * t)))

@staticmethod
def __fixed_point(t, N, I, a2):
l = 7
f = DiffusionKde.__compf(t, l, I, a2)
for s in xrange(l-1, 1, -1):
K0 = np.prod(np.arange(1, 2 * s, 2)) / sqrt(2. * pi)
K0 = np.prod(np.arange(1, 2 * s, 2)) / np.sqrt(2. * np.pi)
const = (1. + (0.5 ** (s + 0.5))) / 3.
time = (2. * const * K0 / N / f) ** (2. / (3 + (2 * s)))
f = DiffusionKde.__compf(time, s, I, a2)
return t - ((2. * N * sqrt(pi) * f) ** (-2. / 5))
return t - ((2. * N * np.sqrt(np.pi) * f) ** (-2. / 5))

def __estimate(self, data, interval=None):

Expand All @@ -128,7 +126,7 @@ def __estimate(self, data, interval=None):

nperr = np.seterr(under='ignore')

n = 2 ** ceil(np.log2(self.__n))
n = 2 ** np.ceil(np.log2(self.__n))

R = interval.max - interval.min
dx = 1. * R / (n - 1)
Expand Down Expand Up @@ -159,28 +157,27 @@ def __estimate(self, data, interval=None):
except ValueError:
t_star = 0.28 * (N ** (-2. / 5))

bandwidth = sqrt(t_star) * R
bandwidth = np.sqrt(t_star) * R

a_t = np.multiply(a, np.exp(-(np.arange(n) ** 2) * (np.pi ** 2) * t_star / 2))

density = DiffusionKde.__idct1d(a_t) / R

f = DiffusionKde.__compf(t_star, 1, I, a2)
t_cdf = (sqrt(pi) * f * N) ** (-2. / 3)
t_cdf = (np.sqrt(np.pi) * f * N) ** (-2. / 3)
a_cdf = np.multiply(a, np.exp(-(np.arange(n) ** 2) * (np.pi ** 2) * t_cdf / 2))
pdf = DiffusionKde.__idct1d(a_cdf) * (dx / R)
cdf = np.cumsum(pdf)
bandwidth_cdf = sqrt(t_cdf) * R
bandwidth_cdf = np.sqrt(t_cdf) * R

np.seterr(**nperr)

self.__bandwidth = bandwidth
self.__density = density
self.__mesh = mesh
self.__pdf = pdf / cdf[-1]
self.__pdf = density / cdf[-1]
self.__cdf = cdf / cdf[-1]
self.__bandwidth_cdf = bandwidth_cdf

self.__dx = dx
self.__run = True

return bandwidth
Expand All @@ -192,20 +189,9 @@ def __idx(self, x):
if x < self.__interval.min or x > self.__interval.max:
raise ValueError('x must fit in the interval %s' % str(self.__interval))

return [i for i in xrange(len(self.__mesh)) if self.__mesh[i] >= x][0] - 1
idx = int(np.floor((x - self.__mesh[0]) / self.__dx))

# def density(self, x=None):
# if not self.__run:
# raise RuntimeError('No kernel density estimation computed, aborting')
#
# if x is None:
# return self.__density.copy()
#
# idx = DiffusionKde.__idx(self, x)
# xp = [self.__mesh[idx], self.__mesh[idx+1]]
# yp = [self.__density[idx], self.__density[idx+1]]
#
# return np.interp(x, xp, yp)
return idx
#
# def mesh(self, x=None):
# if not self.__run:
Expand All @@ -230,11 +216,14 @@ def evaluate(self, x):

ret = np.zeros((n,))

for i in xrange(len(_x)):
idx = DiffusionKde.__idx(self, _x[i])
xp = [self.__mesh[idx], self.__mesh[idx+1]]
yp = [self.__pdf[idx], self.__pdf[idx+1]]
ret[i] = np.interp(x, xp, yp)
for i in xrange(n):
try:
idx = DiffusionKde.__idx(self, _x[0][i])
xp = [self.__mesh[idx], self.__mesh[idx+1]]
yp = [self.__pdf[idx], self.__pdf[idx+1]]
ret[i] = np.interp(_x[0][i], xp, yp)
except ValueError:
ret[i] = 0.

return ret

Expand Down Expand Up @@ -266,11 +255,17 @@ def main():

d_3 = data[3]

MAX, MIN = max(data), min(data)
range = MAX - MIN; MAX += range / 4.; MIN -= range / 4.
dx = (MAX - MIN) / (2 ** 7)

mesh = np.arange(MIN, MAX, dx, dtype=float)

begin = time()

kde = DiffusionKde(data)

print d_3, kde(d_3)
print d_3, kde(d_3); pdf = kde(mesh)

runtime = time() - begin

Expand All @@ -279,13 +274,14 @@ def main():
# print runtime, bandwidth, len(density), np.sum(density), len(mesh), sum(pdf), cdf[-1]

print runtime
print sum(pdf) * dx

# import matplotlib.pyplot as plt
import matplotlib.pyplot as plt

# plt.plot(mesh, density)
# plt.plot(mesh, pdf)
plt.plot(mesh, pdf)
# plt.plot(mesh, cdf / 1000.)
# plt.show()
plt.show()

return 0

Expand Down
Loading

0 comments on commit f4bf33e

Please sign in to comment.