
merge in master
jmeyers314 committed Jul 13, 2016
2 parents 44a7c85 + ec9e888 commit a9a740e
Showing 15 changed files with 1,551 additions and 381 deletions.
1 change: 1 addition & 0 deletions dpmm/__init__.py
@@ -7,3 +7,4 @@
 from .prior import InvGamma2D
 from .data import PseudoMarginalData
 from .shear import Linear1DShear, Shear, WeakShear
+from .gmm import GaussND, GMM
10 changes: 4 additions & 6 deletions dpmm/density.py
@@ -1,6 +1,5 @@
 import numpy as np
 from scipy.special import gamma
-from utils import vTmv


 def multivariate_t_density(nu, mu, Sig, x):
@@ -10,12 +9,11 @@ def multivariate_t_density(nu, mu, Sig, x):
     d = len(mu)
     coef = gamma(nu/2.0+d/2.0) * detSig**(-0.5)
     coef /= gamma(nu/2.0) * nu**(d/2.0)*np.pi**(d/2.0)
     x = np.array(x)
-    if len(x.shape) == 1:
-        return coef * (1.0 + 1./nu*vTmv((x-mu).T, invSig)[0, 0])**(-(nu+d)/2.0)
+    if x.ndim == 1:
+        einsum = np.dot(x-mu, np.dot(invSig, x-mu))
     else:
-        prod = np.array([vTmv(x_.T, invSig)[0, 0] for x_ in (x-mu)])
-        return coef * (1.0 + prod/nu)**(-(nu+d)/2.0)
+        einsum = np.einsum("...i,ij,...j", x-mu, invSig, x-mu)  # (x-mu).T * invSig * (x-mu)
+    return coef * (1.0 + einsum/nu)**(-(nu+d)/2.0)


def t_density(nu, mu, sigsqr, x):
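
Not part of the commit: multivariate_t_density evaluates the multivariate Student-t density, whose kernel is (1 + (x-mu)^T Sig^{-1} (x-mu)/nu)^(-(nu+d)/2), and the new einsum line computes that quadratic form for every row of x at once. A minimal sketch with toy values, where the explicit loop stands in for the removed per-row vTmv computation:

import numpy as np

np.random.seed(0)
mu = np.array([0.5, -0.2])
A = np.random.normal(size=(2, 2))
invSig = np.dot(A, A.T) + np.eye(2)   # any symmetric positive-definite matrix
x = np.random.normal(size=(5, 2))     # five 2-d points

# Vectorized quadratic form, exactly as in the new code above.
vectorized = np.einsum("...i,ij,...j", x - mu, invSig, x - mu)
# Per-row loop, as the removed vTmv-based branch computed it.
looped = np.array([np.dot(xi - mu, np.dot(invSig, xi - mu)) for xi in x])
assert np.allclose(vectorized, looped)
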
28 changes: 14 additions & 14 deletions dpmm/dpmm.py
@@ -1,5 +1,3 @@
-import itertools
-
 import numpy as np
 from utils import pick_discrete
 from data import PseudoMarginalData, NullManip
@@ -43,7 +41,11 @@ def init_phi(self):
         self.label = np.zeros((self.n), dtype=int)
         self.phi = []
         self.nphi = []
-        for i in xrange(self.n):
+        # Seed the first data element to its own cluster.
+        self.phi.append(self.prior.post(self.mD[0]).sample())
+        self.nphi.append(1)
+        # And then let the rest percolate off of that.
+        for i in xrange(1, self.n):
             self.update_c_i(i)

     @property
@@ -63,16 +65,14 @@ def _initD(self):

     def draw_new_label(self, i):
         # This is essentially Neal (2000) equation (3.6)
-        # Start off with the probabilities for cloning an existing cluster:
-        p = [l1 * nphi
-             for l1, nphi in itertools.izip(self.prior.like1N(self.mD[i], self.phi),
-                                            self.nphi)]
-        # and then append the probability to create a new cluster.
-        p.append(self.r_i[i])
-        p = np.array(p)
-        # Normalize.  This essentially takes care of the factors of b/(n-1+alpha) in Neal (2000)
-        # equation (3.6)
-        p /= np.sum(p)
+        # Start with probabilities for cloning an existing cluster, and then append the probability
+        # to create a new cluster.
+        p = np.empty(len(self.phi)+1, dtype=float)
+        p[:-1] = self.prior.like1(self.mD[i], np.array(self.phi)) * np.array(self.nphi)
+        p[-1] = self.r_i[i]
+        # Note that the p probabilities are unnormalized here, but pick_discrete will rescale them
+        # so that the total probability is 1.0.  This normalization also captures the factors of
+        # b/(n-1+alpha) in Neal (2000).
         picked = pick_discrete(p)
         return picked

@@ -123,7 +123,7 @@ def update_latent_data(self):
             index = np.nonzero(self.label == i)[0]
             data = self._D[index]  # a PseudoMarginalData instance
             # calculate weights for selecting a representative sample
-            ps = self.prior.like1(self.manip(data.data), *ph) / data.interim_prior
+            ps = self.prior.like1(self.manip(data.data), ph) / data.interim_prior
             ps /= np.sum(ps, axis=1)[:, np.newaxis]
             for j, p in enumerate(ps):
                 self.D[index[j]] = data.data[j, pick_discrete(p)]
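
Not part of the commit: a toy version of the reweighting step above. Each pseudo-marginal datum carries several posterior samples, and one representative per datum is drawn with probability proportional to (cluster likelihood) / (interim prior). The like and interim arrays below are stand-ins for prior.like1(...) and data.interim_prior.

import numpy as np

np.random.seed(1)
ndata, nsamples = 3, 4                                         # 3 data, 4 samples each
like = np.random.uniform(0.1, 1.0, size=(ndata, nsamples))     # stand-in for prior.like1(...)
interim = np.random.uniform(0.5, 1.5, size=(ndata, nsamples))  # stand-in for interim_prior

ps = like / interim                          # importance weights
ps /= np.sum(ps, axis=1)[:, np.newaxis]      # normalize per datum, as in the diff
picks = [np.random.choice(nsamples, p=p) for p in ps]  # one representative per datum
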
40 changes: 38 additions & 2 deletions dpmm/gmm.py
@@ -3,8 +3,9 @@

 class GaussND(object):
     def __init__(self, mu, Sig):
-        self.mu = mu
-        self.Sig = Sig
+        self.mu = np.atleast_1d(mu)
+        self.Sig = np.atleast_2d(Sig)
+        self.d = len(self.mu)

     def cond(self, x):
         fixed = np.nonzero([x_ is not None for x_ in x])
@@ -19,12 +20,47 @@ def cond(self, x):
         new_Sig = Sig11 - np.dot(Sig12, np.dot(np.linalg.inv(Sig22), Sig12.T))
         return GaussND(new_mu, new_Sig)

+    def sample(self, size=None):
+        if self.d == 1:
+            return np.random.normal(self.mu, scale=np.sqrt(self.Sig), size=size)
+        else:
+            return np.random.multivariate_normal(self.mu, self.Sig, size=size)
+
+
+class GMM(object):
+    def __init__(self, components, proportions):
+        self.components = components
+        self.proportions = proportions
+        self.d = self.components[0].d
+
+    def cond(self, x):
+        components = [c.cond(x) for c in self.components]
+        return GMM(components, self.proportions)
+
+    def sample(self, size=None):
+        if size is None:
+            nums = np.random.multinomial(1, self.proportions)
+            c = np.argmax(nums)  # which class got picked (ndarray has no .index method)
+            return self.components[c].sample()
+        else:
+            n = np.prod(size)
+            if self.d == 1:
+                out = np.empty((n,), dtype=float)
+                nums = np.random.multinomial(n, self.proportions)
+                i = 0
+                for component, num in zip(self.components, nums):
+                    out[i:i+num] = component.sample(size=num)
+                    i += num
+                out = out.reshape(size)
+            else:
+                out = np.empty((n, self.d), dtype=float)
+                nums = np.random.multinomial(n, self.proportions)
+                i = 0
+                for component, num in zip(self.components, nums):
+                    out[i:i+num] = component.sample(size=num)
+                    i += num
+                if isinstance(size, int):
+                    out = out.reshape((size, self.d))
+                else:
+                    out = out.reshape(size+(self.d,))
+            return out
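
Not part of the commit: a usage sketch for the new classes, with toy numbers. The cond call assumes, consistent with the partial hunk above, that coordinates given as numbers are conditioned on and None coordinates stay free; the new_mu computation is not shown in this diff, so the conditional mean quoted in the comment is the standard formula rather than verified output.

import numpy as np
from dpmm import GaussND, GMM  # exported via the __init__.py change in this commit

# Condition a 2-d Gaussian on its second coordinate being 1.0.  With Sig11 = 2.0,
# Sig12 = 0.6, Sig22 = 1.0, the Schur complement above gives
# new_Sig = 2.0 - 0.6**2/1.0 = 1.64 (and the standard conditional mean is 0.6).
g = GaussND(mu=[0.0, 0.0], Sig=[[2.0, 0.6], [0.6, 1.0]])
g1 = g.cond([None, 1.0])        # None marks the free coordinate

# Two-component 2-d mixture; sample() draws multinomial counts per component
# and fills the output block by block, as in GMM.sample above.
gmm = GMM([GaussND([-2.0, 0.0], np.eye(2)), GaussND([2.0, 0.0], np.eye(2))],
          proportions=[0.3, 0.7])
draws = gmm.sample(size=1000)   # shape (1000, 2)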
