
merge in master
jmeyers314 committed Jul 13, 2016
2 parents 44a7c85 + ec9e888 commit a9a740e
Showing 15 changed files with 1,551 additions and 381 deletions.
1 change: 1 addition & 0 deletions dpmm/__init__.py
@@ -7,3 +7,4 @@
 from .prior import InvGamma2D
 from .data import PseudoMarginalData
 from .shear import Linear1DShear, Shear, WeakShear
+from .gmm import GaussND, GMM
10 changes: 4 additions & 6 deletions dpmm/density.py
@@ -1,6 +1,5 @@
 import numpy as np
 from scipy.special import gamma
-from utils import vTmv


 def multivariate_t_density(nu, mu, Sig, x):
@@ -10,12 +9,11 @@ def multivariate_t_density(nu, mu, Sig, x):
     d = len(mu)
     coef = gamma(nu/2.0+d/2.0) * detSig**(-0.5)
     coef /= gamma(nu/2.0) * nu**(d/2.0)*np.pi**(d/2.0)
     x = np.array(x)
-    if len(x.shape) == 1:
-        return coef * (1.0 + 1./nu*vTmv((x-mu).T, invSig)[0, 0])**(-(nu+d)/2.0)
+    if x.ndim == 1:
+        einsum = np.dot(x-mu, np.dot(invSig, x-mu))
     else:
-        prod = np.array([vTmv(x_.T, invSig)[0, 0] for x_ in (x-mu)])
-        return coef * (1.0 + prod/nu)**(-(nu+d)/2.0)
+        einsum = np.einsum("...i,ij,...j", x-mu, invSig, x-mu)  # (x-mu).T * invSig * (x-mu)
+    return coef * (1.0 + einsum/nu)**(-(nu+d)/2.0)


def t_density(nu, mu, sigsqr, x):
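
Not part of the commit: multivariate_t_density evaluates the multivariate Student-t density, whose kernel is (1 + (x-mu)^T Sig^{-1} (x-mu)/nu)^(-(nu+d)/2), and the new einsum line computes that quadratic form for every row of x at once. A minimal sketch with toy values, where the explicit loop stands in for the removed per-row vTmv computation:

import numpy as np

np.random.seed(0)
mu = np.array([0.5, -0.2])
A = np.random.normal(size=(2, 2))
invSig = np.dot(A, A.T) + np.eye(2)   # any symmetric positive-definite matrix
x = np.random.normal(size=(5, 2))     # five 2-d points

# Vectorized quadratic form, exactly as in the new code above.
vectorized = np.einsum("...i,ij,...j", x - mu, invSig, x - mu)
# Per-row loop, as the removed vTmv-based branch computed it.
looped = np.array([np.dot(xi - mu, np.dot(invSig, xi - mu)) for xi in x])
assert np.allclose(vectorized, looped)
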
28 changes: 14 additions & 14 deletions dpmm/dpmm.py
@@ -1,5 +1,3 @@
-import itertools
-
 import numpy as np
 from utils import pick_discrete
 from data import PseudoMarginalData, NullManip
@@ -43,7 +41,11 @@ def init_phi(self):
         self.label = np.zeros((self.n), dtype=int)
         self.phi = []
         self.nphi = []
-        for i in xrange(self.n):
+        # Seed the first data element to its own cluster.
+        self.phi.append(self.prior.post(self.mD[0]).sample())
+        self.nphi.append(1)
+        # And then let the rest percolate off of that.
+        for i in xrange(1, self.n):
             self.update_c_i(i)

     @property
@@ -63,16 +65,14 @@ def _initD(self):

     def draw_new_label(self, i):
         # This is essentially Neal (2000) equation (3.6)
-        # Start off with the probabilities for cloning an existing cluster:
-        p = [l1 * nphi
-             for l1, nphi in itertools.izip(self.prior.like1N(self.mD[i], self.phi),
-                                            self.nphi)]
-        # and then append the probability to create a new cluster.
-        p.append(self.r_i[i])
-        p = np.array(p)
-        # Normalize.  This essentially takes care of the factors of b/(n-1+alpha) in Neal (2000)
-        # equation (3.6)
-        p /= np.sum(p)
+        # Start with probabilities for cloning an existing cluster, and then append the probability
+        # to create a new cluster.
+        p = np.empty(len(self.phi)+1, dtype=float)
+        p[:-1] = self.prior.like1(self.mD[i], np.array(self.phi)) * np.array(self.nphi)
+        p[-1] = self.r_i[i]
+        # Note that the p probabilities are unnormalized here, but pick_discrete will rescale them
+        # so that the total probability is 1.0.  This normalization also captures the factors of
+        # b/(n-1+alpha) in Neal (2000).
         picked = pick_discrete(p)
         return picked

@@ -123,7 +123,7 @@ def update_latent_data(self):
             index = np.nonzero(self.label == i)[0]
             data = self._D[index]  # a PseudoMarginalData instance
             # calculate weights for selecting a representative sample
-            ps = self.prior.like1(self.manip(data.data), *ph) / data.interim_prior
+            ps = self.prior.like1(self.manip(data.data), ph) / data.interim_prior
             ps /= np.sum(ps, axis=1)[:, np.newaxis]
             for j, p in enumerate(ps):
                 self.D[index[j]] = data.data[j, pick_discrete(p)]
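
Not part of the commit: a toy version of the reweighting step above. Each pseudo-marginal datum carries several posterior samples, and one representative per datum is drawn with probability proportional to (cluster likelihood) / (interim prior). The like and interim arrays below are stand-ins for prior.like1(...) and data.interim_prior.

import numpy as np

np.random.seed(1)
ndata, nsamples = 3, 4                                         # 3 data, 4 samples each
like = np.random.uniform(0.1, 1.0, size=(ndata, nsamples))     # stand-in for prior.like1(...)
interim = np.random.uniform(0.5, 1.5, size=(ndata, nsamples))  # stand-in for interim_prior

ps = like / interim                          # importance weights
ps /= np.sum(ps, axis=1)[:, np.newaxis]      # normalize per datum, as in the diff
picks = [np.random.choice(nsamples, p=p) for p in ps]  # one representative per datum
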
40 changes: 38 additions & 2 deletions dpmm/gmm.py
@@ -3,8 +3,9 @@

 class GaussND(object):
     def __init__(self, mu, Sig):
-        self.mu = mu
-        self.Sig = Sig
+        self.mu = np.atleast_1d(mu)
+        self.Sig = np.atleast_2d(Sig)
+        self.d = len(self.mu)

     def cond(self, x):
         fixed = np.nonzero([x_ is not None for x_ in x])
@@ -19,12 +20,47 @@ def cond(self, x):
         new_Sig = Sig11 - np.dot(Sig12, np.dot(np.linalg.inv(Sig22), Sig12.T))
         return GaussND(new_mu, new_Sig)

+    def sample(self, size=None):
+        if self.d == 1:
+            return np.random.normal(self.mu, scale=np.sqrt(self.Sig), size=size)
+        else:
+            return np.random.multivariate_normal(self.mu, self.Sig, size=size)
+
+
+class GMM(object):
+    def __init__(self, components, proportions):
+        self.components = components
+        self.proportions = proportions
+        self.d = self.components[0].d
+
+    def cond(self, x):
+        components = [c.cond(x) for c in self.components]
+        return GMM(components, self.proportions)
+
+    def sample(self, size=None):
+        if size is None:
+            nums = np.random.multinomial(1, self.proportions)
+            c = np.argmax(nums)  # which class got picked (ndarray has no .index method)
+            return self.components[c].sample()
+        else:
+            n = np.prod(size)
+            if self.d == 1:
+                out = np.empty((n,), dtype=float)
+                nums = np.random.multinomial(n, self.proportions)
+                i = 0
+                for component, num in zip(self.components, nums):
+                    out[i:i+num] = component.sample(size=num)
+                    i += num
+                out = out.reshape(size)
+            else:
+                out = np.empty((n, self.d), dtype=float)
+                nums = np.random.multinomial(n, self.proportions)
+                i = 0
+                for component, num in zip(self.components, nums):
+                    out[i:i+num] = component.sample(size=num)
+                    i += num
+                if isinstance(size, int):
+                    out = out.reshape((size, self.d))
+                else:
+                    out = out.reshape(size+(self.d,))
+            return out
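
Not part of the commit: a usage sketch for the new classes, with toy numbers. The cond call assumes, consistent with the partial hunk above, that coordinates given as numbers are conditioned on and None coordinates stay free; the new_mu computation is not shown in this diff, so the conditional mean quoted in the comment is the standard formula rather than verified output.

import numpy as np
from dpmm import GaussND, GMM  # exported via the __init__.py change in this commit

# Condition a 2-d Gaussian on its second coordinate being 1.0.  With Sig11 = 2.0,
# Sig12 = 0.6, Sig22 = 1.0, the Schur complement above gives
# new_Sig = 2.0 - 0.6**2/1.0 = 1.64 (and the standard conditional mean is 0.6).
g = GaussND(mu=[0.0, 0.0], Sig=[[2.0, 0.6], [0.6, 1.0]])
g1 = g.cond([None, 1.0])        # None marks the free coordinate

# Two-component 2-d mixture; sample() draws multinomial counts per component
# and fills the output block by block, as in GMM.sample above.
gmm = GMM([GaussND([-2.0, 0.0], np.eye(2)), GaussND([2.0, 0.0], np.eye(2))],
          proportions=[0.3, 0.7])
draws = gmm.sample(size=1000)   # shape (1000, 2)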
