Merge pull request #46 from andriygav/develop
Improve Coverage
andriygav authored Mar 31, 2020
2 parents 561570e + 338a8d2 commit 10b7808
Showing 8 changed files with 363 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/mixturelib/__init__.py
@@ -1 +1 @@
-__version__ = '0.2.2'
+__version__ = '0.3.0'
163 changes: 161 additions & 2 deletions src/mixturelib/hyper_models.py
@@ -246,9 +246,9 @@ def PredictPi(self, X, HyperParameters):
        :rtype: FloatTensor
        """
        denum = self.m.sum()
-        try:
+        if denum != 0.:
            pi = torch.ones([X.shape[0], self.output_dim]) * (self.m / denum)
-        except:
+        else:
            pi = torch.zeros([X.shape[0], self.output_dim])
        return pi
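Note: the replaced `try`/`except` never actually fired here, because dividing a tensor by a zero tensor in PyTorch does not raise `ZeroDivisionError`; it silently produces `inf`/`nan`. The explicit zero check makes the fallback reachable and testable. A minimal sketch of the behaviour:

import torch

m = torch.ones(2)
denum = torch.tensor(0.)
print(m / denum)    # tensor([inf, inf]), no exception is raised
print(denum != 0.)  # tensor(False), the check the new branch relies on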

@@ -448,3 +448,162 @@ def PredictPi(self, X, HyperParameters):
"""
return F.softmax(self.forward(X), dim = -1)


class HyperModelGateSparsed(HyperModel):
    r"""A hyper model for a mixture of models. Each :math:`i`-th object from
    the train dataset has its own probability for each model, :math:`\pi^i`.

    In this hyper model, the probability of each local model is a vector
    drawn from a Dirichlet distribution with parameters :math:`\mu`
    and :math:`\gamma`.

    :param output_dim: The number of local models.
    :type output_dim: int
    :param gamma: The concentration of the prior Dirichlet distribution.
        Default 1.
    :type gamma: float
    :param mu: The mean of the prior Dirichlet distribution; normalised to
        sum to one in the constructor. Default torch.ones(2).
    :type mu: FloatTensor
    :param device: The device for pytorch.
        Can be 'cpu' or 'gpu'. Default 'cpu'.

    Example:

    >>> _ = torch.random.manual_seed(42) # Set random seed for repeatability
    >>>
    >>> w = torch.randn(2, 1) # Generate real parameter vector
    >>> X = torch.randn(5, 2) # Generate feature data
    >>> Z = torch.distributions.dirichlet.Dirichlet(
    ...     torch.tensor([0.5, 0.5])).sample(
    ...         (5,)) # Set the correspondence between data and local models.
    >>> Y = X@w + 0.1*torch.randn(5, 1) # Generate target data with noise 0.1
    >>>
    >>> hyper_model = HyperModelGateSparsed(
    ...     output_dim=2) # Model with Dirichlet weighting for each sample
    >>> hyper_parameters = {} # Without hyper parameters
    >>>
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters) # Log of probability before E step
    tensor([[-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863]])
    >>>
    >>> hyper_model.E_step(X, Y, Z, hyper_parameters)
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters) # Log of probability after E step
    tensor([[-1.9677, -0.4830],
            [-1.7785, -0.5417],
            [-0.5509, -1.7521],
            [-0.7250, -1.3642],
            [-0.4839, -1.9644]])
    """
    def __init__(self, output_dim=2, gamma=1., mu=torch.ones(2), device='cpu'):
        """Constructor method
        """
        super(HyperModelGateSparsed, self).__init__()
        self.output_dim = output_dim
        self.device = device

        self.mu = mu
        self.mu = self.mu/self.mu.sum()
        self.gamma = gamma
        self.mu_posterior = self.mu.clone()
        self.gamma_posterior = torch.tensor(self.gamma)

    def E_step(self, X, Y, Z, HyperParameters):
        r"""Doing E-step of EM-algorithm. Finds the variational probability
        `q` of the model parameters.

        Calculates the analytical solution for the estimate `q` in the
        class of Dirichlet distributions, :math:`q = \text{Dir}(m)`, where
        :math:`m_i = \gamma\mu + Z_i` for the :math:`i`-th object, and
        :math:`\mu` is the prior.

        .. warning::
            The prior :math:`\mu` is normalised to sum to one in the
            constructor, so only its direction matters; the concentration
            is controlled by :math:`\gamma`.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape
            `num_elements` :math:`\times` `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape
            `num_elements` :math:`\times` `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        """
        gamma = Z
        self.mu_posterior = (self.gamma*self.mu + gamma).detach()
        self.gamma_posterior = self.mu_posterior.sum(dim=-1).view([-1, 1])
        self.mu_posterior = self.mu_posterior/self.gamma_posterior
        pass

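A quick sanity check of the E-step update above, as a sketch assuming the default prior `gamma=1.` and `mu=[0.5, 0.5]`: the posterior Dirichlet parameter is `gamma*mu + Z`, normalised per object into a mean `mu_posterior` and a concentration `gamma_posterior`.

import torch

gamma, mu = 1., torch.tensor([0.5, 0.5])
Z = torch.tensor([[1., 0.]])              # one object, fully assigned to model 0
m = gamma * mu + Z                        # tensor([[1.5000, 0.5000]])
gamma_post = m.sum(dim=-1, keepdim=True)  # tensor([[2.]])
print(m / gamma_post)                     # tensor([[0.7500, 0.2500]])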
    def M_step(self, X, Y, Z, HyperParameters):
        r"""The method does nothing.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape
            `num_elements` :math:`\times` `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape
            `num_elements` :math:`\times` `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        """
        pass

    def LogPiExpectation(self, X, Y, HyperParameters):
        r"""Returns the expected value of each model's log probability.

        Returns the expectation of :math:`\log \pi`, where :math:`\pi` is a
        random vector from the Dirichlet distribution. The expectation is
        computed using the digamma function :math:`\psi`.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape
            `num_elements` :math:`\times` `num_answers`.
        :type Y: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        :return: The tensor of shape
            `num_elements` :math:`\times` `num_models`. The expected value
            of each model's log probability.
        :rtype: FloatTensor
        """
        temp_1 = torch.ones([X.shape[0], self.output_dim])
        temp_2 = (torch.digamma(self.gamma_posterior*self.mu_posterior)
                  - torch.digamma(self.gamma_posterior))

        return temp_1 * temp_2

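The closed form above is the standard Dirichlet identity :math:`\mathbb{E}[\log \pi_k] = \psi(\alpha_k) - \psi(\sum_j \alpha_j)` with :math:`\alpha = \gamma_{posterior} \mu_{posterior}`. A sketch of a Monte Carlo cross-check, not part of the library:

import torch

alpha = torch.tensor([1.5, 0.5])
analytic = torch.digamma(alpha) - torch.digamma(alpha.sum())
samples = torch.distributions.Dirichlet(alpha).sample((100000,))
print(analytic)                   # tensor([-0.3863, -2.3863])
print(samples.log().mean(dim=0))  # should be close to the line above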
    def PredictPi(self, X, HyperParameters):
        r"""Returns the probability (weight) of each model.

        Returns the vector :math:`\pi` for each object, where
        :math:`\pi = \frac{\textbf{m}}{\sum_k \textbf{m}_k}` and
        :math:`\textbf{m}` is the parameter of the posterior Dirichlet pdf.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        :return: The tensor of shape
            `num_elements` :math:`\times` `num_models`.
            The probability (weight) of each model.
        :rtype: FloatTensor
        """
        pi = torch.ones([X.shape[0], self.output_dim]) * self.mu_posterior
        return pi
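A short usage sketch for the class above (hypothetical, mirroring the doctest): after `E_step` the posterior mean is stored per object, so `PredictPi` returns one weight vector per object, with each row summing to one.

import torch
from mixturelib.hyper_models import HyperModelGateSparsed

_ = torch.random.manual_seed(42)
X, Y = torch.randn(5, 2), torch.randn(5, 1)
Z = torch.distributions.Dirichlet(torch.tensor([0.5, 0.5])).sample((5,))

hyper_model = HyperModelGateSparsed(output_dim=2)
hyper_model.E_step(X, Y, Z, {})
pi = hyper_model.PredictPi(X, {})
print(pi.sum(dim=-1))  # each row is normalised, up to float error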
7 changes: 4 additions & 3 deletions src/mixturelib/mixture.py
@@ -134,7 +134,8 @@ def __init__(self,
"""
super(MixtureEM, self).__init__()
if ListOfModels is None:
return None
raise ValueError("""The ListOfModels should be list with
positive length, but given: {}.""".format(ListOfModels))
else:
self.ListOfModels = ListOfModels

@@ -146,7 +147,8 @@ def __init__(self,
            self.HyperParameters[key] = torch.tensor(HyperParameters[key])

        if HyperModel is None:
-            return None
+            raise ValueError("""The HyperModel should be a hyper model
+                object, but given: {}.""".format(HyperModel))
        else:
            self.HyperModel = HyperModel

@@ -162,7 +164,6 @@ def __init__(self,
        self.model_type = model_type

        self.pZ = None
-        return
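With the two changes above, a misconfigured `MixtureEM` now fails loudly at construction time instead of silently producing a half-initialised object. A sketch of the new behaviour, assuming the remaining constructor arguments keep their defaults:

import pytest
from mixturelib.mixture import MixtureEM

with pytest.raises(ValueError):
    MixtureEM(ListOfModels=None, HyperModel=None)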

    def E_step(self, X, Y):
        r"""Doing E-step of EM-algorithm. This method calls E_step for all
38 changes: 15 additions & 23 deletions src/mixturelib/regularizers.py
@@ -188,17 +188,14 @@ def M_step(self, X, Y, Z, HyperParameters):
        ListOfNewW0 = []

        for k, w_0 in self.ListOfModelsW0:
-            if len(self.ListOfModels[k].A.shape) == 1:
-                try:
-                    A_inv = torch.diag(1./self.ListOfModels[k].A)
-                except:
-                    A_inv = (2**32)*torch.ones(self.ListOfModels[k].A.shape[0])
-            else:
-                try:
-                    A_inv = torch.inverse(self.ListOfModels[k].A)
-                except:
-                    A_inv = (2**32)*torch.eye(self.ListOfModels[k].A.shape[0])
-
+            A = self.ListOfModels[k].A
+            if len(A.shape) == 1:
+                A = torch.diag(A)
+            try:
+                A_inv = torch.inverse(A)
+            except:
+                A_inv = (2**32)*torch.eye(A.shape[0])

            B = self.ListOfModels[k].B

            if len(alpha.shape) == 0:
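Note: the refactoring folds the one-dimensional (diagonal) case into the general one by materialising `A` as a matrix first, so a single `torch.inverse` call and a single fallback cover both branches. `torch.inverse` raises a `RuntimeError` on singular input, which is what the bare `except` guards against. A minimal illustration:

import torch

A = torch.zeros(2, 2)  # deliberately singular
try:
    A_inv = torch.inverse(A)
except RuntimeError:
    A_inv = (2**32) * torch.eye(A.shape[0])  # large finite fallback
print(A_inv)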
@@ -378,18 +375,13 @@ def M_step(self, X, Y, Z, HyperParameters):
        loss = 0
        for local_model, w0 in zip(self.ListOfModels, W0):
            if local_model.A is not None:
-                if len(local_model.A.shape) == 1:
-                    try:
-                        A_inv = torch.diag(1. / local_model.A)
-                    except:
-                        A_inv = (2**32) \
-                            * torch.ones(local_model.A.shape[0])
-                else:
-                    try:
-                        A_inv = torch.inverse(local_model.A)
-                    except:
-                        A_inv = (2**32) \
-                            * torch.eye(local_model.A.shape[0])
+                A = local_model.A
+                if len(A.shape) == 1:
+                    A = torch.diag(A)
+                try:
+                    A_inv = torch.inverse(A)
+                except:
+                    A_inv = (2**32)*torch.eye(A.shape[0])


                loss += -0.5 * (w0 @ A_inv@w0) \
109 changes: 108 additions & 1 deletion tests/test_hyper_model.py
@@ -1,11 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pytest
import torch

from mixturelib.hyper_models import HyperModel
from mixturelib.hyper_models import HyperModelDirichlet
from mixturelib.hyper_models import HyperModelGateSparsed
from mixturelib.hyper_models import HyperExpertNN

def test_HyperModel():
    model = HyperModel()

    with pytest.raises(NotImplementedError):
        model.E_step(None, None, None, None)

    with pytest.raises(NotImplementedError):
        model.M_step(None, None, None, None)

    with pytest.raises(NotImplementedError):
        model.LogPiExpectation(None, None, None)

    with pytest.raises(NotImplementedError):
        model.PredictPi(None, None)

def test_HyperModelDirichlet_init():
    torch.manual_seed(42)

@@ -75,14 +92,26 @@ def test_HyperModelDirichlet_PredictPi():

    hyper_model = HyperModelDirichlet(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    pi = hyper_model.PredictPi(X, HyperParameters)

    assert pi.sum().long().item() == 2

    hyper_model = HyperModelDirichlet(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Z = -1*torch.ones(1, 2)
    HyperParameters = {'beta': torch.tensor(1.)}
    hyper_model.E_step(X, None, Z, HyperParameters)

    assert (hyper_model.m == torch.tensor([0., 0.])).all()

    pi = hyper_model.PredictPi(X, HyperParameters)
    assert (pi == 0.).all()


def test_HyperExpertNN_init():
    torch.manual_seed(42)
@@ -201,3 +230,81 @@ def test_HyperExpertNN_PredictPi():

    assert pi.sum().long().item() == 2


def test_HyperModelGateSparsed_init():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    assert hyper_model.output_dim == 2
    assert hyper_model.device == 'cpu'
    assert (hyper_model.mu == 0.5*torch.ones(2)).all()
    assert hyper_model.gamma == 1.


def test_HyperModelGateSparsed_E_step():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Y = torch.randn(2, 1)
    Z = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    hyper_model.E_step(X, Y, Z, HyperParameters)

    assert hyper_model.output_dim == 2
    assert hyper_model.device == 'cpu'
    assert (hyper_model.mu == 0.5*torch.ones(2)).all()
    assert (torch.round(hyper_model.mu_posterior) ==
            torch.tensor([[1., 0.], [1., 0.]])).all()
    assert (torch.round(hyper_model.gamma_posterior) ==
            torch.tensor([[3.], [2.]])).all()


def test_HyperModelGateSparsed_M_step():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Y = torch.randn(2, 1)
    Z = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    hyper_model.M_step(X, Y, Z, HyperParameters)

    assert hyper_model.output_dim == 2
    assert hyper_model.device == 'cpu'
    assert (hyper_model.mu == 0.5*torch.ones(2)).all()
    assert hyper_model.gamma == 1.


def test_HyperModelGateSparsed_LogPiExpectation():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Y = torch.randn(2, 1)
    HyperParameters = {'beta': torch.tensor(1.)}

    log_pi = hyper_model.LogPiExpectation(X, Y, HyperParameters)

    assert log_pi.sum().long().item() == -5.


def test_HyperModelGateSparsed_PredictPi():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    pi = hyper_model.PredictPi(X, HyperParameters)

    assert pi.sum().long().item() == 2
