Merge pull request #46 from andriygav/develop
Improve Coverage
andriygav authored Mar 31, 2020
2 parents 561570e + 338a8d2 commit 10b7808
Showing 8 changed files with 363 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/mixturelib/__init__.py
@@ -1 +1 @@
-__version__ = '0.2.2'
+__version__ = '0.3.0'
163 changes: 161 additions & 2 deletions src/mixturelib/hyper_models.py
@@ -246,9 +246,9 @@ def PredictPi(self, X, HyperParameters):
        :rtype: FloatTensor
        """
        denum = self.m.sum()
-        try:
+        if denum != 0.:
            pi = torch.ones([X.shape[0], self.output_dim]) * (self.m / denum)
-        except:
+        else:
            pi = torch.zeros([X.shape[0], self.output_dim])
        return pi
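Note: the replaced `try`/`except` never actually fired here, because dividing a tensor by a zero tensor in PyTorch does not raise `ZeroDivisionError`; it silently produces `inf`/`nan`. The explicit zero check makes the fallback reachable and testable. A minimal sketch of the behaviour:

import torch

m = torch.ones(2)
denum = torch.tensor(0.)
print(m / denum)    # tensor([inf, inf]), no exception is raised
print(denum != 0.)  # tensor(False), the check the new branch relies on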

@@ -448,3 +448,162 @@ def PredictPi(self, X, HyperParameters):
"""
return F.softmax(self.forward(X), dim = -1)


class HyperModelGateSparsed(HyperModel):
    r"""A hyper model for a mixture of models. Each :math:`i`-th object from
    the train dataset has its own probability for each model, :math:`\pi^i`.

    In this hyper model, the probability of each local model is a vector
    drawn from a Dirichlet distribution with parameters :math:`\mu`
    and :math:`\gamma`.

    :param output_dim: The number of local models.
    :type output_dim: int
    :param gamma: The concentration of the prior Dirichlet distribution.
        Default 1.
    :type gamma: float
    :param mu: The mean of the prior Dirichlet distribution; normalised to
        sum to one in the constructor. Default torch.ones(2).
    :type mu: FloatTensor
    :param device: The device for pytorch.
        Can be 'cpu' or 'gpu'. Default 'cpu'.

    Example:

    >>> _ = torch.random.manual_seed(42) # Set random seed for repeatability
    >>>
    >>> w = torch.randn(2, 1) # Generate real parameter vector
    >>> X = torch.randn(5, 2) # Generate feature data
    >>> Z = torch.distributions.dirichlet.Dirichlet(
    ...     torch.tensor([0.5, 0.5])).sample(
    ...         (5,)) # Set the correspondence between data and local models.
    >>> Y = X@w + 0.1*torch.randn(5, 1) # Generate target data with noise 0.1
    >>>
    >>> hyper_model = HyperModelGateSparsed(
    ...     output_dim=2) # Model with Dirichlet weighting for each sample
    >>> hyper_parameters = {} # Without hyper parameters
    >>>
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters) # Log of probability before E step
    tensor([[-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863],
            [-1.3863, -1.3863]])
    >>>
    >>> hyper_model.E_step(X, Y, Z, hyper_parameters)
    >>> hyper_model.LogPiExpectation(
    ...     X, Y, hyper_parameters) # Log of probability after E step
    tensor([[-1.9677, -0.4830],
            [-1.7785, -0.5417],
            [-0.5509, -1.7521],
            [-0.7250, -1.3642],
            [-0.4839, -1.9644]])
    """
    def __init__(self, output_dim=2, gamma=1., mu=torch.ones(2), device='cpu'):
        """Constructor method
        """
        super(HyperModelGateSparsed, self).__init__()
        self.output_dim = output_dim
        self.device = device

        self.mu = mu
        self.mu = self.mu/self.mu.sum()
        self.gamma = gamma
        self.mu_posterior = self.mu.clone()
        self.gamma_posterior = torch.tensor(self.gamma)

    def E_step(self, X, Y, Z, HyperParameters):
        r"""Doing E-step of EM-algorithm. Finds the variational probability
        `q` of the model parameters.

        Calculates the analytical solution for the estimate `q` in the
        class of Dirichlet distributions, :math:`q = \text{Dir}(m)`, where
        :math:`m_i = \gamma\mu + Z_i` for the :math:`i`-th object, and
        :math:`\mu` is the prior.

        .. warning::
            The prior :math:`\mu` is normalised to sum to one in the
            constructor, so only its direction matters; the concentration
            is controlled by :math:`\gamma`.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape
            `num_elements` :math:`\times` `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape
            `num_elements` :math:`\times` `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        """
        gamma = Z
        self.mu_posterior = (self.gamma*self.mu + gamma).detach()
        self.gamma_posterior = self.mu_posterior.sum(dim=-1).view([-1, 1])
        self.mu_posterior = self.mu_posterior/self.gamma_posterior
        pass

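A quick sanity check of the E-step update above, as a sketch assuming the default prior `gamma=1.` and `mu=[0.5, 0.5]`: the posterior Dirichlet parameter is `gamma*mu + Z`, normalised per object into a mean `mu_posterior` and a concentration `gamma_posterior`.

import torch

gamma, mu = 1., torch.tensor([0.5, 0.5])
Z = torch.tensor([[1., 0.]])              # one object, fully assigned to model 0
m = gamma * mu + Z                        # tensor([[1.5000, 0.5000]])
gamma_post = m.sum(dim=-1, keepdim=True)  # tensor([[2.]])
print(m / gamma_post)                     # tensor([[0.7500, 0.2500]])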
    def M_step(self, X, Y, Z, HyperParameters):
        r"""The method does nothing.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape
            `num_elements` :math:`\times` `num_answers`.
        :type Y: FloatTensor
        :param Z: The tensor of shape
            `num_elements` :math:`\times` `num_models`.
        :type Z: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        """
        pass

    def LogPiExpectation(self, X, Y, HyperParameters):
        r"""Returns the expected value of each model's log probability.

        Returns the expectation of :math:`\log \pi`, where :math:`\pi` is a
        random vector from the Dirichlet distribution. The expectation is
        computed using the digamma function :math:`\psi`.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param Y: The tensor of shape
            `num_elements` :math:`\times` `num_answers`.
        :type Y: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        :return: The tensor of shape
            `num_elements` :math:`\times` `num_models`. The expected value
            of each model's log probability.
        :rtype: FloatTensor
        """
        temp_1 = torch.ones([X.shape[0], self.output_dim])
        temp_2 = (torch.digamma(self.gamma_posterior*self.mu_posterior)
                  - torch.digamma(self.gamma_posterior))

        return temp_1 * temp_2

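The closed form above is the standard Dirichlet identity :math:`\mathbb{E}[\log \pi_k] = \psi(\alpha_k) - \psi(\sum_j \alpha_j)` with :math:`\alpha = \gamma_{posterior} \mu_{posterior}`. A sketch of a Monte Carlo cross-check, not part of the library:

import torch

alpha = torch.tensor([1.5, 0.5])
analytic = torch.digamma(alpha) - torch.digamma(alpha.sum())
samples = torch.distributions.Dirichlet(alpha).sample((100000,))
print(analytic)                   # tensor([-0.3863, -2.3863])
print(samples.log().mean(dim=0))  # should be close to the line above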
    def PredictPi(self, X, HyperParameters):
        r"""Returns the probability (weight) of each model.

        Returns the vector :math:`\pi` for each object, where
        :math:`\pi = \frac{\textbf{m}}{\sum_k \textbf{m}_k}` and
        :math:`\textbf{m}` is the parameter of the posterior Dirichlet pdf.

        :param X: The tensor of shape
            `num_elements` :math:`\times` `num_feature`.
        :type X: FloatTensor
        :param HyperParameters: The dictionary of all hyper parameters.
            Where `key` is string and `value` is FloatTensor.
        :type HyperParameters: dict
        :return: The tensor of shape
            `num_elements` :math:`\times` `num_models`.
            The probability (weight) of each model.
        :rtype: FloatTensor
        """
        pi = torch.ones([X.shape[0], self.output_dim]) * self.mu_posterior
        return pi
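A short usage sketch for the class above (hypothetical, mirroring the doctest): after `E_step` the posterior mean is stored per object, so `PredictPi` returns one weight vector per object, with each row summing to one.

import torch
from mixturelib.hyper_models import HyperModelGateSparsed

_ = torch.random.manual_seed(42)
X, Y = torch.randn(5, 2), torch.randn(5, 1)
Z = torch.distributions.Dirichlet(torch.tensor([0.5, 0.5])).sample((5,))

hyper_model = HyperModelGateSparsed(output_dim=2)
hyper_model.E_step(X, Y, Z, {})
pi = hyper_model.PredictPi(X, {})
print(pi.sum(dim=-1))  # each row is normalised, up to float error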
7 changes: 4 additions & 3 deletions src/mixturelib/mixture.py
@@ -134,7 +134,8 @@ def __init__(self,
"""
super(MixtureEM, self).__init__()
if ListOfModels is None:
return None
raise ValueError("""The ListOfModels should be list with
positive length, but given: {}.""".format(ListOfModels))
else:
self.ListOfModels = ListOfModels

@@ -146,7 +147,8 @@ def __init__(self,
            self.HyperParameters[key] = torch.tensor(HyperParameters[key])

        if HyperModel is None:
-            return None
+            raise ValueError("""The HyperModel should be a hyper model
+                object, but given: {}.""".format(HyperModel))
        else:
            self.HyperModel = HyperModel

@@ -162,7 +164,6 @@ def __init__(self,
        self.model_type = model_type

        self.pZ = None
-        return
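With the two changes above, a misconfigured `MixtureEM` now fails loudly at construction time instead of silently producing a half-initialised object. A sketch of the new behaviour, assuming the remaining constructor arguments keep their defaults:

import pytest
from mixturelib.mixture import MixtureEM

with pytest.raises(ValueError):
    MixtureEM(ListOfModels=None, HyperModel=None)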

    def E_step(self, X, Y):
        r"""Doing E-step of EM-algorithm. This method calls E_step for all
38 changes: 15 additions & 23 deletions src/mixturelib/regularizers.py
@@ -188,17 +188,14 @@ def M_step(self, X, Y, Z, HyperParameters):
        ListOfNewW0 = []

        for k, w_0 in self.ListOfModelsW0:
-            if len(self.ListOfModels[k].A.shape) == 1:
-                try:
-                    A_inv = torch.diag(1./self.ListOfModels[k].A)
-                except:
-                    A_inv = (2**32)*torch.ones(self.ListOfModels[k].A.shape[0])
-            else:
-                try:
-                    A_inv = torch.inverse(self.ListOfModels[k].A)
-                except:
-                    A_inv = (2**32)*torch.eye(self.ListOfModels[k].A.shape[0])
-
+            A = self.ListOfModels[k].A
+            if len(A.shape) == 1:
+                A = torch.diag(A)
+            try:
+                A_inv = torch.inverse(A)
+            except:
+                A_inv = (2**32)*torch.eye(A.shape[0])

            B = self.ListOfModels[k].B

            if len(alpha.shape) == 0:
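Note: the refactoring folds the one-dimensional (diagonal) case into the general one by materialising `A` as a matrix first, so a single `torch.inverse` call and a single fallback cover both branches. `torch.inverse` raises a `RuntimeError` on singular input, which is what the bare `except` guards against. A minimal illustration:

import torch

A = torch.zeros(2, 2)  # deliberately singular
try:
    A_inv = torch.inverse(A)
except RuntimeError:
    A_inv = (2**32) * torch.eye(A.shape[0])  # large finite fallback
print(A_inv)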
@@ -378,18 +375,13 @@ def M_step(self, X, Y, Z, HyperParameters):
        loss = 0
        for local_model, w0 in zip(self.ListOfModels, W0):
            if local_model.A is not None:
-                if len(local_model.A.shape) == 1:
-                    try:
-                        A_inv = torch.diag(1. / local_model.A)
-                    except:
-                        A_inv = (2**32) \
-                            * torch.ones(local_model.A.shape[0])
-                else:
-                    try:
-                        A_inv = torch.inverse(local_model.A)
-                    except:
-                        A_inv = (2**32) \
-                            * torch.eye(local_model.A.shape[0])
+                A = local_model.A
+                if len(A.shape) == 1:
+                    A = torch.diag(A)
+                try:
+                    A_inv = torch.inverse(A)
+                except:
+                    A_inv = (2**32)*torch.eye(A.shape[0])


                loss += -0.5 * (w0 @ A_inv@w0) \
109 changes: 108 additions & 1 deletion tests/test_hyper_model.py
@@ -1,11 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pytest
import torch

from mixturelib.hyper_models import HyperModel
from mixturelib.hyper_models import HyperModelDirichlet
from mixturelib.hyper_models import HyperModelGateSparsed
from mixturelib.hyper_models import HyperExpertNN

def test_HyperModel():
    model = HyperModel()

    with pytest.raises(NotImplementedError):
        model.E_step(None, None, None, None)

    with pytest.raises(NotImplementedError):
        model.M_step(None, None, None, None)

    with pytest.raises(NotImplementedError):
        model.LogPiExpectation(None, None, None)

    with pytest.raises(NotImplementedError):
        model.PredictPi(None, None)

def test_HyperModelDirichlet_init():
    torch.manual_seed(42)

@@ -75,14 +92,26 @@ def test_HyperModelDirichlet_PredictPi():

    hyper_model = HyperModelDirichlet(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    pi = hyper_model.PredictPi(X, HyperParameters)

    assert pi.sum().long().item() == 2

    hyper_model = HyperModelDirichlet(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Z = -1*torch.ones(1, 2)
    HyperParameters = {'beta': torch.tensor(1.)}
    hyper_model.E_step(X, None, Z, HyperParameters)

    assert (hyper_model.m == torch.tensor([0., 0.])).all()

    pi = hyper_model.PredictPi(X, HyperParameters)
    assert (pi == 0.).all()


def test_HyperExpertNN_init():
    torch.manual_seed(42)
@@ -201,3 +230,81 @@ def test_HyperExpertNN_PredictPi():

    assert pi.sum().long().item() == 2


def test_HyperModelGateSparsed_init():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    assert hyper_model.output_dim == 2
    assert hyper_model.device == 'cpu'
    assert (hyper_model.mu == 0.5*torch.ones(2)).all()
    assert hyper_model.gamma == 1.


def test_HyperModelGateSparsed_E_step():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Y = torch.randn(2, 1)
    Z = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    hyper_model.E_step(X, Y, Z, HyperParameters)

    assert hyper_model.output_dim == 2
    assert hyper_model.device == 'cpu'
    assert (hyper_model.mu == 0.5*torch.ones(2)).all()
    assert (torch.round(hyper_model.mu_posterior) ==
            torch.tensor([[1., 0.], [1., 0.]])).all()
    assert (torch.round(hyper_model.gamma_posterior) ==
            torch.tensor([[3.], [2.]])).all()


def test_HyperModelGateSparsed_M_step():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Y = torch.randn(2, 1)
    Z = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    hyper_model.M_step(X, Y, Z, HyperParameters)

    assert hyper_model.output_dim == 2
    assert hyper_model.device == 'cpu'
    assert (hyper_model.mu == 0.5*torch.ones(2)).all()
    assert hyper_model.gamma == 1.


def test_HyperModelGateSparsed_LogPiExpectation():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    Y = torch.randn(2, 1)
    HyperParameters = {'beta': torch.tensor(1.)}

    log_pi = hyper_model.LogPiExpectation(X, Y, HyperParameters)

    assert log_pi.sum().long().item() == -5.


def test_HyperModelGateSparsed_PredictPi():
    torch.manual_seed(42)

    hyper_model = HyperModelGateSparsed(output_dim = 2, device = 'cpu')

    X = torch.randn(2, 2)
    HyperParameters = {'beta': torch.tensor(1.)}

    pi = hyper_model.PredictPi(X, HyperParameters)

    assert pi.sum().long().item() == 2
