@@ -65,11 +65,13 @@ def __init__(
         n_components,
         n_features,
         covariance_type="full",
-        eps=1.0e-3,
-        init_means="random",
+        eps=1.0e-8,
+        cov_reg=1e-6,
+        init_means="kmeans",
         mu_init=None,
         var_init=None,
         verbose=True,
+        device="cpu",
     ):
         """
         Initializes the model and brings all tensors into their required shape.
@@ -108,11 +110,13 @@ def __init__(
 
         self.covariance_type = covariance_type
         self.init_means = init_means
+        self.cov_reg = cov_reg
 
         assert self.covariance_type in ["full", "diag"]
         assert self.init_means in ["kmeans", "random"]
 
         self.verbose = verbose
+        self.device = device
         self._init_params()
 
     def _init_params(self):
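Reviewer note: taken together, the two hunks above change the constructor's defaults and surface. A minimal usage sketch of the new signature follows; the class name GaussianMixture and its import path are assumptions, not shown in this diff.

import torch
from gmm import GaussianMixture  # hypothetical import path

model = GaussianMixture(
    n_components=4,
    n_features=2,
    covariance_type="full",
    eps=1.0e-8,           # new default: numerical floor added to the variances
    cov_reg=1e-6,         # new: diagonal jitter used in _calculate_log_det
    init_means="kmeans",  # new default (was "random")
    device="cpu",         # new: where the mixture parameters live
)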
@@ -182,7 +186,10 @@ def _init_params(self):
             requires_grad=True,
         )
 
-        self.params = [self.pi, self.mu, self.var]
+        self.mu.data = self.mu.data.to(self.device)  # .to() is out-of-place; reassign .data to actually move the parameter
+        self.var.data = self.var.data.to(self.device)
+        self.pi.data = self.pi.data.to(self.device)
+
         self.fitted = False
 
     def _finish_optimization(self):
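Note on the hunk above: Tensor.to() is out-of-place, which is why the replacement lines reassign .data rather than calling .to() bare. A self-contained illustration:

import torch

p = torch.zeros(3, requires_grad=True)  # a leaf parameter, like self.mu
p.to("cpu")                             # returns a tensor; p itself is untouched
p.data = p.data.to("cpu")               # reassigning .data actually moves the storage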
@@ -208,6 +215,7 @@ def _set_marginal(self, indices=[]):
         self.mu.data = torch.zeros(
             1, self.n_components, len(indices), device=device
         )
+
         for i, ii in enumerate(indices):
             self.mu.data[:, :, i] = self.mu_chached[:, :, ii]
 
@@ -268,6 +276,7 @@ def fit_em(self, x, delta=1e-5, n_iter=300, warm_start=False):
         n_iter: int
         warm_start: bool
         """
+
         if not warm_start and self.fitted:
             self._init_params()
 
@@ -289,22 +298,12 @@ def fit_em(self, x, delta=1e-5, n_iter=300, warm_start=False):
             self.__em(x)
             self.log_likelihood = self.__score(x)
             self.print_verbose(f"score {self.log_likelihood.item()}")
+
             if torch.isinf(self.log_likelihood.abs()) or torch.isnan(
                 self.log_likelihood
             ):
-
                 # When the log-likelihood assumes unbound values, reinitialize model
-                self.__init__(
-                    self.n_components,
-                    self.n_features,
-                    covariance_type=self.covariance_type,
-                    mu_init=self.mu_init,
-                    var_init=self.var_init,
-                    eps=self.eps,
-                )
-
-                if self.init_means == "kmeans":
-                    (self.mu.data,) = self.get_kmeans_mu(x, n_centers=self.n_components)
+                self.__reset(x)
 
             i += 1
             j = self.log_likelihood - log_likelihood_old
@@ -316,6 +315,12 @@ def fit_em(self, x, delta=1e-5, n_iter=300, warm_start=False):
 
         self._finish_optimization()
 
+    def __reset(self, x):
+        print("RESET")
+        self._init_params()
+        if self.init_means == "kmeans":
+            self.mu.data = self.get_kmeans_mu(x, n_centers=self.n_components)
+
     def fit_grad(self, x, n_iter=1000, learning_rate=1e-1):
 
         # TODO: make sure constraints for self.var & self.pi are satisfied
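For intuition on why fit_em guards the score at all: when a component collapses onto (near-)duplicate points, its covariance becomes singular and the log-likelihood degenerates to NaN or an infinity, which is what triggers __reset above. A two-line demonstration:

import torch

var = torch.zeros(2, 2)    # a fully collapsed 2-D covariance
print(torch.logdet(var))   # tensor(-inf): the mixture score becomes unbounded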
@@ -448,8 +453,8 @@ def _estimate_log_prob(self, x):
         x = self.check_size(x)
 
         if self.covariance_type == "full":
-            mu = self.mu.detach()
-            var = self.var.detach()
+            mu = self.mu.detach().to(x.device)
+            var = self.var.detach().to(x.device)
 
             if var.shape[2] == 1:
                 precision = 1 / var
@@ -490,12 +495,17 @@ def _calculate_log_det(self, var):
         var: torch.Tensor (1, k, d, d)
         """
         log_det = torch.empty(size=(self.n_components,)).to(var.device)
-
         for k in range(self.n_components):
-            log_det[k] = (
-                2 * torch.log(torch.diagonal(torch.linalg.cholesky(var[0, k]))).sum()
-            )
-
+            try:
+                dI = self.cov_reg * torch.eye(var[0, k].shape[0]).to(var.device)
+                log_det[k] = (
+                    2
+                    * torch.log(
+                        torch.diagonal(torch.linalg.cholesky(var[0, k] + dI))
+                    ).sum()
+                )
+            except RuntimeError:  # Cholesky can still fail; fall back to logdet
+                log_det[k] = torch.logdet(var[0, k])
         return log_det.unsqueeze(-1)
 
     def _e_step(self, x):
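The hunk above is the core numerical fix: add cov_reg * I as jitter before the Cholesky factorization, and fall back to torch.logdet when factorization still fails. A standalone sketch of the same idea; the function name stabilized_logdet is mine, not the repository's:

import torch

def stabilized_logdet(cov, jitter=1e-6):
    # Cholesky-based log-determinant with a small diagonal jitter, as cov_reg does above.
    dI = jitter * torch.eye(cov.shape[0], device=cov.device)
    try:
        L = torch.linalg.cholesky(cov + dI)
        return 2 * torch.log(torch.diagonal(L)).sum()
    except RuntimeError:
        return torch.logdet(cov)  # last resort for matrices the jitter cannot save

cov = torch.tensor([[1.0, 1.0], [1.0, 1.0]])  # rank-deficient covariance
print(stabilized_logdet(cov))                 # finite (about -13.1) thanks to the jitter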
@@ -555,7 +565,6 @@ def _m_step(self, x, log_resp):
         var = x2 - 2 * xmu + mu2 + self.eps
 
         pi = pi / x.shape[0]
-
         return pi, mu, var
 
     def __em(self, x):
def __em (self , x ):
@@ -582,7 +591,10 @@ def __score(self, x, as_average=True):
582
591
(or)
583
592
per_sample_score: torch.Tensor (n)
584
593
"""
585
- weighted_log_prob = self ._estimate_log_prob (x ) + torch .log (self .pi ).detach ()
594
+
595
+ weighted_log_prob = self ._estimate_log_prob (x ) + torch .log (self .pi ).detach ().to (
596
+ x .device
597
+ )
586
598
per_sample_score = torch .logsumexp (weighted_log_prob , dim = 1 )
587
599
588
600
if as_average :
@@ -668,10 +680,9 @@ def __update_pi(self, pi):
             self.n_components,
             1,
         )
-
         self.pi.data = pi
 
-    def get_kmeans_mu(self, x, n_centers, init_times=50, min_delta=1e-3):
+    def get_kmeans_mu(self, x, n_centers, init_times=2, min_delta=1e-2):
         """
         Find an initial value for the mean. Requires a threshold min_delta for the k-means algorithm to stop iterating.
         The algorithm is repeated init_times times, after which the best centerpoint is returned.
@@ -687,6 +698,10 @@ def get_kmeans_mu(self, x, n_centers, init_times=50, min_delta=1e-3):
 
         min_cost = np.inf
 
+        center = x[
+            np.random.choice(np.arange(x.shape[0]), size=n_centers, replace=False),
+            ...,
+        ]
         for i in range(init_times):
             tmp_center = x[
                 np.random.choice(np.arange(x.shape[0]), size=n_centers, replace=False),